From de8d245d3665f8f414df3ebf47805202bc6571a8 Mon Sep 17 00:00:00 2001 From: Lan Honglin Date: Tue, 26 Dec 2023 18:03:38 +0800 Subject: [PATCH 01/12] media: i2c: os04d10: fix adapter fastboot issue Change-Id: Ia11491a7602eb2a6a1a2453083fa574aaef1ea61 Signed-off-by: Lan Honglin --- drivers/media/i2c/os04d10.c | 41 +++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/media/i2c/os04d10.c b/drivers/media/i2c/os04d10.c index e14b53d2d997..1ebdea7eae00 100644 --- a/drivers/media/i2c/os04d10.c +++ b/drivers/media/i2c/os04d10.c @@ -871,32 +871,30 @@ static int __os04d10_power_on(struct os04d10 *os04d10) dev_err(dev, "could not set pins\n"); } - if (os04d10->is_thunderboot) - return 0; + if (!os04d10->is_thunderboot) { + if (!IS_ERR(os04d10->reset_gpio)) + gpiod_set_value_cansleep(os04d10->reset_gpio, 0); - if (!IS_ERR(os04d10->reset_gpio)) - gpiod_set_value_cansleep(os04d10->reset_gpio, 0); + usleep_range(5000, 6000); - usleep_range(5000, 6000); + ret = regulator_bulk_enable(OS04D10_NUM_SUPPLIES, os04d10->supplies); + if (ret < 0) { + dev_err(dev, "Failed to enable regulators\n"); + goto disable_clk; + } - ret = regulator_bulk_enable(OS04D10_NUM_SUPPLIES, os04d10->supplies); - if (ret < 0) { - dev_err(dev, "Failed to enable regulators\n"); - goto disable_clk; - } + if (!IS_ERR(os04d10->reset_gpio)) + gpiod_set_value_cansleep(os04d10->reset_gpio, 1); - if (!IS_ERR(os04d10->reset_gpio)) - gpiod_set_value_cansleep(os04d10->reset_gpio, 1); + usleep_range(500, 1000); - usleep_range(500, 1000); + if (!IS_ERR(os04d10->reset_gpio)) + usleep_range(8000, 10000); + else + usleep_range(12000, 16000); - if (!IS_ERR(os04d10->reset_gpio)) - usleep_range(8000, 10000); - else usleep_range(12000, 16000); - - usleep_range(12000, 16000); - + } ret = clk_set_rate(os04d10->xvclk, OS04D10_XVCLK_FREQ); if (ret < 0) dev_warn(dev, "Failed to set xvclk rate (24MHz)\n"); @@ -908,6 +906,9 @@ static int __os04d10_power_on(struct os04d10 *os04d10) 
return ret; } + if (os04d10->is_thunderboot) + return 0; + /* 8192 cycles prior to first SCCB transaction */ delay_us = os04d10_cal_delay(8192); usleep_range(delay_us, delay_us * 2); @@ -1352,7 +1353,7 @@ static int os04d10_probe(struct i2c_client *client, if (IS_ERR(os04d10->reset_gpio)) dev_warn(dev, "Failed to get reset-gpios\n"); - if (!IS_ERR(os04d10->reset_gpio)) + if (!IS_ERR(os04d10->reset_gpio) && !os04d10->is_thunderboot) gpiod_set_value_cansleep(os04d10->reset_gpio, 0); os04d10->pinctrl = devm_pinctrl_get(dev); From 80bc7372a89dbeee1d99ceabdada4d29cd282f80 Mon Sep 17 00:00:00 2001 From: Lan Honglin Date: Wed, 27 Dec 2023 11:51:56 +0800 Subject: [PATCH 02/12] media: i2c: os04d10: add reg list of 1440p@30fps and 360p@120fps Change-Id: Ifda327acddd93f993c9ce0508ec6b02a9c997cf2 Signed-off-by: Lan Honglin --- drivers/media/i2c/os04d10.c | 332 ++++++++++++++++++++++++++++++++++++ 1 file changed, 332 insertions(+) diff --git a/drivers/media/i2c/os04d10.c b/drivers/media/i2c/os04d10.c index 1ebdea7eae00..24e34034bc6e 100644 --- a/drivers/media/i2c/os04d10.c +++ b/drivers/media/i2c/os04d10.c @@ -171,6 +171,308 @@ static const struct regval os04d10_global_regs[] = { {REG_NULL, 0x00}, }; +/* + * Xclk 24Mhz + * max_framerate 120fps + * mipi_datarate per lane 720Mbps, 2lane + * 4X4 binning to 640X360 + */ +static const struct regval os04d10_linear_10_640x360_regs[] = { + {0xfd, 0x00}, + {0x20, 0x00}, + {0x20, 0x01}, + {0x20, 0x01}, + {0x20, 0x01}, + {0x20, 0x01}, + {0x41, 0xa8}, + {0x45, 0x24}, + {0x30, 0x02}, + {0x31, 0x24}, + {0x35, 0xc9}, + {0x38, 0x15}, + {0xfd, 0x01}, + {0x03, 0x00}, + {0x04, 0x04}, + {0x06, 0x01}, + {0x24, 0xff}, + {0x31, 0x26}, + {0x02, 0x01}, + {0x42, 0x5a}, + {0x47, 0x0c}, + {0x45, 0x02}, + {0x48, 0x0c}, + {0x4b, 0x88}, + {0xd4, 0x05}, + {0xd5, 0xd2}, + {0xd7, 0x05}, + {0xd8, 0xd2}, + {0x50, 0x01}, + {0x51, 0x11}, + {0x52, 0x18}, + {0x53, 0x01}, + {0x54, 0x01}, + {0x55, 0x01}, + {0x57, 0x08}, + {0x5c, 0x40}, + {0x7c, 0x06}, + {0x7d, 0x05}, + 
{0x7e, 0x05}, + {0x7f, 0x05}, + {0x90, 0x60}, + {0x91, 0x0f}, + {0x92, 0x35}, + {0x93, 0x36}, + {0x94, 0x0f}, + {0x95, 0x7e}, + {0x98, 0x5d}, + {0xa8, 0x50}, + {0xaa, 0x14}, + {0xab, 0x05}, + {0xac, 0x14}, + {0xad, 0x05}, + {0xae, 0x4a}, + {0xaf, 0x0e}, + {0xb2, 0x07}, + {0xb3, 0x0c}, + {0xc9, 0x28}, + {0xca, 0x5e}, + {0xcb, 0x5e}, + {0xcc, 0x5e}, + {0xcd, 0x5e}, + {0xce, 0x5c}, + {0xcf, 0x5c}, + {0xd0, 0x5c}, + {0xd1, 0x5c}, + {0xd2, 0x7c}, + {0xd3, 0x7c}, + {0xdb, 0x0f}, + {0xfd, 0x01}, + {0x46, 0x77}, + {0xdd, 0x00}, + {0xde, 0x3f}, + {0xfd, 0x03}, + {0x2b, 0x0a}, + {0x01, 0x22}, + {0x02, 0x03}, + {0x00, 0x06}, + {0x2a, 0x22}, + {0x29, 0x0b}, + {0x1e, 0x10}, + {0x1f, 0x02}, + {0x1a, 0x24}, + {0x1b, 0x62}, + {0x1c, 0xce}, + {0x1d, 0xd3}, + {0x04, 0x0f}, + {0x36, 0x00}, + {0x37, 0x05}, + {0x38, 0x09}, + {0x39, 0x19}, + {0x3a, 0x38}, + {0x3b, 0x22}, + {0x3c, 0x22}, + {0x3d, 0x22}, + {0x3e, 0x03}, + {0xfd, 0x02}, + {0xc1, 0x05}, + {0x8c, 0x03}, + {0x8d, 0x01}, + {0x95, 0x02}, + {0x98, 0x02}, + {0x5e, 0x22}, + {0xa1, 0x00}, + {0xa2, 0x01}, + {0xa3, 0x68}, + {0xa5, 0x02}, + {0xa6, 0x02}, + {0xa7, 0x80}, + {0x8e, 0x02}, + {0x8f, 0x80}, + {0x90, 0x01}, + {0x91, 0x68}, + {0xce, 0x65}, + {0xfd, 0x03}, + {0x03, 0x30}, + {0x05, 0x00}, + {0x12, 0x70}, + {0x13, 0x70}, + {0x16, 0x13}, + {0x21, 0xca}, + {0x27, 0x95}, + {0x2c, 0x55}, + {0x2d, 0x08}, + {0x2e, 0xca}, + {0x3f, 0xe7}, + {0xfd, 0x00}, + {0x8b, 0x01}, + {0x8d, 0x00}, + {0xfd, 0x01}, + {0x01, 0x02}, + {0xfd, 0x05}, + {0xc4, 0x62}, + {0xc5, 0x62}, + {0xc6, 0x62}, + {0xc7, 0x62}, + {0xce, 0x3e}, + {0xf0, 0x40}, + {0xf1, 0x40}, + {0xf2, 0x40}, + {0xf3, 0x40}, + {0xf4, 0x00}, + {0xf9, 0x03}, + {0xfa, 0x5d}, + {0xfb, 0x6b}, + {0xb1, 0x01}, + {REG_NULL, 0x00}, +}; + +/* + * Xclk 24Mhz + * max_framerate 30fps + * mipi_datarate per lane 720Mbps, 2lane + * raw 10 + * 2560 x 1440 + */ +static const struct regval os04d10_linear_10_2560x1440_regs[] = { + {0xfd, 0x00}, + {0x20, 0x00}, + {0x20, 0x01}, + {0x20, 0x01}, + {0x20, 0x01}, 
+ {0x20, 0x01}, + {0x41, 0xa8}, + {0x45, 0x24}, + {0x31, 0x20}, + {0x38, 0x15}, + {0xfd, 0x01}, + {0x03, 0x00}, + {0x04, 0x04}, + {0x06, 0x01}, + {0x24, 0xff}, + {0x02, 0x01}, + {0x42, 0x5a}, + {0x47, 0x0c}, + {0x45, 0x02}, + {0x48, 0x0c}, + {0x4b, 0x88}, + {0xd4, 0x05}, + {0xd5, 0xd2}, + {0xd7, 0x05}, + {0xd8, 0xd2}, + {0x50, 0x01}, + {0x51, 0x11}, + {0x52, 0x18}, + {0x53, 0x01}, + {0x54, 0x01}, + {0x55, 0x01}, + {0x57, 0x08}, + {0x5c, 0x40}, + {0x7c, 0x06}, + {0x7d, 0x05}, + {0x7e, 0x05}, + {0x7f, 0x05}, + {0x90, 0x60}, + {0x91, 0x0f}, + {0x92, 0x35}, + {0x93, 0x36}, + {0x94, 0x0f}, + {0x95, 0x7e}, + {0x98, 0x5d}, + {0xa8, 0x50}, + {0xaa, 0x14}, + {0xab, 0x05}, + {0xac, 0x14}, + {0xad, 0x05}, + {0xae, 0x4a}, + {0xaf, 0x0e}, + {0xb2, 0x07}, + {0xb3, 0x0c}, + {0xc9, 0x28}, + {0xca, 0x5e}, + {0xcb, 0x5e}, + {0xcc, 0x5e}, + {0xcd, 0x5e}, + {0xce, 0x5c}, + {0xcf, 0x5c}, + {0xd0, 0x5c}, + {0xd1, 0x5c}, + {0xd2, 0x7c}, + {0xd3, 0x7c}, + {0xdb, 0x0f}, + {0xfd, 0x01}, + {0x46, 0x77}, + {0xdd, 0x00}, + {0xde, 0x3f}, + {0xfd, 0x03}, + {0x2b, 0x0a}, + {0x01, 0x22}, + {0x02, 0x03}, + {0x00, 0x06}, + {0x2a, 0x22}, + {0x29, 0x0b}, + {0x1e, 0x10}, + {0x1f, 0x02}, + {0x1a, 0x24}, + {0x1b, 0x62}, + {0x1c, 0xce}, + {0x1d, 0xd3}, + {0x04, 0x0f}, + {0x36, 0x00}, + {0x37, 0x05}, + {0x38, 0x09}, + {0x39, 0x19}, + {0x3a, 0x38}, + {0x3b, 0x22}, + {0x3c, 0x22}, + {0x3d, 0x22}, + {0x3e, 0x03}, + {0xfd, 0x02}, + {0x5e, 0x22}, + {0xa1, 0x04}, + {0xa2, 0x05}, + {0xa3, 0xa0}, + {0xa5, 0x04}, + {0xa6, 0x0a}, + {0xa7, 0x00}, + {0x8e, 0x0a}, + {0x8f, 0x00}, + {0x90, 0x05}, + {0x91, 0xa0}, + {0xce, 0x65}, + {0xfd, 0x03}, + {0x03, 0x30}, + {0x05, 0x00}, + {0x12, 0x70}, + {0x13, 0x70}, + {0x16, 0x13}, + {0x21, 0xca}, + {0x27, 0x95}, + {0x2c, 0x55}, + {0x2d, 0x08}, + {0x2e, 0xca}, + {0x3f, 0xe7}, + {0xfd, 0x00}, + {0x8b, 0x01}, + {0x8d, 0x00}, + {0xfd, 0x01}, + {0x01, 0x02}, + {0xfd, 0x05}, + {0xc4, 0x62}, + {0xc5, 0x62}, + {0xc6, 0x62}, + {0xc7, 0x62}, + {0xf0, 0x40}, + {0xf1, 0x40}, + {0xf2, 0x40}, 
+ {0xf3, 0x40}, + {0xf4, 0x00}, + {0xf9, 0x03}, + {0xfa, 0x5d}, + {0xfb, 0x6b}, + {0xb1, 0x01}, + {REG_NULL, 0x00}, +}; + /* * Xclk 24Mhz * max_framerate 15fps @@ -307,6 +609,21 @@ static const struct regval os04d10_linear_10_2568x1448_regs[] = { }; static const struct os04d10_mode supported_modes[] = { + { + .width = 2560, + .height = 1440, + .max_fps = { + .numerator = 10000, + .denominator = 300000, + }, + .exp_def = 0x0080, + .hts_def = 0x032e, + .vts_def = 0x05c1, + .bus_fmt = MEDIA_BUS_FMT_SBGGR10_1X10, + .reg_list = os04d10_linear_10_2560x1440_regs, + .hdr_mode = NO_HDR, + .vc[PAD0] = V4L2_MBUS_CSI2_CHANNEL_0, + }, { .width = 2568, .height = 1448, @@ -321,6 +638,21 @@ static const struct os04d10_mode supported_modes[] = { .reg_list = os04d10_linear_10_2568x1448_regs, .hdr_mode = NO_HDR, .vc[PAD0] = V4L2_MBUS_CSI2_CHANNEL_0, + }, + { + .width = 640, + .height = 360, + .max_fps = { + .numerator = 10000, + .denominator = 1200000, + }, + .exp_def = 0x0080, + .hts_def = 0x032e, + .vts_def = 0x0171, + .bus_fmt = MEDIA_BUS_FMT_SBGGR10_1X10, + .reg_list = os04d10_linear_10_640x360_regs, + .hdr_mode = NO_HDR, + .vc[PAD0] = V4L2_MBUS_CSI2_CHANNEL_0, } }; From 9d4ae424d0ddaba6f53d73a70507f8ce8ea93ab9 Mon Sep 17 00:00:00 2001 From: Zhang Yubing Date: Wed, 27 Dec 2023 16:49:00 +0800 Subject: [PATCH 03/12] mfd: rkx110_x120: disable phy when stop video stream Change-Id: I4efe4873dafc74d549fc8322fdb35490140eba5b Signed-off-by: Zhang Yubing --- drivers/mfd/rkx110_x120/rkx110_x120.h | 2 + drivers/mfd/rkx110_x120/rkx110_x120_display.c | 62 +++++++++++++++++-- drivers/mfd/rkx110_x120/rkx120.c | 11 ++++ drivers/mfd/rkx110_x120/rkx120_combtxphy.c | 14 +++++ 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/rkx110_x120/rkx110_x120.h b/drivers/mfd/rkx110_x120/rkx110_x120.h index 319a3286d895..ca875c91eccb 100644 --- a/drivers/mfd/rkx110_x120/rkx110_x120.h +++ b/drivers/mfd/rkx110_x120/rkx110_x120.h @@ -363,6 +363,8 @@ int rkx120_display_linkrx_enable(struct 
rk_serdes *serdes, int rkx120_rgb_tx_enable(struct rk_serdes *serdes, struct rk_serdes_route *route, u8 remote_id); int rkx120_lvds_tx_enable(struct rk_serdes *serdes, struct rk_serdes_route *route, u8 remote_id, u8 phy_id); +int rkx120_lvds_tx_disable(struct rk_serdes *serdes, struct rk_serdes_route *route, u8 remote_id, + u8 phy_id); void rkx120_linkrx_gpi_gpo_mux_cfg(struct rk_serdes *serdes, u32 mux, u8 remote_id); void rkx110_linktx_gpi_gpo_mux_cfg(struct rk_serdes *serdes, u32 mux, u8 remote_id); int rkx110_rgb_rx_enable(struct rk_serdes *serdes, struct rk_serdes_route *route); diff --git a/drivers/mfd/rkx110_x120/rkx110_x120_display.c b/drivers/mfd/rkx110_x120/rkx110_x120_display.c index 319edd1f38fc..dff07ed21230 100644 --- a/drivers/mfd/rkx110_x120/rkx110_x120_display.c +++ b/drivers/mfd/rkx110_x120/rkx110_x120_display.c @@ -185,11 +185,65 @@ int rk_serdes_display_route_enable(struct rk_serdes *serdes, struct rk_serdes_ro int rk_serdes_display_route_disable(struct rk_serdes *serdes, struct rk_serdes_route *route) { - if (route->remote0_port0 & RK_SERDES_DSI_TX0) - rkx120_dsi_tx_disable(serdes, route, DEVICE_REMOTE0); + if (route->remote0_port0) { + switch (route->remote0_port0) { + case RK_SERDES_RGB_TX: + break; + case RK_SERDES_LVDS_TX0: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE0, 0); + break; + case RK_SERDES_LVDS_TX1: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE0, 1); + break; + case RK_SERDES_DUAL_LVDS_TX: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE0, 0); + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE0, 1); + break; + case RK_SERDES_DSI_TX0: + rkx120_dsi_tx_disable(serdes, route, DEVICE_REMOTE0); + break; + default: + dev_err(serdes->dev, "undefined remote0_port0\n"); + break; + } + } - if (route->remote1_port0 & RK_SERDES_DSI_TX0) - rkx120_dsi_tx_disable(serdes, route, DEVICE_REMOTE1); + if (route->remote1_port0) { + switch (route->remote1_port0) { + case RK_SERDES_RGB_TX: + break; + case RK_SERDES_LVDS_TX0: + 
rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE1, 0); + break; + case RK_SERDES_LVDS_TX1: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE1, 1); + break; + case RK_SERDES_DUAL_LVDS_TX: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE1, 0); + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE1, 1); + break; + case RK_SERDES_DSI_TX0: + rkx120_dsi_tx_disable(serdes, route, DEVICE_REMOTE1); + break; + default: + dev_err(serdes->dev, "undefined remote1_port0\n"); + break; + } + } + + if (route->remote0_port1) { + switch (route->remote0_port1) { + case RK_SERDES_LVDS_TX0: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE0, 0); + break; + case RK_SERDES_LVDS_TX1: + rkx120_lvds_tx_disable(serdes, route, DEVICE_REMOTE0, 1); + break; + default: + dev_err(serdes->dev, "undefined remote0_port1\n"); + break; + } + } if (serdes->version == SERDES_V1) { rk_serdes_display_video_start(serdes, route, false); diff --git a/drivers/mfd/rkx110_x120/rkx120.c b/drivers/mfd/rkx110_x120/rkx120.c index 2f38af1fe0db..eb7cfaff00a1 100644 --- a/drivers/mfd/rkx110_x120/rkx120.c +++ b/drivers/mfd/rkx110_x120/rkx120.c @@ -327,3 +327,14 @@ int rkx120_lvds_tx_enable(struct rk_serdes *serdes, struct rk_serdes_route *rout return 0; } + +int rkx120_lvds_tx_disable(struct rk_serdes *serdes, struct rk_serdes_route *route, u8 remote_id, + u8 phy_id) +{ + struct rk_serdes_panel *sd_panel = container_of(route, struct rk_serdes_panel, route); + struct rkx120_combtxphy *combtxphy = &sd_panel->combtxphy; + + rkx120_combtxphy_power_off(serdes, combtxphy, remote_id, phy_id); + + return 0; +} diff --git a/drivers/mfd/rkx110_x120/rkx120_combtxphy.c b/drivers/mfd/rkx110_x120/rkx120_combtxphy.c index 6e516c7826b3..7537c66bdc4e 100644 --- a/drivers/mfd/rkx110_x120/rkx120_combtxphy.c +++ b/drivers/mfd/rkx110_x120/rkx120_combtxphy.c @@ -307,6 +307,19 @@ static void rkx120_combtxphy_lvds_power_on(struct rk_serdes *des, des->i2c_write_reg(client, grf_base + GRF_MIPITX_CON13, TX_IDLE(0)); } +static void 
rkx120_combtxphy_lvds_power_off(struct rk_serdes *des, + struct rkx120_combtxphy *combtxphy, + u8 dev_id, u8 phy_id) +{ + struct i2c_client *client = des->chip[dev_id].client; + u32 grf_base = (phy_id == 0) ? + RKX120_GRF_MIPI0_BASE : RKX120_GRF_MIPI1_BASE; + + des->i2c_write_reg(client, grf_base + GRF_MIPITX_CON14, TX_PD(0)); + des->i2c_write_reg(client, grf_base + GRF_MIPITX_CON0, PHYSHUTDWN(0)); + des->i2c_write_reg(client, grf_base + GRF_MIPITX_CON1, PWON_PLL(0)); +} + void rkx120_combtxphy_power_on(struct rk_serdes *des, struct rkx120_combtxphy *combtxphy, u8 dev_id, u8 phy_id) { @@ -330,6 +343,7 @@ void rkx120_combtxphy_power_off(struct rk_serdes *des, struct rkx120_combtxphy * rkx120_combtxphy_dsi_power_off(des, dev_id); break; case COMBTX_PHY_MODE_VIDEO_LVDS: + rkx120_combtxphy_lvds_power_off(des, combtxphy, dev_id, phy_id); break; case COMBTX_PHY_MODE_GPIO: break; From bd48aceb6abc7e6b51933cb2d417c7e13ef54b3f Mon Sep 17 00:00:00 2001 From: LongChang Ma Date: Tue, 12 Dec 2023 10:53:30 +0800 Subject: [PATCH 04/12] media: i2c: cam-sleep-wakeup: optimize sensor wakeup workflow for reset Signed-off-by: LongChang Ma Change-Id: I3fa6752a404e8b9f70dfd5c75a554f5511e6e49a --- drivers/media/i2c/cam-sleep-wakeup.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/media/i2c/cam-sleep-wakeup.c b/drivers/media/i2c/cam-sleep-wakeup.c index f8a6c83369b4..22053f86e232 100644 --- a/drivers/media/i2c/cam-sleep-wakeup.c +++ b/drivers/media/i2c/cam-sleep-wakeup.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -155,9 +156,6 @@ int cam_sw_prepare_wakeup(struct cam_sw_info *info, struct device *dev) return -EINVAL; } - if (!IS_ERR(info->pin.reset_gpio)) - gpiod_set_value_cansleep(info->pin.reset_gpio, info->pin.reset_active_state); - if (!IS_ERR(info->pin.supplies) && info->pin.supplies_num) { ret = regulator_bulk_enable(info->pin.supplies_num, info->pin.supplies); if (ret != 0) @@ -183,7 +181,7 @@ int 
cam_sw_prepare_sleep(struct cam_sw_info *info) gpiod_set_value_cansleep(info->pin.pwdn_gpio, !info->pin.pwdn_active_state); if (!IS_ERR(info->pin.reset_gpio)) - gpiod_set_value_cansleep(info->pin.reset_gpio, !info->pin.reset_active_state); + gpiod_set_value_cansleep(info->pin.reset_gpio, info->pin.reset_active_state); if (!IS_ERR_OR_NULL(info->pin.pins_sleep)) pinctrl_select_state(info->pin.pinctrl, info->pin.pins_sleep); From 730004820bcdd4385ff492f8274c43552e40ca89 Mon Sep 17 00:00:00 2001 From: Zhang Yubing Date: Thu, 21 Dec 2023 19:14:59 +0800 Subject: [PATCH 05/12] mfd: rkx110_x120: reset dsi tx host when generator pattern Change-Id: I92b4733152532c62e79d39eabdf2ec99d006a257 Signed-off-by: Zhang Yubing --- drivers/mfd/rkx110_x120/pattern_gen.c | 9 ++++++++- drivers/mfd/rkx110_x120/rkx120_dsi_tx.c | 6 ++++++ drivers/mfd/rkx110_x120/rkx120_dsi_tx.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/rkx110_x120/pattern_gen.c b/drivers/mfd/rkx110_x120/pattern_gen.c index 8ad25745280a..32762f16c035 100644 --- a/drivers/mfd/rkx110_x120/pattern_gen.c +++ b/drivers/mfd/rkx110_x120/pattern_gen.c @@ -9,6 +9,7 @@ #include "rkx110_x120.h" #include "rkx110_x120_display.h" +#include "rkx120_dsi_tx.h" #include "hal/cru_api.h" #define PATTERN_GEN_PATTERN_CTRL 0x0000 @@ -114,7 +115,7 @@ static void pattern_start_stream(struct pattern_gen *pattern_gen, bool is_patter return; if (pattern_gen->chip != &serdes->chip[DEVICE_LOCAL]) - return; + goto out; if (!strcmp(pattern_gen->name, "lvds0")) { hwclk_reset_deassert(serdes->chip[DEVICE_LOCAL].hwclk, @@ -170,6 +171,12 @@ static void pattern_start_stream(struct pattern_gen *pattern_gen, bool is_patter } rk_serdes_display_video_start(serdes, pattern_gen->route, true); + +out: + if (pattern_gen->route->remote0_port0 == RK_SERDES_DSI_TX0) + rkx120_dsi_tx_reset(serdes, DEVICE_REMOTE0); + if (pattern_gen->route->remote1_port0 == RK_SERDES_DSI_TX0) + rkx120_dsi_tx_reset(serdes, DEVICE_REMOTE1); } static void 
pattern_switch_clk_to_pattern(struct pattern_gen *pattern_gen, struct videomode *vm) diff --git a/drivers/mfd/rkx110_x120/rkx120_dsi_tx.c b/drivers/mfd/rkx110_x120/rkx120_dsi_tx.c index 4e262c4ea1d9..86c5e4dc81d6 100644 --- a/drivers/mfd/rkx110_x120/rkx120_dsi_tx.c +++ b/drivers/mfd/rkx110_x120/rkx120_dsi_tx.c @@ -1181,3 +1181,9 @@ void rkx120_dsi_tx_disable(struct rk_serdes *des, struct rk_serdes_route *route, dsi_write(des, remote_id, DSI_MODE_CFG, CMD_VIDEO_MODE(COMMAND_MODE)); dsi_write(des, remote_id, DSI_PWR_UP, POWER_UP); } + +void rkx120_dsi_tx_reset(struct rk_serdes *des, u8 remote_id) +{ + dsi_write(des, remote_id, DSI_PWR_UP, RESET); + dsi_write(des, remote_id, DSI_PWR_UP, POWER_UP); +} diff --git a/drivers/mfd/rkx110_x120/rkx120_dsi_tx.h b/drivers/mfd/rkx110_x120/rkx120_dsi_tx.h index 1703766c3608..b426fc583f22 100644 --- a/drivers/mfd/rkx110_x120/rkx120_dsi_tx.h +++ b/drivers/mfd/rkx110_x120/rkx120_dsi_tx.h @@ -20,4 +20,5 @@ void rkx120_dsi_tx_post_disable(struct rk_serdes *serdes, struct rk_serdes_route *route, u8 remote_id); void rkx120_dsi_tx_disable(struct rk_serdes *serdes, struct rk_serdes_route *route, u8 remote_id); +void rkx120_dsi_tx_reset(struct rk_serdes *des, u8 remote_id); #endif From 4de536c973e35b7614bcaac26625cfbae4e7d810 Mon Sep 17 00:00:00 2001 From: Zefa Chen Date: Thu, 28 Dec 2023 09:26:33 +0800 Subject: [PATCH 06/12] media: rockchip: vicap: support use reserved mem for aov when run with thunderboot mode Signed-off-by: Zefa Chen Change-Id: Ic0dca3b4e8b32c217abece05a6566beb54bc6eaa --- drivers/media/platform/rockchip/cif/capture.c | 22 ++++++++++++++----- drivers/media/platform/rockchip/cif/dev.c | 3 +++ drivers/media/platform/rockchip/cif/dev.h | 3 +++ .../media/platform/rockchip/cif/subdev-itf.c | 10 +++++++-- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/rockchip/cif/capture.c b/drivers/media/platform/rockchip/cif/capture.c index dfa9d8ed5d5d..209d4969d04d 100644 --- 
a/drivers/media/platform/rockchip/cif/capture.c +++ b/drivers/media/platform/rockchip/cif/capture.c @@ -4569,6 +4569,7 @@ void rkcif_free_rx_buf(struct rkcif_stream *stream, int buf_num) phys_addr_t resmem_free_start; phys_addr_t resmem_free_end; u32 share_head_size = 0; + u32 rtt_min_size = 0; if (!priv) return; @@ -4577,7 +4578,7 @@ void rkcif_free_rx_buf(struct rkcif_stream *stream, int buf_num) if (!sd) return; - if (dev->is_rtt_suspend && dev->is_thunderboot) { + if ((dev->is_rtt_suspend || dev->is_aov_reserved) && dev->is_thunderboot) { stream->curr_buf_toisp = NULL; stream->next_buf_toisp = NULL; INIT_LIST_HEAD(&stream->rx_buf_head); @@ -4596,7 +4597,14 @@ void rkcif_free_rx_buf(struct rkcif_stream *stream, int buf_num) v4l2_info(&stream->cifdev->v4l2_dev, "share mem head error, rtt head size %d, arm head size %d\n", dev->share_mem_size, share_head_size); - resmem_free_start = dev->resmem_pa + share_head_size + dev->nr_buf_size; + if (share_head_size + dev->nr_buf_size > stream->pixm.plane_fmt[0].sizeimage) + rtt_min_size = share_head_size + dev->nr_buf_size; + else + rtt_min_size = stream->pixm.plane_fmt[0].sizeimage; + if (dev->is_rtt_suspend) + resmem_free_start = dev->resmem_pa + rtt_min_size; + else + resmem_free_start = dev->resmem_pa + stream->pixm.plane_fmt[0].sizeimage; resmem_free_end = dev->resmem_pa + dev->resmem_size; v4l2_info(&stream->cifdev->v4l2_dev, "free reserved mem start 0x%x, end 0x%x, share_head_size 0x%x, nr_buf_size 0x%x\n", @@ -4604,6 +4612,10 @@ void rkcif_free_rx_buf(struct rkcif_stream *stream, int buf_num) free_reserved_area(phys_to_virt(resmem_free_start), phys_to_virt(resmem_free_end), -1, "rkisp_thunderboot"); + if (dev->is_rtt_suspend) + dev->resmem_size = rtt_min_size; + else + dev->resmem_size = stream->pixm.plane_fmt[0].sizeimage; } atomic_set(&stream->buf_cnt, 0); stream->total_buf_num = 0; @@ -4692,7 +4704,7 @@ int rkcif_init_rx_buf(struct rkcif_stream *stream, int buf_num) dummy->size = pixm->plane_fmt[0].sizeimage; 
dummy->is_need_vaddr = true; dummy->is_need_dbuf = true; - if (dev->is_thunderboot) { + if (dev->is_thunderboot || dev->is_rtt_suspend || dev->is_aov_reserved) { if (i == 0) rkcif_get_resmem_head(dev); buf->buf_idx = i; @@ -7295,8 +7307,6 @@ static bool rkcif_check_can_be_online(struct rkcif_device *cif_dev) static int rkcif_do_reset_work(struct rkcif_device *cif_dev, enum rkmodule_reset_src reset_src); -static bool rkcif_check_single_dev_stream_on(struct rkcif_hw *hw); - static long rkcif_ioctl_default(struct file *file, void *fh, bool valid_prio, unsigned int cmd, void *arg) { @@ -10467,7 +10477,7 @@ static bool rkcif_check_buffer_prepare(struct rkcif_stream *stream) return is_update; } -static bool rkcif_check_single_dev_stream_on(struct rkcif_hw *hw) +bool rkcif_check_single_dev_stream_on(struct rkcif_hw *hw) { struct rkcif_device *cif_dev = NULL; struct rkcif_stream *stream = NULL; diff --git a/drivers/media/platform/rockchip/cif/dev.c b/drivers/media/platform/rockchip/cif/dev.c index 260fb83d9305..c6988c76e38a 100644 --- a/drivers/media/platform/rockchip/cif/dev.c +++ b/drivers/media/platform/rockchip/cif/dev.c @@ -2152,6 +2152,7 @@ static int rkcif_get_reserved_mem(struct rkcif_device *cif_dev) cif_dev->is_thunderboot = false; cif_dev->is_rtt_suspend = false; + cif_dev->is_aov_reserved = false; /* Get reserved memory region from Device-tree */ np = of_parse_phandle(dev->of_node, "memory-region-thunderboot", 0); if (!np) { @@ -2173,6 +2174,8 @@ static int rkcif_get_reserved_mem(struct rkcif_device *cif_dev) if (device_property_read_bool(dev, "rtt-suspend")) cif_dev->is_rtt_suspend = true; + if (device_property_read_bool(dev, "aov-reserved")) + cif_dev->is_aov_reserved = true; if (IS_ENABLED(CONFIG_VIDEO_ROCKCHIP_THUNDER_BOOT_ISP)) cif_dev->is_thunderboot = true; dev_info(dev, "Allocated reserved memory, paddr: 0x%x, size 0x%x\n", diff --git a/drivers/media/platform/rockchip/cif/dev.h b/drivers/media/platform/rockchip/cif/dev.h index 31316c003a16..912e41abb48a 
100644 --- a/drivers/media/platform/rockchip/cif/dev.h +++ b/drivers/media/platform/rockchip/cif/dev.h @@ -907,6 +907,7 @@ struct rkcif_device { bool is_rdbk_to_online; bool is_support_tools; bool is_rtt_suspend; + bool is_aov_reserved; bool sensor_state_change; bool is_toisp_reset; int rdbk_debug; @@ -1032,4 +1033,6 @@ static inline u64 rkcif_time_get_ns(struct rkcif_device *dev) return ktime_get_ns(); } +bool rkcif_check_single_dev_stream_on(struct rkcif_hw *hw); + #endif diff --git a/drivers/media/platform/rockchip/cif/subdev-itf.c b/drivers/media/platform/rockchip/cif/subdev-itf.c index 441b7df79cc0..971ff11055c9 100644 --- a/drivers/media/platform/rockchip/cif/subdev-itf.c +++ b/drivers/media/platform/rockchip/cif/subdev-itf.c @@ -828,6 +828,7 @@ static int sditf_s_rx_buffer(struct v4l2_subdev *sd, u32 diff_time = 1000000; u32 early_time = 0; bool is_free = false; + bool is_single_dev = false; if (!buf) { v4l2_err(&cif_dev->v4l2_dev, "buf is NULL\n"); @@ -868,9 +869,12 @@ static int sditf_s_rx_buffer(struct v4l2_subdev *sd, stream->last_rx_buf_idx = dbufs->sequence + 1; atomic_inc(&stream->buf_cnt); + is_single_dev = rkcif_check_single_dev_stream_on(cif_dev->hw_dev); if (!list_empty(&stream->rx_buf_head) && cif_dev->is_thunderboot && - (!cif_dev->is_rtt_suspend) && + ((!cif_dev->is_rtt_suspend && + !cif_dev->is_aov_reserved) || + !is_single_dev) && (dbufs->type == BUF_SHORT || (dbufs->type != BUF_SHORT && (!dbufs->is_switch)))) { spin_lock_irqsave(&cif_dev->buffree_lock, buffree_flags); @@ -899,7 +903,9 @@ static int sditf_s_rx_buffer(struct v4l2_subdev *sd, offset = rx_buf->dummy.size - stream->pixm.plane_fmt[0].bytesperline * 3; memset(rx_buf->dummy.vaddr + offset, 0x00, stream->pixm.plane_fmt[0].bytesperline * 3); - if (cif_dev->is_thunderboot) + if (cif_dev->is_thunderboot || + cif_dev->is_rtt_suspend || + cif_dev->is_aov_reserved) dma_sync_single_for_device(cif_dev->dev, rx_buf->dummy.dma_addr + rx_buf->dummy.size - stream->pixm.plane_fmt[0].bytesperline 
* 3, From b0e840f8a4378fe0c550312f465907f6dc972c85 Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Thu, 21 Dec 2023 17:01:46 +0800 Subject: [PATCH 07/12] drm/rockchip: analogix_dp: add dual channel mode support Signed-off-by: Damon Ding Change-Id: I5d98ddc887da6c1a36be72d930d8b2312be0e446 --- drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 2 +- drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 5 ++++- include/drm/bridge/analogix_dp.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 53264c731c69..dda150cec4a0 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1325,7 +1325,7 @@ static int analogix_dp_get_modes(struct drm_connector *connector) if (dp->plat_data->get_modes) num_modes += dp->plat_data->get_modes(dp->plat_data, connector); - if (num_modes > 0 && dp->plat_data->split_mode) { + if (num_modes > 0 && dp->plat_data->split_mode && !dp->plat_data->dual_channel_mode) { struct drm_display_mode *mode; list_for_each_entry(mode, &connector->probed_modes, head) diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 9f09525b4bc0..b6464e5f19f7 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -689,7 +689,9 @@ static int rockchip_dp_probe(struct platform_device *pdev) if (IS_ERR(dp->adp)) return PTR_ERR(dp->adp); - if (dp->data->split_mode && device_property_read_bool(dev, "split-mode")) { + if (dp->data->split_mode && + (device_property_read_bool(dev, "split-mode") || + device_property_read_bool(dev, "dual-channel"))) { struct rockchip_dp_device *secondary = rockchip_dp_find_by_id(dev->driver, !dp->id); if (!secondary) { @@ -699,6 +701,7 @@ static int rockchip_dp_probe(struct platform_device *pdev) dp->plat_data.right = secondary->adp; 
dp->plat_data.split_mode = true; + dp->plat_data.dual_channel_mode = device_property_read_bool(dev, "dual-channel"); secondary->plat_data.panel = dp->plat_data.panel; secondary->plat_data.left = dp->adp; secondary->plat_data.split_mode = true; diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index 09f0dfeff6e8..46e29e4c2096 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -43,6 +43,7 @@ struct analogix_dp_plat_data { bool ssc; bool split_mode; + bool dual_channel_mode; /* split with other display interface */ bool dual_connector_split; From e2f9938790a6d9eed3c285c90be45575b6e4c70f Mon Sep 17 00:00:00 2001 From: Algea Cao Date: Tue, 26 Dec 2023 14:50:04 +0800 Subject: [PATCH 08/12] phy: rockchip-samsung-hdptx-hdmi: Calculate the pll frequency based on the register value Change-Id: Ifd069b1c26efc6b57065e98a4d694c069c29173f Signed-off-by: Algea Cao --- .../phy-rockchip-samsung-hdptx-hdmi.c | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx-hdmi.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx-hdmi.c index 15f361facdf3..ccd0874551f1 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx-hdmi.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx-hdmi.c @@ -892,7 +892,7 @@ static inline void hdptx_grf_write(struct rockchip_hdptx_phy *hdptx, u32 reg, u3 regmap_write(hdptx->grf, reg, val); } -static inline u8 hdptx_grf_read(struct rockchip_hdptx_phy *hdptx, u32 reg) +static inline u32 hdptx_grf_read(struct rockchip_hdptx_phy *hdptx, u32 reg) { u32 val; @@ -2045,12 +2045,48 @@ static void rockchip_hdptx_phy_runtime_disable(void *data) pm_runtime_disable(hdptx->dev); } +#define PLL_REF_CLK 24000000ULL + static unsigned long hdptx_phy_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct rockchip_hdptx_phy *hdptx = to_rockchip_hdptx_phy(hw); + u8 mdiv, sdiv, sdm_num, sdm_deno, sdc_n, sdc_num, 
sdc_deno; + u64 fout, sdm; + u32 val; + bool sdm_en, sdm_num_sign; - return hdptx->rate; + if (hdptx->rate) + return hdptx->rate; + + val = hdptx_grf_read(hdptx, GRF_HDPTX_CON0); + if (!(val & HDPTX_I_PLL_EN)) + return 0; + + mdiv = hdptx_read(hdptx, CMN_REG0051); + sdm_en = hdptx_read(hdptx, CMN_REG005E) & ROPLL_SDM_EN_MASK; + sdm_num_sign = hdptx_read(hdptx, CMN_REG0064) & ROPLL_SDM_NUM_SIGN_RBR_MASK; + sdm_num = hdptx_read(hdptx, CMN_REG0065); + sdm_deno = hdptx_read(hdptx, CMN_REG0060); + sdc_n = (hdptx_read(hdptx, CMN_REG0069) & ROPLL_SDC_N_RBR_MASK) + 3; + sdc_num = hdptx_read(hdptx, CMN_REG006C); + sdc_deno = hdptx_read(hdptx, CMN_REG0070); + sdiv = ((hdptx_read(hdptx, CMN_REG0086) & PLL_PCG_POSTDIV_SEL_MASK) >> 4) + 1; + + fout = PLL_REF_CLK * mdiv; + if (sdm_en) { + sdm = div_u64(PLL_REF_CLK * sdc_deno * mdiv * sdm_num, + 16 * sdm_deno * (sdc_deno * sdc_n - sdc_num)); + + if (sdm_num_sign) + fout = fout - sdm; + else + fout = fout + sdm; + } + + fout = div_u64(fout * 2, sdiv * 10); + + return fout; } static long hdptx_phy_clk_round_rate(struct clk_hw *hw, unsigned long rate, From a94d0b968ad6275250a6b6bd66f75dcb688ef253 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Mon, 5 Jun 2023 10:40:56 +0800 Subject: [PATCH 09/12] MALI: rockchip: upgrade bifrost DDK to g21p0-01eac0, from g18p0-01eac0 NOTE: For RK3588, the mali_csffw.bin used with this driver MUST be from DDK g21p0-01eac0 correspondingly. 
Change-Id: Ifab61806a6a350ba53c5dc0296d20628c28d633a Signed-off-by: Zhen Chen --- Documentation/ABI/testing/sysfs-device-mali | 29 +- .../sysfs-device-mali-coresight-source | 128 +- Documentation/csf_sync_state_dump.txt | 64 +- .../devicetree/bindings/arm/mali-bifrost.txt | 32 +- .../bindings/arm/mali-coresight-source.txt | 58 +- drivers/base/arm/Kbuild | 3 +- drivers/base/arm/Kconfig | 2 +- drivers/base/arm/Makefile | 116 +- .../dma-buf-test-exporter.c | 180 +- .../memory_group_manager.c | 185 +- .../protected_memory_allocator.c | 151 +- drivers/gpu/arm/bifrost/Kbuild | 10 +- drivers/gpu/arm/bifrost/Kconfig | 62 +- drivers/gpu/arm/bifrost/Makefile | 321 +- .../arm/bifrost/arbiter/mali_kbase_arbif.c | 20 +- .../bifrost/arbiter/mali_kbase_arbiter_pm.c | 245 +- .../bifrost/arbiter/mali_kbase_arbiter_pm.h | 14 +- drivers/gpu/arm/bifrost/backend/gpu/Kbuild | 4 +- .../backend/gpu/mali_kbase_backend_config.h | 3 +- .../gpu/mali_kbase_cache_policy_backend.c | 82 +- .../gpu/mali_kbase_cache_policy_backend.h | 31 +- .../gpu/mali_kbase_clk_rate_trace_mgr.c | 73 +- .../gpu/mali_kbase_clk_rate_trace_mgr.h | 30 +- .../gpu/mali_kbase_debug_job_fault_backend.c | 158 +- .../bifrost/backend/gpu/mali_kbase_devfreq.c | 142 +- .../bifrost/backend/gpu/mali_kbase_devfreq.h | 17 +- .../backend/gpu/mali_kbase_gpuprops_backend.c | 201 +- .../backend/gpu/mali_kbase_instr_backend.c | 93 +- .../backend/gpu/mali_kbase_irq_internal.h | 9 +- .../backend/gpu/mali_kbase_irq_linux.c | 152 +- .../bifrost/backend/gpu/mali_kbase_jm_as.c | 30 +- .../bifrost/backend/gpu/mali_kbase_jm_defs.h | 12 +- .../bifrost/backend/gpu/mali_kbase_jm_hw.c | 485 +-- .../backend/gpu/mali_kbase_jm_internal.h | 14 +- .../bifrost/backend/gpu/mali_kbase_jm_rb.c | 653 ++- .../backend/gpu/mali_kbase_js_backend.c | 186 +- .../backend/gpu/mali_kbase_l2_mmu_config.c | 59 +- .../backend/gpu/mali_kbase_model_dummy.c | 844 ++-- .../backend/gpu/mali_kbase_model_dummy.h | 74 +- .../gpu/mali_kbase_model_error_generator.c | 39 +- 
.../backend/gpu/mali_kbase_model_linux.c | 58 +- .../backend/gpu/mali_kbase_pm_always_on.c | 20 +- .../backend/gpu/mali_kbase_pm_always_on.h | 3 +- .../backend/gpu/mali_kbase_pm_backend.c | 232 +- .../bifrost/backend/gpu/mali_kbase_pm_ca.c | 25 +- .../bifrost/backend/gpu/mali_kbase_pm_ca.h | 4 +- .../backend/gpu/mali_kbase_pm_ca_devfreq.h | 3 +- .../backend/gpu/mali_kbase_pm_coarse_demand.c | 18 +- .../bifrost/backend/gpu/mali_kbase_pm_defs.h | 49 +- .../backend/gpu/mali_kbase_pm_driver.c | 1468 ++++--- .../backend/gpu/mali_kbase_pm_internal.h | 117 +- .../backend/gpu/mali_kbase_pm_metrics.c | 88 +- .../backend/gpu/mali_kbase_pm_policy.c | 61 +- .../backend/gpu/mali_kbase_pm_policy.h | 15 +- .../arm/bifrost/backend/gpu/mali_kbase_time.c | 295 +- drivers/gpu/arm/bifrost/build.bp | 34 +- .../context/backend/mali_kbase_context_csf.c | 55 +- .../context/backend/mali_kbase_context_jm.c | 74 +- .../arm/bifrost/context/mali_kbase_context.c | 86 +- .../arm/bifrost/context/mali_kbase_context.h | 29 +- .../context/mali_kbase_context_internal.h | 10 +- drivers/gpu/arm/bifrost/csf/Kbuild | 7 +- .../ipa_control/mali_kbase_csf_ipa_control.c | 403 +- .../ipa_control/mali_kbase_csf_ipa_control.h | 23 +- drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c | 1084 +++-- drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h | 107 +- .../bifrost/csf/mali_kbase_csf_cpu_queue.c | 132 + .../bifrost/csf/mali_kbase_csf_cpu_queue.h | 90 + .../csf/mali_kbase_csf_cpu_queue_debugfs.c | 136 +- .../csf/mali_kbase_csf_cpu_queue_debugfs.h | 59 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_csg.c | 648 +++ .../gpu/arm/bifrost/csf/mali_kbase_csf_csg.h | 59 + .../bifrost/csf/mali_kbase_csf_csg_debugfs.c | 589 +-- .../bifrost/csf/mali_kbase_csf_csg_debugfs.h | 16 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_defs.h | 189 +- .../arm/bifrost/csf/mali_kbase_csf_event.c | 53 +- .../arm/bifrost/csf/mali_kbase_csf_event.h | 17 +- .../arm/bifrost/csf/mali_kbase_csf_firmware.c | 1095 ++--- .../arm/bifrost/csf/mali_kbase_csf_firmware.h 
| 136 +- .../bifrost/csf/mali_kbase_csf_firmware_cfg.c | 170 +- .../bifrost/csf/mali_kbase_csf_firmware_cfg.h | 36 +- .../csf/mali_kbase_csf_firmware_core_dump.c | 37 +- .../bifrost/csf/mali_kbase_csf_firmware_log.c | 168 +- .../csf/mali_kbase_csf_firmware_no_mali.c | 453 +- .../csf/mali_kbase_csf_heap_context_alloc.c | 66 +- .../csf/mali_kbase_csf_heap_context_alloc.h | 20 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c | 1042 +++-- .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h | 48 +- .../bifrost/csf/mali_kbase_csf_kcpu_debugfs.c | 70 +- .../csf/mali_kbase_csf_kcpu_fence_debugfs.c | 157 + .../mali_kbase_csf_kcpu_fence_debugfs.h} | 29 +- .../csf/mali_kbase_csf_mcu_shared_reg.c | 58 +- .../csf/mali_kbase_csf_protected_memory.c | 44 +- .../csf/mali_kbase_csf_protected_memory.h | 20 +- .../bifrost/csf/mali_kbase_csf_registers.h | 1254 +++--- .../bifrost/csf/mali_kbase_csf_reset_gpu.c | 143 +- .../bifrost/csf/mali_kbase_csf_scheduler.c | 2481 ++++++----- .../bifrost/csf/mali_kbase_csf_scheduler.h | 120 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_sync.c | 838 ++++ .../gpu/arm/bifrost/csf/mali_kbase_csf_sync.h | 51 + .../bifrost/csf/mali_kbase_csf_sync_debugfs.c | 725 +--- .../bifrost/csf/mali_kbase_csf_sync_debugfs.h | 4 +- .../bifrost/csf/mali_kbase_csf_tiler_heap.c | 82 +- .../bifrost/csf/mali_kbase_csf_tiler_heap.h | 7 +- .../csf/mali_kbase_csf_tiler_heap_debugfs.c | 26 +- .../csf/mali_kbase_csf_tiler_heap_def.h | 8 +- .../csf/mali_kbase_csf_tiler_heap_reclaim.c | 36 +- .../csf/mali_kbase_csf_tiler_heap_reclaim.h | 6 +- .../arm/bifrost/csf/mali_kbase_csf_timeout.c | 42 +- .../bifrost/csf/mali_kbase_csf_tl_reader.c | 116 +- .../bifrost/csf/mali_kbase_csf_tl_reader.h | 8 +- .../bifrost/csf/mali_kbase_csf_trace_buffer.c | 225 +- .../bifrost/csf/mali_kbase_csf_trace_buffer.h | 44 +- .../gpu/arm/bifrost/csf/mali_kbase_csf_util.c | 242 ++ .../gpu/arm/bifrost/csf/mali_kbase_csf_util.h | 101 + .../bifrost/csf/mali_kbase_debug_csf_fault.c | 16 +- 
.../bifrost/csf/mali_kbase_debug_csf_fault.h | 9 +- .../backend/mali_kbase_debug_coresight_csf.c | 24 +- .../mali_kbase_debug_ktrace_codes_csf.h | 24 +- .../mali_kbase_debug_ktrace_codes_jm.h | 19 +- .../backend/mali_kbase_debug_ktrace_csf.c | 77 +- .../backend/mali_kbase_debug_ktrace_csf.h | 129 +- .../mali_kbase_debug_ktrace_defs_csf.h | 11 +- .../backend/mali_kbase_debug_ktrace_defs_jm.h | 9 +- .../backend/mali_kbase_debug_ktrace_jm.c | 59 +- .../backend/mali_kbase_debug_ktrace_jm.h | 252 +- .../mali_kbase_debug_linux_ktrace_csf.h | 163 +- .../mali_kbase_debug_linux_ktrace_jm.h | 105 +- .../bifrost/debug/mali_kbase_debug_ktrace.c | 90 +- .../bifrost/debug/mali_kbase_debug_ktrace.h | 89 +- .../debug/mali_kbase_debug_ktrace_codes.h | 34 +- .../debug/mali_kbase_debug_ktrace_defs.h | 33 +- .../debug/mali_kbase_debug_ktrace_internal.h | 14 +- .../debug/mali_kbase_debug_linux_ktrace.h | 36 +- .../device/backend/mali_kbase_device_csf.c | 113 +- .../device/backend/mali_kbase_device_hw_csf.c | 118 +- .../device/backend/mali_kbase_device_hw_jm.c | 60 +- .../device/backend/mali_kbase_device_jm.c | 22 +- .../arm/bifrost/device/mali_kbase_device.c | 180 +- .../arm/bifrost/device/mali_kbase_device.h | 39 +- .../arm/bifrost/device/mali_kbase_device_hw.c | 162 +- .../device/mali_kbase_device_internal.h | 25 +- .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 381 -- .../gpu/backend/mali_kbase_gpu_regmap_jm.h | 276 -- drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c | 4 +- .../arm/bifrost/gpu/mali_kbase_gpu_regmap.h | 637 --- drivers/gpu/arm/bifrost/hw_access/Kbuild | 33 + .../mali_kbase_hw_access_model_linux.c | 151 + .../backend/mali_kbase_hw_access_real_hw.c | 169 + .../bifrost/hw_access/mali_kbase_hw_access.c | 128 + .../bifrost/hw_access/mali_kbase_hw_access.h | 189 + .../hw_access/mali_kbase_hw_access_regmap.h | 507 +++ .../mali_kbase_hw_access_regmap_legacy.h | 240 ++ .../hw_access/regmap/mali_kbase_regmap_csf.c | 1525 +++++++ .../regmap/mali_kbase_regmap_csf_enums.h | 408 ++ 
.../regmap/mali_kbase_regmap_csf_macros.h | 441 ++ .../hw_access/regmap/mali_kbase_regmap_jm.c | 2978 +++++++++++++ .../regmap/mali_kbase_regmap_jm_enums.h | 762 ++++ .../regmap/mali_kbase_regmap_jm_macros.h | 297 ++ .../regmap/mali_kbase_regmap_legacy_csf.h | 85 + .../regmap/mali_kbase_regmap_legacy_jm.h | 108 + drivers/gpu/arm/bifrost/hwcnt/Kbuild | 1 - .../hwcnt/backend/mali_kbase_hwcnt_backend.h | 16 +- .../backend/mali_kbase_hwcnt_backend_csf.c | 360 +- .../backend/mali_kbase_hwcnt_backend_csf.h | 15 +- .../backend/mali_kbase_hwcnt_backend_csf_if.h | 102 +- .../mali_kbase_hwcnt_backend_csf_if_fw.c | 72 +- .../backend/mali_kbase_hwcnt_backend_jm.c | 128 +- .../mali_kbase_hwcnt_backend_jm_watchdog.c | 19 +- .../gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c | 36 +- .../arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c | 678 +-- .../arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h | 68 +- .../hwcnt/mali_kbase_hwcnt_gpu_narrow.c | 298 -- .../hwcnt/mali_kbase_hwcnt_gpu_narrow.h | 330 -- .../bifrost/hwcnt/mali_kbase_hwcnt_types.c | 338 +- .../bifrost/hwcnt/mali_kbase_hwcnt_types.h | 648 +-- .../hwcnt/mali_kbase_hwcnt_virtualizer.c | 18 +- .../mali_kbase_ipa_counter_common_csf.c | 112 +- .../mali_kbase_ipa_counter_common_csf.h | 13 +- .../mali_kbase_ipa_counter_common_jm.c | 72 +- .../mali_kbase_ipa_counter_common_jm.h | 33 +- .../ipa/backend/mali_kbase_ipa_counter_csf.c | 203 +- .../ipa/backend/mali_kbase_ipa_counter_jm.c | 147 +- drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c | 219 +- drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h | 28 +- .../arm/bifrost/ipa/mali_kbase_ipa_debugfs.c | 65 +- .../arm/bifrost/ipa/mali_kbase_ipa_debugfs.h | 25 +- .../arm/bifrost/ipa/mali_kbase_ipa_simple.c | 76 +- .../gpu/arm/bifrost/jm/mali_kbase_jm_defs.h | 57 +- drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h | 137 +- .../gpu/arm/bifrost/jm/mali_kbase_js_defs.h | 37 +- .../arm/bifrost/mali_base_hwconfig_features.h | 114 +- .../arm/bifrost/mali_base_hwconfig_issues.h | 836 ++-- 
drivers/gpu/arm/bifrost/mali_kbase.h | 235 +- .../arm/bifrost/mali_kbase_as_fault_debugfs.c | 23 +- .../arm/bifrost/mali_kbase_as_fault_debugfs.h | 13 +- .../gpu/arm/bifrost/mali_kbase_cache_policy.c | 11 +- .../gpu/arm/bifrost/mali_kbase_cache_policy.h | 4 +- drivers/gpu/arm/bifrost/mali_kbase_caps.h | 4 +- drivers/gpu/arm/bifrost/mali_kbase_ccswe.c | 14 +- drivers/gpu/arm/bifrost/mali_kbase_ccswe.h | 5 +- drivers/gpu/arm/bifrost/mali_kbase_config.h | 30 +- .../arm/bifrost/mali_kbase_config_defaults.h | 63 +- .../gpu/arm/bifrost/mali_kbase_core_linux.c | 2651 +++++++----- .../arm/bifrost/mali_kbase_cs_experimental.h | 4 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.c | 55 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.h | 15 +- drivers/gpu/arm/bifrost/mali_kbase_debug.c | 8 +- drivers/gpu/arm/bifrost/mali_kbase_debug.h | 43 +- .../arm/bifrost/mali_kbase_debug_job_fault.c | 173 +- .../arm/bifrost/mali_kbase_debug_job_fault.h | 18 +- .../arm/bifrost/mali_kbase_debug_mem_allocs.c | 41 +- .../arm/bifrost/mali_kbase_debug_mem_view.c | 129 +- .../arm/bifrost/mali_kbase_debug_mem_zones.c | 35 +- .../arm/bifrost/mali_kbase_debugfs_helper.c | 44 +- .../arm/bifrost/mali_kbase_debugfs_helper.h | 29 +- drivers/gpu/arm/bifrost/mali_kbase_defs.h | 349 +- .../arm/bifrost/mali_kbase_disjoint_events.c | 5 +- .../gpu/arm/bifrost/mali_kbase_dummy_job_wa.c | 158 +- .../gpu/arm/bifrost/mali_kbase_dummy_job_wa.h | 13 +- .../gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c | 15 +- drivers/gpu/arm/bifrost/mali_kbase_event.c | 89 +- drivers/gpu/arm/bifrost/mali_kbase_fence.c | 14 +- drivers/gpu/arm/bifrost/mali_kbase_fence.h | 27 +- .../gpu/arm/bifrost/mali_kbase_fence_defs.h | 63 - .../gpu/arm/bifrost/mali_kbase_fence_ops.c | 96 +- drivers/gpu/arm/bifrost/mali_kbase_gator.h | 11 +- .../bifrost/mali_kbase_gpu_memory_debugfs.c | 20 +- .../bifrost/mali_kbase_gpu_memory_debugfs.h | 8 +- .../gpu/arm/bifrost/mali_kbase_gpu_metrics.c | 280 ++ .../gpu/arm/bifrost/mali_kbase_gpu_metrics.h | 167 + 
drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c | 819 ++-- drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h | 64 +- .../mali_kbase_gpuprops_private_types.h | 87 + .../arm/bifrost/mali_kbase_gpuprops_types.h | 215 +- drivers/gpu/arm/bifrost/mali_kbase_gwt.c | 80 +- drivers/gpu/arm/bifrost/mali_kbase_gwt.h | 5 +- drivers/gpu/arm/bifrost/mali_kbase_hw.c | 342 +- drivers/gpu/arm/bifrost/mali_kbase_hw.h | 17 +- .../bifrost/mali_kbase_hwaccess_gpuprops.h | 32 +- .../arm/bifrost/mali_kbase_hwaccess_instr.h | 10 +- .../gpu/arm/bifrost/mali_kbase_hwaccess_jm.h | 32 +- .../gpu/arm/bifrost/mali_kbase_hwaccess_pm.h | 33 +- .../arm/bifrost/mali_kbase_hwaccess_time.h | 56 +- drivers/gpu/arm/bifrost/mali_kbase_jd.c | 344 +- .../gpu/arm/bifrost/mali_kbase_jd_debugfs.c | 44 +- .../gpu/arm/bifrost/mali_kbase_jd_debugfs.h | 4 +- drivers/gpu/arm/bifrost/mali_kbase_jm.c | 18 +- drivers/gpu/arm/bifrost/mali_kbase_jm.h | 8 +- drivers/gpu/arm/bifrost/mali_kbase_js.c | 1207 +++--- drivers/gpu/arm/bifrost/mali_kbase_js.h | 4 +- .../gpu/arm/bifrost/mali_kbase_js_ctx_attr.c | 76 +- .../gpu/arm/bifrost/mali_kbase_js_ctx_attr.h | 23 +- .../gpu/arm/bifrost/mali_kbase_kinstr_jm.c | 114 +- .../gpu/arm/bifrost/mali_kbase_kinstr_jm.h | 54 +- .../arm/bifrost/mali_kbase_kinstr_prfcnt.c | 651 +-- .../arm/bifrost/mali_kbase_kinstr_prfcnt.h | 27 +- drivers/gpu/arm/bifrost/mali_kbase_linux.h | 6 +- drivers/gpu/arm/bifrost/mali_kbase_mem.c | 3815 ++++++----------- drivers/gpu/arm/bifrost/mali_kbase_mem.h | 1130 ++--- .../gpu/arm/bifrost/mali_kbase_mem_linux.c | 1205 +++--- .../gpu/arm/bifrost/mali_kbase_mem_linux.h | 72 +- .../gpu/arm/bifrost/mali_kbase_mem_lowlevel.h | 45 +- .../gpu/arm/bifrost/mali_kbase_mem_migrate.c | 97 +- .../gpu/arm/bifrost/mali_kbase_mem_migrate.h | 13 +- drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c | 260 +- .../arm/bifrost/mali_kbase_mem_pool_debugfs.c | 98 +- .../arm/bifrost/mali_kbase_mem_pool_debugfs.h | 14 +- .../arm/bifrost/mali_kbase_mem_pool_group.c | 24 +- 
.../arm/bifrost/mali_kbase_mem_pool_group.h | 10 +- .../bifrost/mali_kbase_mem_profile_debugfs.c | 27 +- .../bifrost/mali_kbase_mem_profile_debugfs.h | 8 +- .../gpu/arm/bifrost/mali_kbase_native_mgm.c | 31 +- drivers/gpu/arm/bifrost/mali_kbase_pbha.c | 189 +- drivers/gpu/arm/bifrost/mali_kbase_pbha.h | 7 +- .../gpu/arm/bifrost/mali_kbase_pbha_debugfs.c | 45 +- .../arm/bifrost/mali_kbase_platform_fake.c | 16 +- drivers/gpu/arm/bifrost/mali_kbase_pm.c | 158 +- drivers/gpu/arm/bifrost/mali_kbase_pm.h | 14 +- .../gpu/arm/bifrost/mali_kbase_reg_track.c | 1517 +++++++ .../gpu/arm/bifrost/mali_kbase_reg_track.h | 443 ++ .../bifrost/mali_kbase_regs_history_debugfs.c | 42 +- .../bifrost/mali_kbase_regs_history_debugfs.h | 4 +- .../gpu/arm/bifrost/mali_kbase_reset_gpu.h | 5 +- drivers/gpu/arm/bifrost/mali_kbase_smc.c | 23 +- drivers/gpu/arm/bifrost/mali_kbase_smc.h | 6 +- drivers/gpu/arm/bifrost/mali_kbase_softjobs.c | 383 +- drivers/gpu/arm/bifrost/mali_kbase_strings.h | 23 - drivers/gpu/arm/bifrost/mali_kbase_sync.h | 19 +- .../gpu/arm/bifrost/mali_kbase_sync_file.c | 76 +- .../arm/bifrost/mali_kbase_trace_gpu_mem.c | 36 +- .../arm/bifrost/mali_kbase_trace_gpu_mem.h | 25 +- drivers/gpu/arm/bifrost/mali_kbase_utility.h | 52 - drivers/gpu/arm/bifrost/mali_kbase_vinstr.c | 1132 ----- drivers/gpu/arm/bifrost/mali_kbase_vinstr.h | 90 - drivers/gpu/arm/bifrost/mali_linux_trace.h | 530 +-- drivers/gpu/arm/bifrost/mali_malisw.h | 22 +- .../bifrost/mali_power_gpu_frequency_trace.h | 26 +- ...s.c => mali_power_gpu_work_period_trace.c} | 16 +- .../mali_power_gpu_work_period_trace.h | 72 + .../bifrost/mmu/backend/mali_kbase_mmu_csf.c | 208 +- .../bifrost/mmu/backend/mali_kbase_mmu_jm.c | 161 +- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c | 876 ++-- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h | 114 +- .../gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h | 13 +- .../bifrost/mmu/mali_kbase_mmu_hw_direct.c | 156 +- .../arm/bifrost/mmu/mali_kbase_mmu_internal.h | 20 +- 
.../bifrost/mmu/mali_kbase_mmu_mode_aarch64.c | 58 +- drivers/gpu/arm/bifrost/platform/Kconfig | 4 +- .../devicetree/mali_kbase_clk_rate_trace.c | 34 +- .../devicetree/mali_kbase_config_devicetree.c | 5 +- .../devicetree/mali_kbase_runtime_pm.c | 19 +- .../platform/meson/mali_kbase_config_meson.c | 5 +- .../meson/mali_kbase_config_platform.h | 8 +- .../platform/meson/mali_kbase_runtime_pm.c | 22 +- .../vexpress/mali_kbase_config_vexpress.c | 20 +- .../mali_kbase_config_vexpress.c | 19 +- .../mali_kbase_config_vexpress.c | 19 +- .../gpu/arm/bifrost/protected_mode_switcher.h | 8 +- drivers/gpu/arm/bifrost/tests/Kbuild | 4 +- drivers/gpu/arm/bifrost/tests/Kconfig | 3 +- drivers/gpu/arm/bifrost/tests/build.bp | 10 + .../bifrost/tests/include/kutf/kutf_helpers.h | 7 +- .../tests/include/kutf/kutf_helpers_user.h | 27 +- .../arm/bifrost/tests/include/kutf/kutf_mem.h | 4 +- .../tests/include/kutf/kutf_resultset.h | 43 +- .../bifrost/tests/include/kutf/kutf_suite.h | 191 +- .../bifrost/tests/include/kutf/kutf_utils.h | 10 +- .../gpu/arm/bifrost/tests/kutf/kutf_helpers.c | 13 +- .../bifrost/tests/kutf/kutf_helpers_user.c | 132 +- drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c | 4 +- .../arm/bifrost/tests/kutf/kutf_resultset.c | 18 +- .../gpu/arm/bifrost/tests/kutf/kutf_suite.c | 311 +- .../gpu/arm/bifrost/tests/kutf/kutf_utils.c | 7 +- .../kernel/mali_kutf_clk_rate_trace_test.c | 209 +- .../mali_kutf_clk_rate_trace_test.h | 18 +- .../mali_kutf_irq_test_main.c | 51 +- .../mali_kutf_mgm_integration_test_main.c | 10 +- .../arm/bifrost/thirdparty/mali_kbase_mmap.c | 81 +- drivers/gpu/arm/bifrost/thirdparty/mm.h | 31 + .../tl/backend/mali_kbase_timeline_csf.c | 60 +- .../tl/backend/mali_kbase_timeline_jm.c | 30 +- .../gpu/arm/bifrost/tl/mali_kbase_timeline.c | 60 +- .../gpu/arm/bifrost/tl/mali_kbase_timeline.h | 8 +- .../arm/bifrost/tl/mali_kbase_timeline_io.c | 67 +- .../arm/bifrost/tl/mali_kbase_timeline_priv.h | 20 +- .../arm/bifrost/tl/mali_kbase_tl_serialize.h | 28 +- 
.../gpu/arm/bifrost/tl/mali_kbase_tlstream.c | 114 +- .../gpu/arm/bifrost/tl/mali_kbase_tlstream.h | 17 +- .../arm/bifrost/tl/mali_kbase_tracepoints.c | 10 +- .../arm/bifrost/tl/mali_kbase_tracepoints.h | 18 +- drivers/hwtracing/coresight/mali/Kconfig | 2 +- drivers/hwtracing/coresight/mali/Makefile | 77 +- drivers/hwtracing/coresight/mali/build.bp | 4 +- .../coresight/mali/coresight_mali_common.h | 74 +- .../ela/coresight_mali_source_ela_core.c | 668 +-- .../etm/coresight_mali_source_etm_core.c | 24 +- .../itm/coresight_mali_source_itm_core.c | 24 +- drivers/xen/arm/Kconfig | 27 + drivers/xen/arm/Makefile | 91 + drivers/xen/arm/Mconfig | 27 + include/linux/mali_arbiter_interface.h | 9 +- include/linux/memory_group_manager.h | 34 +- include/linux/priority_control_manager.h | 7 +- include/linux/protected_memory_allocator.h | 15 +- include/linux/protected_mode_switcher.h | 8 +- include/linux/version_compat_defs.h | 227 +- .../dma-buf-test-exporter.h | 78 + .../backend/gpu/mali_kbase_model_dummy.h | 41 +- .../arm/bifrost/csf/mali_base_csf_kernel.h | 23 +- .../arm/bifrost/csf/mali_kbase_csf_ioctl.h | 95 +- .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 7 +- .../gpu/backend/mali_kbase_gpu_regmap_jm.h | 27 +- .../bifrost/gpu/mali_kbase_gpu_coherency.h | 6 +- .../gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h | 200 +- .../arm/bifrost/gpu/mali_kbase_gpu_regmap.h | 69 +- .../gpu/arm/bifrost/jm/mali_base_jm_kernel.h | 150 +- .../gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h | 23 +- .../gpu/arm/bifrost/mali_base_common_kernel.h | 8 +- .../uapi/gpu/arm/bifrost/mali_base_kernel.h | 91 +- .../uapi/gpu/arm/bifrost/mali_base_mem_priv.h | 6 +- include/uapi/gpu/arm/bifrost/mali_gpu_props.h | 111 + .../gpu/arm/bifrost/mali_kbase_hwcnt_reader.h | 57 +- .../uapi/gpu/arm/bifrost/mali_kbase_ioctl.h | 26 +- 384 files changed, 36507 insertions(+), 28475 deletions(-) create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.c create mode 100644 
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.c rename drivers/gpu/arm/bifrost/{mali_kbase_bits.h => csf/mali_kbase_csf_kcpu_fence_debugfs.h} (50%) create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.h delete mode 100644 drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h delete mode 100644 drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h delete mode 100644 drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/Kbuild create mode 100644 drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c create mode 100644 drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c create mode 100644 drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c create mode 100644 drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf.c create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_enums.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h create mode 
100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_csf.h create mode 100644 drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_jm.h delete mode 100644 drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c delete mode 100644 drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h delete mode 100644 drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h create mode 100644 drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c create mode 100644 drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h create mode 100644 drivers/gpu/arm/bifrost/mali_kbase_gpuprops_private_types.h create mode 100644 drivers/gpu/arm/bifrost/mali_kbase_reg_track.c create mode 100644 drivers/gpu/arm/bifrost/mali_kbase_reg_track.h delete mode 100644 drivers/gpu/arm/bifrost/mali_kbase_strings.h delete mode 100644 drivers/gpu/arm/bifrost/mali_kbase_utility.h delete mode 100644 drivers/gpu/arm/bifrost/mali_kbase_vinstr.c delete mode 100644 drivers/gpu/arm/bifrost/mali_kbase_vinstr.h rename drivers/gpu/arm/bifrost/{mali_kbase_strings.c => mali_power_gpu_work_period_trace.c} (68%) create mode 100644 drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.h create mode 100644 drivers/gpu/arm/bifrost/thirdparty/mm.h create mode 100644 drivers/xen/arm/Kconfig create mode 100644 drivers/xen/arm/Makefile create mode 100644 drivers/xen/arm/Mconfig create mode 100644 include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h create mode 100644 include/uapi/gpu/arm/bifrost/mali_gpu_props.h diff --git a/Documentation/ABI/testing/sysfs-device-mali b/Documentation/ABI/testing/sysfs-device-mali index 9a36528d93a9..1ec265c5add4 100644 --- a/Documentation/ABI/testing/sysfs-device-mali +++ b/Documentation/ABI/testing/sysfs-device-mali @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,6 +68,16 @@ Description: is supported or is powered down after suspending command stream groups. +What: /sys/class/misc/mali%u/device/idle_hysteresis_time_ns +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. This attribute is + used to configure the timeout value in nanoseconds for the + GPU idle handling. If GPU has been idle for this timeout + period, then it is put to sleep for GPUs where sleep feature + is supported or is powered down after suspending command + stream groups. + What: /sys/class/misc/mali%u/device/js_ctx_scheduling_mode Description: This attribute is available only with platform device that @@ -232,6 +242,23 @@ Description: If we set the value to zero then MCU-controlled shader/tiler power management will be disabled. +What: /sys/class/misc/mali%u/device/mcu_shader_pwroff_timeout_ns +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. The duration value unit + is in nanoseconds and is used for configuring MCU shader Core power-off + timer. The configured MCU shader Core power-off timer will only have + effect when the host driver has delegated the shader cores + power management to MCU. The supplied value will be + recorded internally without any change. But the actual field + value will be subject to core power-off timer source frequency + scaling and maximum value limiting. The default source will be + SYSTEM_TIMESTAMP counter. But in case the platform is not able + to supply it, the GPU CYCLE_COUNTER source will be used as an + alternative. + + If we set the value to zero then MCU-controlled shader/tiler + power management will be disabled. 
What: /sys/class/misc/mali%u/device/csg_scheduling_period Description: diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source index a24a88a824e4..0f31a6acaa87 100644 --- a/Documentation/ABI/testing/sysfs-device-mali-coresight-source +++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,6 +71,10 @@ Description: [11:10] = 1, Generate TS request approx every 128 cycles [22:16] = 1, Trace bus ID +What: /sys/bus/coresight/devices/mali-source-ela/reset_regs +Description: + Attribute used to reset registers to zero. + What: /sys/bus/coresight/devices/mali-source-ela/enable_source Description: Attribute used to enable Coresight Source ELA. @@ -79,35 +83,121 @@ What: /sys/bus/coresight/devices/mali-source-ela/is_enabled Description: Attribute used to check if Coresight Source ELA is enabled. -What: /sys/bus/coresight/devices/mali-source-ela/select +What: /sys/bus/coresight/devices/mali-source-ela/regs/TIMECTRL Description: - Coresight Source ELA select trace mode: - [0], NONE - [1], JCN - [2], CEU_EXEC - [3], CEU_CMDS - [4], MCU_AHBP - [5], HOST_AXI - [6], NR_TRACEMODE - + Coresight Source ELA TIMECTRL register set/get. Refer to specification for more details. -What: /sys/bus/coresight/devices/mali-source-ela/sigmask0 +What: /sys/bus/coresight/devices/mali-source-ela/regs/TSSR Description: - Coresight Source ELA SIGMASK0 register set/get. + Coresight Source ELA TSSR register set/get. Refer to specification for more details.
-What: /sys/bus/coresight/devices/mali-source-ela/sigmask4 +What: /sys/bus/coresight/devices/mali-source-ela/regs/ATBCTRL Description: - Coresight Source ELA SIGMASK4 register set/get. + Coresight Source ELA ATBCTRL register set/get. Refer to specification for more details. -What: /sys/bus/coresight/devices/mali-source-ela/sigcomp0 +What: /sys/bus/coresight/devices/mali-source-ela/regs/PTACTION Description: - Coresight Source ELA SIGCOMP0 register set/get. + Coresight Source ELA PTACTION register set/get. Refer to specification for more details. -What: /sys/bus/coresight/devices/mali-source-ela/sigcomp4 +What: /sys/bus/coresight/devices/mali-source-ela/regs/AUXCTRL Description: - Coresight Source ELA SIGCOMP4 register set/get. + Coresight Source ELA AUXCTRL register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/CNTSEL +Description: + Coresight Source ELA CNTSEL register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGSELn +Description: + Coresight Source ELA SIGSELn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/TRIGCTRLn +Description: + Coresight Source ELA TRIGCTRLn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/NEXTSTATEn +Description: + Coresight Source ELA NEXTSTATEn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/ACTIONn +Description: + Coresight Source ELA ACTIONn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/ALTNEXTSTATEn +Description: + Coresight Source ELA ALTNEXTSTATEn register set/get. + Refer to specification for more details. 
+ +What: /sys/bus/coresight/devices/mali-source-ela/regs/ALTACTIONn +Description: + Coresight Source ELA ALTACTIONn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/COMPCTRLn +Description: + Coresight Source ELA COMPCTRLn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/ALTCOMPCTRLn +Description: + Coresight Source ELA ALTCOMPCTRLn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/COUNTCOMPn +Description: + Coresight Source ELA COUNTCOMPn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/TWBSELn +Description: + Coresight Source ELA TWBSELn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/EXTMASKn +Description: + Coresight Source ELA EXTMASKn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/EXTCOMPn +Description: + Coresight Source ELA EXTCOMPn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/QUALMASKn +Description: + Coresight Source ELA QUALMASKn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/QUALCOMPn + Coresight Source ELA QUALCOMPn register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGMASKn_0-7 +Description: + Coresight Source ELA SIGMASKn_0-7 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGCOMPn_0-7 +Description: + Coresight Source ELA SIGCOMPn_0-7 register set/get. + Refer to specification for more details. 
+ +What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGSELn_0-7 +Description: + Coresight Source ELA SIGSELn_0-7 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/regs/SIGMASKn_0-7 +Description: + Coresight Source ELA SIGMASKn_0-7 register set/get. Refer to specification for more details. diff --git a/Documentation/csf_sync_state_dump.txt b/Documentation/csf_sync_state_dump.txt index dc1e48774377..d1f2ec68825b 100644 --- a/Documentation/csf_sync_state_dump.txt +++ b/Documentation/csf_sync_state_dump.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -109,3 +109,65 @@ surpass the arg value. =============================================================================================================================== queue:KCPU-0-1 exec:S cmd:CQS_WAIT_OPERATION obj:0x0000007fbf6f2ff8 live_value:0x0000000000000000 | op:gt arg_value: 0x00000000 =============================================================================================================================== + +CSF Sync State Dump For Fence Signal Timeouts +--------------------------------------------- + +Summary +------- +A timer has been added to the KCPU queues which is checked to ensure +the queues have not "timed out" between the enqueuing of a fence signal command +and its eventual execution. If this timeout happens then the CSF sync state +of all KCPU queues of the offending context is dumped. This feature is enabled +by default, but can be disabled/enabled later. + +Explanation +------------ +This new timer is created and destroyed alongside the creation and destruction +of each KCPU queue.
It is started when a fence signal is enqueued, and cancelled +when the fence signal command has been processed. The timer times out after +10 seconds (at 100 MHz) if the execution of that fence signal event was never +processed. If this timeout occurs then the timer callback function identifies +the KCPU queue which the timer belongs to and invokes the CSF synchronisation +state dump mechanism, so that the sync state for the context of the queue +causing the timeout is dumped to dmesg. + +Fence Timeouts Controls +----------------------- +Disable/Enable Feature +---------------------- +This feature is enabled by default, but can be disabled/re-enabled via DebugFS +controls. The 'fence_signal_timeout_enable' debugfs entry is a global flag +which is written to, to turn this feature on and off. + +Example: +-------- +when writing to fence_signal_timeout_enable entry: +echo 1 > /sys/kernel/debug/mali0/fence_signal_timeout_enable -> feature is enabled. +echo 0 > /sys/kernel/debug/mali0/fence_signal_timeout_enable -> feature is disabled. + +It is also possible to read from this file to check if the feature is currently +enabled or not by checking the return value of fence_signal_timeout_enable. + +Example: +-------- +when reading from fence_signal_timeout_enable entry, if: +cat /sys/kernel/debug/mali0/fence_signal_timeout_enable returns 1 -> feature is enabled. +cat /sys/kernel/debug/mali0/fence_signal_timeout_enable returns 0 -> feature is disabled. + +Update Timer Duration +--------------------- +The timeout duration can be accessed through the 'fence_signal_timeout_ms' +debugfs entry. This can be read from to retrieve the current timeout in +milliseconds. + +Example: +-------- +cat /sys/kernel/debug/mali0/fence_signal_timeout_ms + +The 'fence_signal_timeout_ms' debugfs entry can also be written to, to update +the timeout in milliseconds.
+ +Example: +-------- +echo 10000 > /sys/kernel/debug/mali0/fence_signal_timeout_ms diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index caf2de5e47be..85672c6c6258 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -44,22 +44,22 @@ Documentation/devicetree/bindings/regulator/regulator.txt for details. - mem-supply : Phandle to memory regulator for the Mali device. This is optional. - operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt for details. -- quirks_gpu : Used to write to the JM_CONFIG or CSF_CONFIG register. +- quirks-gpu : Used to write to the JM_CONFIG or CSF_CONFIG register. Should be used with care. Options passed here are used to override certain default behavior. Note: This will override 'idvs-group-size' field in devicetree and module param 'corestack_driver_control', - therefore if 'quirks_gpu' is used then 'idvs-group-size' and - 'corestack_driver_control' value should be incorporated into 'quirks_gpu'. -- quirks_sc : Used to write to the SHADER_CONFIG register. + therefore if 'quirks-gpu' is used then 'idvs-group-size' and + 'corestack_driver_control' value should be incorporated into 'quirks-gpu'. +- quirks-sc : Used to write to the SHADER_CONFIG register. Should be used with care. Options passed here are used to override certain default behavior. -- quirks_tiler : Used to write to the TILER_CONFIG register. +- quirks-tiler : Used to write to the TILER_CONFIG register. Should be used with care. 
Options passed here are used to disable or override certain default behavior. -- quirks_mmu : Used to write to the L2_CONFIG register. +- quirks-mmu : Used to write to the L2_CONFIG register. Should be used with care. Options passed here are used to disable or override certain default behavior. -- power_model : Sets the power model parameters. Defined power models include: +- power-model : Sets the power model parameters. Defined power models include: "mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model", "mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model", "mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model", @@ -96,7 +96,7 @@ for details. are used at different points so care should be taken to configure both power models in the device tree (specifically dynamic-coefficient, static-coefficient and scale) to best match the platform. -- power_policy : Sets the GPU power policy at probe time. Available options are +- power-policy : Sets the GPU power policy at probe time. Available options are "coarse_demand" and "always_on". If not set, then "coarse_demand" is used. - system-coherency : Sets the coherency protocol to be used for coherent accesses made from the GPU. @@ -116,17 +116,19 @@ for details. - l2-hash-values : Override L2 hash function using provided hash values, on GPUs that supports it. It is mutually exclusive with 'l2-hash'. Only one or the other must be used in a supported GPU. -- arbiter_if : Phandle to the arbif platform device, used to provide KBASE with an interface +- arbiter-if : Phandle to the arbif platform device, used to provide KBASE with an interface to the Arbiter. This is required when using arbitration; setting to a non-NULL value will enable arbitration. If arbitration is in use, then there should be no external GPU control. 
- When arbiter_if is in use then the following must not be: - - power_model (no IPA allowed with arbitration) + When arbiter-if is in use then the following must not be: + - power-model (no IPA allowed with arbitration) - #cooling-cells - operating-points-v2 (no dvfs in kbase with arbitration) - system-coherency with a value of 1 (no full coherency with arbitration) -- int_id_override: list of tuples defining the IDs needed to be +- int-id-override: list of tuples defining the IDs needed to be set and the setting coresponding to the SYSC_ALLOC register. +- propagate-bits: Used to write to L2_CONFIG.PBHA_HWU. This bitset establishes which + PBHA bits are propagated on the AXI bus. Example for a Mali GPU with 1 clock and 1 regulator: @@ -234,8 +236,8 @@ Example for a Mali GPU supporting PBHA configuration via DTB (default): gpu@0xfc010000 { ... pbha { - int_id_override = <2 0x32>, <9 0x05>, <16 0x32>; - propagate_bits = <0x03>; + int-id-override = <2 0x32>, <9 0x05>, <16 0x32>; + propagate-bits = /bits/ 4 <0x03>; }; ... }; diff --git a/Documentation/devicetree/bindings/arm/mali-coresight-source.txt b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt index 87a1ce3b3e85..1018eafbccea 100644 --- a/Documentation/devicetree/bindings/arm/mali-coresight-source.txt +++ b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -87,27 +87,6 @@ Required properties - compatible: Has to be "arm,coresight-mali-source-ela" - gpu : phandle to a Mali GPU definition -- signal-groups: Signal groups indexed from 0 to 5. - Used to configure the signal channels. - - sgN: Types of signals attached to one channel. 
- It can be more than one type in the case of - JCN request/response. - - Types: - - "jcn-request": Can share the channel with "jcn-response" - - "jcn-response": Can share the channel with "jcn-request" - - "ceu-execution": Cannot share the channel with other types - - "ceu-commands": Cannot share the channel with other types - - "mcu-ahbp": Cannot share the channel with other types - - "host-axi": Cannot share the channel with other types - - - If the HW implementation shares a common channel - for JCN response and request (total of 4 channels), - Refer to: - - "Example: Shared JCN request/response channel" - Otherwise (total of 5 channels), refer to: - - "Example: Split JCN request/response channel" - port: - endpoint: - remote-endpoint: phandle to a Coresight sink port @@ -116,19 +95,12 @@ Example: Split JCN request/response channel -------------------------------------------- This examples applies to implementations with a total of 5 signal groups, -where JCN request and response are assigned to independent channels. +where JCN request and response are assigned to independent or shared +channels depending on the GPU model. mali-source-ela { compatible = "arm,coresight-mali-source-ela"; gpu = <&gpu>; - signal-groups { - sg0 = "jcn-request"; - sg1 = "jcn-response"; - sg2 = "ceu-execution"; - sg3 = "ceu-commands"; - sg4 = "mcu-ahbp"; - sg5 = "host-axi"; - }; port { mali_source_ela_out_port0: endpoint { remote-endpoint = <&mali_sink_in_port2>; @@ -136,25 +108,9 @@ mali-source-ela { }; }; -Example: Shared JCN request/response channel +SysFS Configuration -------------------------------------------- -This examples applies to implementations with a total of 4 signal groups, -where JCN request and response are assigned to the same channel. 
- -mali-source-ela { - compatible = "arm,coresight-mali-source-ela"; - gpu = <&gpu>; - signal-groups { - sg0 = "jcn-request", "jcn-response"; - sg1 = "ceu-execution"; - sg2 = "ceu-commands"; - sg3 = "mcu-ahbp"; - sg4 = "host-axi"; - }; - port { - mali_source_ela_out_port0: endpoint { - remote-endpoint = <&mali_sink_in_port1>; - }; - }; -}; +The register values used by CoreSight for ELA can be configured using SysFS +interfaces. This implicitly includes configuring the ELA for independent or +shared JCN request and response channels. diff --git a/drivers/base/arm/Kbuild b/drivers/base/arm/Kbuild index e5ded4cf7395..04e153b5bb48 100644 --- a/drivers/base/arm/Kbuild +++ b/drivers/base/arm/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,6 +21,7 @@ # # ccflags # +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) ccflags-y += -I$(src)/../../../include subdir-ccflags-y += $(ccflags-y) diff --git a/drivers/base/arm/Kconfig b/drivers/base/arm/Kconfig index 7f9f1d4c418a..e8bb8a40d2c5 100644 --- a/drivers/base/arm/Kconfig +++ b/drivers/base/arm/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index 3e9544b591c2..4aa68f89d3d9 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -23,49 +23,53 @@ # KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build KDIR ?= $(KERNEL_SRC) +M ?= $(shell pwd) ifeq ($(KDIR),) $(error Must specify KDIR to point to the kernel to target)) endif -vars := -# -# Default configuration values -# -CONFIG_MALI_BASE_MODULES ?= n +CONFIGS := -ifeq ($(CONFIG_MALI_BASE_MODULES),y) - CONFIG_MALI_CSF_SUPPORT ?= n +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + # + # Default configuration values + # + CONFIG_MALI_BASE_MODULES ?= n + + ifeq ($(CONFIG_MALI_BASE_MODULES),y) + CONFIG_MALI_CSF_SUPPORT ?= n + + ifneq ($(CONFIG_DMA_SHARED_BUFFER),n) + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y + else + # Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n + endif + + CONFIG_MALI_MEMORY_GROUP_MANAGER ?= y + + ifneq ($(CONFIG_MALI_CSF_SUPPORT), n) + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= y + endif - ifneq ($(CONFIG_DMA_SHARED_BUFFER),n) - CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y else - # Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n + # Prevent misuse when CONFIG_MALI_BASE_MODULES=n CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n + CONFIG_MALI_MEMORY_GROUP_MANAGER = n + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n + endif - CONFIG_MALI_MEMORY_GROUP_MANAGER ?= y - - ifneq ($(CONFIG_MALI_CSF_SUPPORT), n) - 
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= y - endif - -else - # Prevent misuse when CONFIG_MALI_BASE_MODULES=n - CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n - CONFIG_MALI_MEMORY_GROUP_MANAGER = n - CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n + CONFIGS += \ + CONFIG_MALI_BASE_MODULES \ + CONFIG_MALI_CSF_SUPPORT \ + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \ + CONFIG_MALI_MEMORY_GROUP_MANAGER \ + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \ endif -CONFIGS := \ - CONFIG_MALI_BASE_MODULES \ - CONFIG_MALI_CSF_SUPPORT \ - CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \ - CONFIG_MALI_MEMORY_GROUP_MANAGER \ - CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \ - - # # MAKE_ARGS to pass the custom CONFIGs on out-of-tree build # @@ -88,65 +92,65 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ $(if $(filter y m,$(value $(value config))), \ -D$(value config)=1)) -KBUILD_CFLAGS += -Wall -Werror +CFLAGS_MODULE += -Wall -Werror ifeq ($(CONFIG_GCOV_KERNEL), y) - KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) - KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + CFLAGS_MODULE += $(call cc-option, -ftest-coverage) + CFLAGS_MODULE += $(call cc-option, -fprofile-arcs) EXTRA_CFLAGS += -DGCOV_PROFILE=1 endif ifeq ($(CONFIG_MALI_KCOV),y) - KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) + CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp) EXTRA_CFLAGS += -DKCOV=1 EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 endif # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter -KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-prototypes -KBUILD_CFLAGS += -Wold-style-definition +CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter +CFLAGS_MODULE += -Wmissing-declarations +CFLAGS_MODULE += -Wmissing-format-attribute +CFLAGS_MODULE += -Wmissing-prototypes +CFLAGS_MODULE += -Wold-style-definition # The 
-Wmissing-include-dirs cannot be enabled as the path to some of the # included directories change depending on whether it is an in-tree or # out-of-tree build. -KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable) +CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable) +CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned) +CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-shift-negative-value +CFLAGS_MODULE += -Wno-sign-compare +CFLAGS_MODULE += -Wno-shift-negative-value # This flag is needed to avoid build errors on older kernels -KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) +CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 # The following were added to align with W=2 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wdisabled-optimization +CFLAGS_MODULE += -Wdisabled-optimization # The -Wshadow flag cannot be enabled unless upstream kernels are # patched to fix redefinitions of certain built-in functions and # global variables. 
-KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -KBUILD_CFLAGS += -Wmissing-field-initializers +CFLAGS_MODULE += $(call cc-option, -Wlogical-op) +CFLAGS_MODULE += -Wmissing-field-initializers # -Wtype-limits must be disabled due to build failures on kernel 5.x -KBUILD_CFLAGS += -Wno-type-limit -KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) -KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) +CFLAGS_MODULE += -Wno-type-limits +CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) +CFLAGS_MODULE += $(call cc-option, -Wunused-macros) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions -KBUILD_CFLAGS += -Wno-ignored-qualifiers +CFLAGS_MODULE += -Wno-ignored-qualifiers all: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules modules_install: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) modules_install clean: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) clean diff --git a/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c b/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c index 5f033a60026c..deef790dc73b 100644 --- a/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c +++ b/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,10 @@ #include #include #include +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE +#include +#endif +#include #define DMA_BUF_TE_VER_MAJOR 1 #define DMA_BUF_TE_VER_MINOR 0 @@ -47,6 +51,10 @@ #define NO_SG_CHAIN #endif +#ifndef CSTD_UNUSED +#define CSTD_UNUSED(x) ((void)(x)) +#endif + struct dma_buf_te_alloc { /* the real alloc */ size_t nr_pages; @@ -65,6 +73,9 @@ struct dma_buf_te_alloc { bool contiguous; dma_addr_t contig_dma_addr; void *contig_cpu_addr; + + /* @lock: Used internally to serialize list manipulation, attach/detach etc. */ + struct mutex lock; }; struct dma_buf_te_attachment { @@ -75,12 +86,13 @@ struct dma_buf_te_attachment { static struct miscdevice te_device; #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) -static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment) +static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, + struct dma_buf_attachment *attachment) #else static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *attachment) #endif { - struct dma_buf_te_alloc *alloc; + struct dma_buf_te_alloc *alloc; alloc = buf->priv; @@ -91,8 +103,9 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att if (!attachment->priv) return -ENOMEM; - /* dma_buf is externally locked during call */ + mutex_lock(&alloc->lock); alloc->nr_attached_devices++; + mutex_unlock(&alloc->lock); return 0; } @@ -107,20 +120,23 @@ static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *at struct dma_buf_te_alloc *alloc = buf->priv; struct dma_buf_te_attachment *pa = attachment->priv; - /* dma_buf is externally locked during call */ + mutex_lock(&alloc->lock); - WARN(pa->attachment_mapped, "WARNING: dma-buf-test-exporter detected detach with open device mappings"); + 
WARN(pa->attachment_mapped, + "WARNING: dma-buf-test-exporter detected detach with open device mappings"); alloc->nr_attached_devices--; + mutex_unlock(&alloc->lock); kfree(pa); } -static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction) +static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, + enum dma_data_direction direction) { struct sg_table *sg; struct scatterlist *iter; - struct dma_buf_te_alloc *alloc; + struct dma_buf_te_alloc *alloc; struct dma_buf_te_attachment *pa = attachment->priv; size_t i; int ret; @@ -130,8 +146,7 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en if (alloc->fail_map) return ERR_PTR(-ENOMEM); - if (WARN(pa->attachment_mapped, - "WARNING: Attempted to map already mapped attachment.")) + if (WARN(pa->attachment_mapped, "WARNING: Attempted to map already mapped attachment.")) return ERR_PTR(-EBUSY); #ifdef NO_SG_CHAIN @@ -145,21 +160,22 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en return ERR_PTR(-ENOMEM); /* from here we access the allocation object, so lock the dmabuf pointing to it */ - mutex_lock(&attachment->dmabuf->lock); + mutex_lock(&alloc->lock); if (alloc->contiguous) ret = sg_alloc_table(sg, 1, GFP_KERNEL); else ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL); if (ret) { - mutex_unlock(&attachment->dmabuf->lock); + mutex_unlock(&alloc->lock); kfree(sg); return ERR_PTR(ret); } if (alloc->contiguous) { sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE; - sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0); + sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), + alloc->nr_pages * PAGE_SIZE, 0); sg_dma_address(sg->sgl) = alloc->contig_dma_addr; } else { for_each_sg(sg->sgl, iter, alloc->nr_pages, i) @@ -167,7 +183,7 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en } if 
(!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) { - mutex_unlock(&attachment->dmabuf->lock); + mutex_unlock(&alloc->lock); sg_free_table(sg); kfree(sg); return ERR_PTR(-ENOMEM); @@ -176,26 +192,26 @@ static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, en alloc->nr_device_mappings++; pa->attachment_mapped = true; pa->sg = sg; - mutex_unlock(&attachment->dmabuf->lock); + mutex_unlock(&alloc->lock); return sg; } -static void dma_buf_te_unmap(struct dma_buf_attachment *attachment, - struct sg_table *sg, enum dma_data_direction direction) +static void dma_buf_te_unmap(struct dma_buf_attachment *attachment, struct sg_table *sg, + enum dma_data_direction direction) { struct dma_buf_te_alloc *alloc; struct dma_buf_te_attachment *pa = attachment->priv; alloc = attachment->dmabuf->priv; - mutex_lock(&attachment->dmabuf->lock); + mutex_lock(&alloc->lock); WARN(!pa->attachment_mapped, "WARNING: Unmatched unmap of attachment."); alloc->nr_device_mappings--; pa->attachment_mapped = false; pa->sg = NULL; - mutex_unlock(&attachment->dmabuf->lock); + mutex_unlock(&alloc->lock); dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction); sg_free_table(sg); @@ -209,13 +225,12 @@ static void dma_buf_te_release(struct dma_buf *buf) alloc = buf->priv; /* no need for locking */ + mutex_destroy(&alloc->lock); if (alloc->contiguous) { - dma_free_attrs(te_device.this_device, - alloc->nr_pages * PAGE_SIZE, - alloc->contig_cpu_addr, - alloc->contig_dma_addr, - DMA_ATTR_WRITE_COMBINE); + dma_free_attrs(te_device.this_device, alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, alloc->contig_dma_addr, + DMA_ATTR_WRITE_COMBINE); } else { for (i = 0; i < alloc->nr_pages; i++) __free_page(alloc->pages[i]); @@ -228,46 +243,62 @@ static void dma_buf_te_release(struct dma_buf *buf) kfree(alloc); } -static int dma_buf_te_sync(struct dma_buf *dmabuf, - enum dma_data_direction direction, - bool start_cpu_access) +static int dma_buf_te_sync(struct dma_buf 
*dmabuf, enum dma_data_direction direction, + bool start_cpu_access) { struct dma_buf_attachment *attachment; + struct dma_buf_te_alloc *alloc = dmabuf->priv; + /* Use the kernel lock to prevent the concurrent update of dmabuf->attachments */ +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE + dma_resv_lock(dmabuf->resv, NULL); +#else mutex_lock(&dmabuf->lock); +#endif + + /* Use the internal lock to block the concurrent attach/detach calls */ + mutex_lock(&alloc->lock); list_for_each_entry(attachment, &dmabuf->attachments, node) { struct dma_buf_te_attachment *pa = attachment->priv; struct sg_table *sg = pa->sg; if (!sg) { - dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev)); + dev_dbg(te_device.this_device, "no mapping for device %s\n", + dev_name(attachment->dev)); continue; } if (start_cpu_access) { - dev_dbg(te_device.this_device, "sync cpu with device %s\n", dev_name(attachment->dev)); + dev_dbg(te_device.this_device, "sync cpu with device %s\n", + dev_name(attachment->dev)); dma_sync_sg_for_cpu(attachment->dev, sg->sgl, sg->nents, direction); } else { - dev_dbg(te_device.this_device, "sync device %s with cpu\n", dev_name(attachment->dev)); + dev_dbg(te_device.this_device, "sync device %s with cpu\n", + dev_name(attachment->dev)); dma_sync_sg_for_device(attachment->dev, sg->sgl, sg->nents, direction); } } + mutex_unlock(&alloc->lock); + +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE + dma_resv_unlock(dmabuf->resv); +#else mutex_unlock(&dmabuf->lock); +#endif + return 0; } -static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, - enum dma_data_direction direction) +static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { return dma_buf_te_sync(dmabuf, direction, true); } -static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, - enum dma_data_direction direction) +static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { 
return dma_buf_te_sync(dmabuf, direction, false); } @@ -280,9 +311,9 @@ static void dma_buf_te_mmap_open(struct vm_area_struct *vma) dma_buf = vma->vm_private_data; alloc = dma_buf->priv; - mutex_lock(&dma_buf->lock); + mutex_lock(&alloc->lock); alloc->nr_cpu_mappings++; - mutex_unlock(&dma_buf->lock); + mutex_unlock(&alloc->lock); } static void dma_buf_te_mmap_close(struct vm_area_struct *vma) @@ -293,10 +324,10 @@ static void dma_buf_te_mmap_close(struct vm_area_struct *vma) dma_buf = vma->vm_private_data; alloc = dma_buf->priv; + mutex_lock(&alloc->lock); BUG_ON(alloc->nr_cpu_mappings <= 0); - mutex_lock(&dma_buf->lock); alloc->nr_cpu_mappings--; - mutex_unlock(&dma_buf->lock); + mutex_unlock(&alloc->lock); } #if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE @@ -331,11 +362,9 @@ static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf) return 0; } -static const struct vm_operations_struct dma_buf_te_vm_ops = { - .open = dma_buf_te_mmap_open, - .close = dma_buf_te_mmap_close, - .fault = dma_buf_te_mmap_fault -}; +static const struct vm_operations_struct dma_buf_te_vm_ops = { .open = dma_buf_te_mmap_open, + .close = dma_buf_te_mmap_close, + .fault = dma_buf_te_mmap_fault }; static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) { @@ -346,7 +375,7 @@ static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) if (alloc->fail_mmap) return -ENOMEM; - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &dma_buf_te_vm_ops; vma->vm_private_data = dmabuf; @@ -374,10 +403,9 @@ static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num) if (page_num >= alloc->nr_pages) return NULL; - return kmap(alloc->pages[page_num]); + return kbase_kmap(alloc->pages[page_num]); } -static void dma_buf_te_kunmap(struct dma_buf *buf, - unsigned long page_num, void *addr) +static void dma_buf_te_kunmap(struct dma_buf *buf, unsigned long page_num, void *addr) 
{ struct dma_buf_te_alloc *alloc; @@ -385,7 +413,7 @@ static void dma_buf_te_kunmap(struct dma_buf *buf, if (page_num >= alloc->nr_pages) return; - kunmap(alloc->pages[page_num]); + kbase_kunmap(alloc->pages[page_num], addr); } static struct dma_buf_ops dma_buf_te_ops = { @@ -467,8 +495,9 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, #endif /* NO_SG_CHAIN */ if (alloc_req.size > max_nr_pages) { - dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %zu pages", - __func__, alloc_req.size, max_nr_pages); + dev_err(te_device.this_device, + "%s: buffer size of %llu pages exceeded the mapping limit of %zu pages", + __func__, alloc_req.size, max_nr_pages); goto invalid_size; } @@ -488,23 +517,21 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, #endif if (!alloc->pages) { - dev_err(te_device.this_device, - "%s: couldn't alloc %zu page structures", - __func__, alloc->nr_pages); + dev_err(te_device.this_device, "%s: couldn't alloc %zu page structures", __func__, + alloc->nr_pages); goto free_alloc_object; } if (contiguous) { dma_addr_t dma_aux; - alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device, - alloc->nr_pages * PAGE_SIZE, - &alloc->contig_dma_addr, - GFP_KERNEL | __GFP_ZERO, - DMA_ATTR_WRITE_COMBINE); + alloc->contig_cpu_addr = dma_alloc_attrs( + te_device.this_device, alloc->nr_pages * PAGE_SIZE, &alloc->contig_dma_addr, + GFP_KERNEL | __GFP_ZERO, DMA_ATTR_WRITE_COMBINE); if (!alloc->contig_cpu_addr) { - dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %zu pages", - __func__, alloc->nr_pages); + dev_err(te_device.this_device, + "%s: couldn't alloc contiguous buffer %zu pages", __func__, + alloc->nr_pages); goto free_page_struct; } dma_aux = alloc->contig_dma_addr; @@ -522,6 +549,8 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, } } + mutex_init(&alloc->lock); + /* alloc ready, let's export it 
*/ { struct dma_buf_export_info export_info = { @@ -555,13 +584,12 @@ no_fd: dma_buf_put(dma_buf); no_export: /* i still valid */ + mutex_destroy(&alloc->lock); no_page: if (contiguous) { - dma_free_attrs(te_device.this_device, - alloc->nr_pages * PAGE_SIZE, - alloc->contig_cpu_addr, - alloc->contig_dma_addr, - DMA_ATTR_WRITE_COMBINE); + dma_free_attrs(te_device.this_device, alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, alloc->contig_dma_addr, + DMA_ATTR_WRITE_COMBINE); } else { while (i-- > 0) __free_page(alloc->pages[i]); @@ -602,11 +630,11 @@ static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg alloc = dmabuf->priv; /* lock while reading status to take a snapshot */ - mutex_lock(&dmabuf->lock); + mutex_lock(&alloc->lock); status.attached_devices = alloc->nr_attached_devices; status.device_mappings = alloc->nr_device_mappings; status.cpu_mappings = alloc->nr_cpu_mappings; - mutex_unlock(&dmabuf->lock); + mutex_unlock(&alloc->lock); if (copy_to_user(arg, &status, sizeof(status))) goto err_have_dmabuf; @@ -640,11 +668,11 @@ static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing _ /* ours, set the fail modes */ alloc = dmabuf->priv; /* lock to set the fail modes atomically */ - mutex_lock(&dmabuf->lock); + mutex_lock(&alloc->lock); alloc->fail_attach = f.fail_attach; - alloc->fail_map = f.fail_map; - alloc->fail_mmap = f.fail_mmap; - mutex_unlock(&dmabuf->lock); + alloc->fail_map = f.fail_map; + alloc->fail_mmap = f.fail_mmap; + mutex_unlock(&alloc->lock); /* success */ res = 0; @@ -709,7 +737,6 @@ no_import: static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg) { - struct dma_buf *dmabuf; struct dma_buf_te_ioctl_fill f; int ret; @@ -729,17 +756,21 @@ static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg) static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + CSTD_UNUSED(file); + switch (cmd) { case DMA_BUF_TE_VERSION: return 
do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg); case DMA_BUF_TE_ALLOC: - return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false); + return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, + false); case DMA_BUF_TE_ALLOC_CONT: return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true); case DMA_BUF_TE_QUERY: return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg); case DMA_BUF_TE_SET_FAILING: - return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg); + return do_dma_buf_te_ioctl_set_failing( + (struct dma_buf_te_ioctl_set_failing __user *)arg); case DMA_BUF_TE_FILL: return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg); default: @@ -770,7 +801,6 @@ static int __init dma_buf_te_init(void) dev_info(te_device.this_device, "dma_buf_te ready\n"); return 0; - } static void __exit dma_buf_te_exit(void) diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 2acb9faf12d0..11dc1c2e24d8 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,15 +32,13 @@ #include #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)) -#undef DEFINE_SIMPLE_ATTRIBUTE -#define DEFINE_SIMPLE_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE -#define debugfs_create_file debugfs_create_file_unsafe +#ifndef CSTD_UNUSED +#define CSTD_UNUSED(x) ((void)(x)) #endif #if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) -static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, pgprot_t pgprot) +static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) { int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); @@ -72,10 +70,10 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, * debugfs. Display is organized per group with small and large sized pages. */ struct mgm_group { - size_t size; - size_t lp_size; - size_t insert_pfn; - size_t update_gpu_pte; + atomic_t size; + atomic_t lp_size; + atomic_t insert_pfn; + atomic_t update_gpu_pte; }; /** @@ -102,7 +100,7 @@ static int mgm_size_get(void *data, u64 *val) { struct mgm_group *group = data; - *val = group->size; + *val = atomic_read(&group->size); return 0; } @@ -110,27 +108,21 @@ static int mgm_size_get(void *data, u64 *val) static int mgm_lp_size_get(void *data, u64 *val) { struct mgm_group *group = data; - - *val = group->lp_size; - + *val = atomic_read(&group->lp_size); return 0; } static int mgm_insert_pfn_get(void *data, u64 *val) { struct mgm_group *group = data; - - *val = group->insert_pfn; - + *val = atomic_read(&group->insert_pfn); return 0; } static int mgm_update_gpu_pte_get(void *data, u64 *val) { struct mgm_group *group = data; - - *val = group->update_gpu_pte; - + *val = atomic_read(&group->update_gpu_pte); return 0; } @@ -154,8 +146,7 @@ static int 
mgm_initialize_debugfs(struct mgm_groups *mgm_data) /* * Create root directory of memory-group-manager */ - mgm_data->mgm_debugfs_root = - debugfs_create_dir("physical-memory-group-manager", NULL); + mgm_data->mgm_debugfs_root = debugfs_create_dir("physical-memory-group-manager", NULL); if (IS_ERR_OR_NULL(mgm_data->mgm_debugfs_root)) { dev_err(mgm_data->dev, "fail to create debugfs root directory\n"); return -ENODEV; @@ -165,43 +156,37 @@ static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) * Create debugfs files per group */ for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { - scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX, - "group_%d", i); - g = debugfs_create_dir(debugfs_group_name, - mgm_data->mgm_debugfs_root); + scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX, "group_%d", i); + g = debugfs_create_dir(debugfs_group_name, mgm_data->mgm_debugfs_root); if (IS_ERR_OR_NULL(g)) { dev_err(mgm_data->dev, "fail to create group[%d]\n", i); goto remove_debugfs; } - e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i], - &fops_mgm_size); + e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i], &fops_mgm_size); if (IS_ERR_OR_NULL(e)) { dev_err(mgm_data->dev, "fail to create size[%d]\n", i); goto remove_debugfs; } - e = debugfs_create_file("lp_size", 0444, g, - &mgm_data->groups[i], &fops_mgm_lp_size); + e = debugfs_create_file("lp_size", 0444, g, &mgm_data->groups[i], + &fops_mgm_lp_size); if (IS_ERR_OR_NULL(e)) { - dev_err(mgm_data->dev, - "fail to create lp_size[%d]\n", i); + dev_err(mgm_data->dev, "fail to create lp_size[%d]\n", i); goto remove_debugfs; } - e = debugfs_create_file("insert_pfn", 0444, g, - &mgm_data->groups[i], &fops_mgm_insert_pfn); + e = debugfs_create_file("insert_pfn", 0444, g, &mgm_data->groups[i], + &fops_mgm_insert_pfn); if (IS_ERR_OR_NULL(e)) { - dev_err(mgm_data->dev, - "fail to create insert_pfn[%d]\n", i); + dev_err(mgm_data->dev, "fail to create insert_pfn[%d]\n", i); goto remove_debugfs; } - 
e = debugfs_create_file("update_gpu_pte", 0444, g, - &mgm_data->groups[i], &fops_mgm_update_gpu_pte); + e = debugfs_create_file("update_gpu_pte", 0444, g, &mgm_data->groups[i], + &fops_mgm_update_gpu_pte); if (IS_ERR_OR_NULL(e)) { - dev_err(mgm_data->dev, - "fail to create update_gpu_pte[%d]\n", i); + dev_err(mgm_data->dev, "fail to create update_gpu_pte[%d]\n", i); goto remove_debugfs; } } @@ -236,31 +221,30 @@ static void update_size(struct memory_group_manager_device *mgm_dev, unsigned in switch (order) { case ORDER_SMALL_PAGE: if (alloc) - data->groups[group_id].size++; + atomic_inc(&data->groups[group_id].size); else { - WARN_ON(data->groups[group_id].size == 0); - data->groups[group_id].size--; + WARN_ON(atomic_read(&data->groups[group_id].size) == 0); + atomic_dec(&data->groups[group_id].size); } - break; + break; case ORDER_LARGE_PAGE: if (alloc) - data->groups[group_id].lp_size++; + atomic_inc(&data->groups[group_id].lp_size); else { - WARN_ON(data->groups[group_id].lp_size == 0); - data->groups[group_id].lp_size--; + WARN_ON(atomic_read(&data->groups[group_id].lp_size) == 0); + atomic_dec(&data->groups[group_id].lp_size); } - break; + break; default: dev_err(data->dev, "Unknown order(%d)\n", order); - break; + break; } } -static struct page *example_mgm_alloc_page( - struct memory_group_manager_device *mgm_dev, int group_id, - gfp_t gfp_mask, unsigned int order) +static struct page *example_mgm_alloc_page(struct memory_group_manager_device *mgm_dev, + int group_id, gfp_t gfp_mask, unsigned int order) { struct mgm_groups *const data = mgm_dev->data; struct page *p; @@ -268,8 +252,7 @@ static struct page *example_mgm_alloc_page( dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d gfp_mask=0x%x order=%u\n", __func__, (void *)mgm_dev, group_id, gfp_mask, order); - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return NULL; p = 
alloc_pages(gfp_mask, order); @@ -285,17 +268,15 @@ static struct page *example_mgm_alloc_page( return p; } -static void example_mgm_free_page( - struct memory_group_manager_device *mgm_dev, int group_id, - struct page *page, unsigned int order) +static void example_mgm_free_page(struct memory_group_manager_device *mgm_dev, int group_id, + struct page *page, unsigned int order) { struct mgm_groups *const data = mgm_dev->data; dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d page=%pK order=%u\n", __func__, (void *)mgm_dev, group_id, (void *)page, order); - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return; __free_pages(page, order); @@ -303,9 +284,8 @@ static void example_mgm_free_page( update_size(mgm_dev, group_id, order, false); } -static int example_mgm_get_import_memory_id( - struct memory_group_manager_device *mgm_dev, - struct memory_group_manager_import_data *import_data) +static int example_mgm_get_import_memory_id(struct memory_group_manager_device *mgm_dev, + struct memory_group_manager_import_data *import_data) { struct mgm_groups *const data = mgm_dev->data; @@ -315,24 +295,21 @@ static int example_mgm_get_import_memory_id( if (!WARN_ON(!import_data)) { WARN_ON(!import_data->u.dma_buf); - WARN_ON(import_data->type != - MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF); + WARN_ON(import_data->type != MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF); } return IMPORTED_MEMORY_ID; } -static u64 example_mgm_update_gpu_pte( - struct memory_group_manager_device *const mgm_dev, int const group_id, - int const mmu_level, u64 pte) +static u64 example_mgm_update_gpu_pte(struct memory_group_manager_device *const mgm_dev, + int const group_id, int const mmu_level, u64 pte) { struct mgm_groups *const data = mgm_dev->data; dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d, mmu_level=%d, pte=0x%llx)\n", __func__, (void *)mgm_dev, group_id, mmu_level, pte); - if 
(WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return pte; pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK; @@ -340,7 +317,7 @@ static u64 example_mgm_update_gpu_pte( /* Address could be translated into a different bus address here */ pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); - data->groups[group_id].update_gpu_pte++; + atomic_inc(&data->groups[group_id].update_gpu_pte); return pte; } @@ -348,6 +325,10 @@ static u64 example_mgm_update_gpu_pte( static u64 example_mgm_pte_to_original_pte(struct memory_group_manager_device *const mgm_dev, int const group_id, int const mmu_level, u64 pte) { + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + CSTD_UNUSED(mmu_level); + /* Undo the group ID modification */ pte &= ~PTE_PBHA_MASK; /* Undo the bit set */ @@ -356,10 +337,11 @@ static u64 example_mgm_pte_to_original_pte(struct memory_group_manager_device *c return pte; } -static vm_fault_t example_mgm_vmf_insert_pfn_prot( - struct memory_group_manager_device *const mgm_dev, int const group_id, - struct vm_area_struct *const vma, unsigned long const addr, - unsigned long const pfn, pgprot_t const prot) +static vm_fault_t example_mgm_vmf_insert_pfn_prot(struct memory_group_manager_device *const mgm_dev, + int const group_id, + struct vm_area_struct *const vma, + unsigned long const addr, unsigned long const pfn, + pgprot_t const prot) { struct mgm_groups *const data = mgm_dev->data; vm_fault_t fault; @@ -369,14 +351,13 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot( __func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn, (unsigned long long)pgprot_val(prot)); - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return VM_FAULT_SIGBUS; fault = vmf_insert_pfn_prot(vma, addr, pfn, prot); if (fault == VM_FAULT_NOPAGE) - 
data->groups[group_id].insert_pfn++; + atomic_inc(&data->groups[group_id].insert_pfn); else dev_err(data->dev, "vmf_insert_pfn_prot failed\n"); @@ -388,10 +369,10 @@ static int mgm_initialize_data(struct mgm_groups *mgm_data) int i; for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { - mgm_data->groups[i].size = 0; - mgm_data->groups[i].lp_size = 0; - mgm_data->groups[i].insert_pfn = 0; - mgm_data->groups[i].update_gpu_pte = 0; + atomic_set(&mgm_data->groups[i].size, 0); + atomic_set(&mgm_data->groups[i].lp_size, 0); + atomic_set(&mgm_data->groups[i].insert_pfn, 0); + atomic_set(&mgm_data->groups[i].update_gpu_pte, 0); } return mgm_initialize_debugfs(mgm_data); @@ -402,14 +383,12 @@ static void mgm_term_data(struct mgm_groups *data) int i; for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { - if (data->groups[i].size != 0) - dev_warn(data->dev, - "%zu 0-order pages in group(%d) leaked\n", - data->groups[i].size, i); - if (data->groups[i].lp_size != 0) - dev_warn(data->dev, - "%zu 9 order pages in group(%d) leaked\n", - data->groups[i].lp_size, i); + if (atomic_read(&data->groups[i].size) != 0) + dev_warn(data->dev, "%d 0-order pages in group(%d) leaked\n", + atomic_read(&data->groups[i].size), i); + if (atomic_read(&data->groups[i].lp_size) != 0) + dev_warn(data->dev, "%d 9 order pages in group(%d) leaked\n", + atomic_read(&data->groups[i].lp_size), i); } mgm_term_debugfs(data); @@ -427,8 +406,7 @@ static int memory_group_manager_probe(struct platform_device *pdev) mgm_dev->owner = THIS_MODULE; mgm_dev->ops.mgm_alloc_page = example_mgm_alloc_page; mgm_dev->ops.mgm_free_page = example_mgm_free_page; - mgm_dev->ops.mgm_get_import_memory_id = - example_mgm_get_import_memory_id; + mgm_dev->ops.mgm_get_import_memory_id = example_mgm_get_import_memory_id; mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot; mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte; mgm_dev->ops.mgm_pte_to_original_pte = example_mgm_pte_to_original_pte; @@ 
-456,8 +434,7 @@ static int memory_group_manager_probe(struct platform_device *pdev) static int memory_group_manager_remove(struct platform_device *pdev) { - struct memory_group_manager_device *mgm_dev = - platform_get_drvdata(pdev); + struct memory_group_manager_device *mgm_dev = platform_get_drvdata(pdev); struct mgm_groups *mgm_data = mgm_dev->data; mgm_term_data(mgm_data); @@ -476,20 +453,20 @@ static const struct of_device_id memory_group_manager_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, memory_group_manager_dt_ids); -static struct platform_driver memory_group_manager_driver = { - .probe = memory_group_manager_probe, - .remove = memory_group_manager_remove, - .driver = { - .name = "physical-memory-group-manager", - .of_match_table = of_match_ptr(memory_group_manager_dt_ids), - /* +static struct platform_driver + memory_group_manager_driver = { .probe = memory_group_manager_probe, + .remove = memory_group_manager_remove, + .driver = { + .name = "physical-memory-group-manager", + .of_match_table = + of_match_ptr(memory_group_manager_dt_ids), + /* * Prevent the mgm_dev from being unbound and freed, as other's * may have pointers to it and would get confused, or crash, if * it suddenly disappear. */ - .suppress_bind_attrs = true, - } -}; + .suppress_bind_attrs = true, + } }; module_platform_driver(memory_group_manager_driver); diff --git a/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c b/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c index b435867e1b6f..d7e0bec0978e 100644 --- a/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c +++ b/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,7 @@ * @dev: Device pointer * @allocated_pages_bitfield_arr: Status of all the physical memory pages within the * protected memory region, one bit per page - * @rmem_base: Base address of the reserved memory region + * @rmem_base: Base physical address of the reserved memory region * @rmem_size: Size of the reserved memory region, in pages * @num_free_pages: Number of free pages in the memory region * @rmem_lock: Lock to serialize the allocation and freeing of @@ -68,9 +68,9 @@ struct simple_pma_device { * PAGES_PER_BITFIELD_ELEM, adds an extra page for the remainder. * @num_pages: number of pages */ -#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \ - ((PAGES_PER_BITFIELD_ELEM * (0 != (num_pages % PAGES_PER_BITFIELD_ELEM)) + \ - num_pages) / PAGES_PER_BITFIELD_ELEM) +#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \ + ((PAGES_PER_BITFIELD_ELEM * (0 != (num_pages % PAGES_PER_BITFIELD_ELEM)) + num_pages) / \ + PAGES_PER_BITFIELD_ELEM) /** * small_granularity_alloc() - Allocate 1-32 power-of-two pages. @@ -90,8 +90,7 @@ struct simple_pma_device { * It can be thought of as the 'small-granularity' allocator. 
*/ static void small_granularity_alloc(struct simple_pma_device *const epma_dev, - size_t alloc_bitfield_idx, size_t start_bit, - size_t order, + size_t alloc_bitfield_idx, size_t start_bit, size_t order, struct protected_memory_allocation *pma) { size_t i; @@ -99,28 +98,26 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev, u64 *bitfield; size_t alloc_pages_bitfield_size; - if (WARN_ON(!epma_dev) || - WARN_ON(!pma)) + if (WARN_ON(!epma_dev) || WARN_ON(!pma)) return; WARN(epma_dev->rmem_size == 0, "%s: rmem_size is 0", __func__); alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); - WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size, - "%s: idx>bf_size: %zu %zu", __func__, + WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size, "%s: idx>bf_size: %zu %zu", __func__, alloc_bitfield_idx, alloc_pages_bitfield_size); - WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM, - "%s: start=%zu order=%zu ppbe=%zu", - __func__, start_bit, order, PAGES_PER_BITFIELD_ELEM); + WARN((start_bit + (1ULL << order)) > PAGES_PER_BITFIELD_ELEM, + "%s: start=%zu order=%zu ppbe=%zu", __func__, start_bit, order, + PAGES_PER_BITFIELD_ELEM); bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx]; - for (i = 0; i < (1 << order); i++) { + for (i = 0; i < (1ULL << order); i++) { /* Check the pages represented by this bit are actually free */ WARN(*bitfield & (1ULL << (start_bit + i)), - "in %s: page not free: %zu %zu %.16llx %zu\n", - __func__, i, order, *bitfield, alloc_pages_bitfield_size); + "in %s: page not free: %zu %zu %.16llx %zu\n", __func__, i, order, *bitfield, + alloc_pages_bitfield_size); /* Mark the pages as now allocated */ *bitfield |= (1ULL << (start_bit + i)); @@ -152,8 +149,7 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev, * as the 'large-granularity' allocator. 
*/ static void large_granularity_alloc(struct simple_pma_device *const epma_dev, - size_t start_alloc_bitfield_idx, - size_t order, + size_t start_alloc_bitfield_idx, size_t order, struct protected_memory_allocation *pma) { size_t i; @@ -161,8 +157,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev, size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM; size_t start_page_idx = start_alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM; - if (WARN_ON(!epma_dev) || - WARN_ON(!pma)) + if (WARN_ON(!epma_dev) || WARN_ON(!pma)) return; /* @@ -170,29 +165,30 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev, * between the start element and the end of the bitfield array * to fulfill the request? */ - WARN((start_alloc_bitfield_idx + order) >= ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size), - "%s: start=%zu order=%zu ms=%zu", - __func__, start_alloc_bitfield_idx, order, epma_dev->rmem_size); + WARN((start_alloc_bitfield_idx + order) >= + ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size), + "%s: start=%zu order=%zu ms=%zu", __func__, start_alloc_bitfield_idx, order, + epma_dev->rmem_size); for (i = 0; i < num_bitfield_elements_needed; i++) { - u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i]; + u64 *bitfield = + &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i]; /* We expect all pages that relate to this bitfield element to be free */ - WARN((*bitfield != 0), - "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n", - __func__, i, order, *bitfield); + WARN((*bitfield != 0), "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n", __func__, + i, order, *bitfield); /* Mark all the pages for this element as not free */ *bitfield = ~0ULL; } /* Fill-in the allocation struct for the caller */ - pma->pa = epma_dev->rmem_base + (start_page_idx << PAGE_SHIFT); + pma->pa = epma_dev->rmem_base + (start_page_idx << PAGE_SHIFT); pma->order = order; } 
-static struct protected_memory_allocation *simple_pma_alloc_page( - struct protected_memory_allocator_device *pma_dev, unsigned int order) +static struct protected_memory_allocation * +simple_pma_alloc_page(struct protected_memory_allocator_device *pma_dev, unsigned int order) { struct simple_pma_device *const epma_dev = container_of(pma_dev, struct simple_pma_device, pma_dev); @@ -204,8 +200,7 @@ static struct protected_memory_allocation *simple_pma_alloc_page( size_t bit; size_t count; - dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n", - __func__, (void *)pma_dev, order); + dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n", __func__, (void *)pma_dev, order); /* This is an example function that follows an extremely simple logic * and is very likely to fail to allocate memory if put under stress. @@ -260,22 +255,18 @@ static struct protected_memory_allocation *simple_pma_alloc_page( count = 0; for (bit = 0; bit < PAGES_PER_BITFIELD_ELEM; bit++) { - if (0 == (bitfields[i] & (1ULL << bit))) { + if (0 == (bitfields[i] & (1ULL << bit))) { if ((count + 1) >= num_pages_to_alloc) { /* * We've found enough free, consecutive pages with which to * make an allocation */ - small_granularity_alloc( - epma_dev, i, - bit - count, order, - pma); + small_granularity_alloc(epma_dev, i, bit - count, + order, pma); - epma_dev->num_free_pages -= - num_pages_to_alloc; + epma_dev->num_free_pages -= num_pages_to_alloc; - spin_unlock( - &epma_dev->rmem_lock); + spin_unlock(&epma_dev->rmem_lock); return pma; } @@ -307,12 +298,10 @@ static struct protected_memory_allocation *simple_pma_alloc_page( if (bitfields[i] == 0) { count += PAGES_PER_BITFIELD_ELEM; - if (count >= (1 << order)) { + if (count >= (1ULL << order)) { size_t start_idx = (i + 1) - num_bitfield_elements_needed; - large_granularity_alloc(epma_dev, - start_idx, - order, pma); + large_granularity_alloc(epma_dev, start_idx, order, pma); epma_dev->num_free_pages -= 1 << order; spin_unlock(&epma_dev->rmem_lock); @@ -327,28 
+316,26 @@ static struct protected_memory_allocation *simple_pma_alloc_page( spin_unlock(&epma_dev->rmem_lock); devm_kfree(epma_dev->dev, pma); - dev_err(epma_dev->dev, "not enough contiguous pages (need %zu), total free pages left %zu\n", + dev_err(epma_dev->dev, + "not enough contiguous pages (need %zu), total free pages left %zu\n", num_pages_to_alloc, epma_dev->num_free_pages); return NULL; } -static phys_addr_t simple_pma_get_phys_addr( - struct protected_memory_allocator_device *pma_dev, - struct protected_memory_allocation *pma) +static phys_addr_t simple_pma_get_phys_addr(struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma) { struct simple_pma_device *const epma_dev = container_of(pma_dev, struct simple_pma_device, pma_dev); - dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", - __func__, (void *)pma_dev, (void *)pma, - (unsigned long long)pma->pa); + dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%pK\n", __func__, (void *)pma_dev, + (void *)pma, (void *)pma->pa); return pma->pa; } -static void simple_pma_free_page( - struct protected_memory_allocator_device *pma_dev, - struct protected_memory_allocation *pma) +static void simple_pma_free_page(struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma) { struct simple_pma_device *const epma_dev = container_of(pma_dev, struct simple_pma_device, pma_dev); @@ -364,9 +351,8 @@ static void simple_pma_free_page( WARN_ON(pma == NULL); - dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", - __func__, (void *)pma_dev, (void *)pma, - (unsigned long long)pma->pa); + dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%pK\n", __func__, (void *)pma_dev, + (void *)pma, (void *)pma->pa); WARN_ON(pma->pa < epma_dev->rmem_base); @@ -402,14 +388,14 @@ static void simple_pma_free_page( *bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit); } else { WARN(page_num % PAGES_PER_BITFIELD_ELEM, - "%s: Expecting 
allocs of order >= %d to be %zu-page aligned\n", - __func__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM); + "%s: Expecting allocs of order >= %d to be %zu-page aligned\n", __func__, + ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM); for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) { bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i]; /* We expect all bits to be set (all pages allocated) */ - WARN((*bitfield != ~0), + WARN((*bitfield != ~0ULL), "%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n", __func__, offset, num_pages_in_allocation, *bitfield); @@ -480,8 +466,8 @@ static int protected_memory_allocator_probe(struct platform_device *pdev) alloc_bitmap_pages_arr_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); - epma_dev->allocated_pages_bitfield_arr = devm_kzalloc(&pdev->dev, - alloc_bitmap_pages_arr_size * BITFIELD_ELEM_SIZE, GFP_KERNEL); + epma_dev->allocated_pages_bitfield_arr = devm_kzalloc( + &pdev->dev, alloc_bitmap_pages_arr_size * BITFIELD_ELEM_SIZE, GFP_KERNEL); if (!epma_dev->allocated_pages_bitfield_arr) { dev_err(&pdev->dev, "failed to allocate resources\n"); @@ -491,31 +477,27 @@ static int protected_memory_allocator_probe(struct platform_device *pdev) if (epma_dev->rmem_size % PAGES_PER_BITFIELD_ELEM) { size_t extra_pages = - alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM - - epma_dev->rmem_size; + alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM - epma_dev->rmem_size; size_t last_bitfield_index = alloc_bitmap_pages_arr_size - 1; /* Mark the extra pages (that lie outside the reserved range) as * always in use. 
*/ epma_dev->allocated_pages_bitfield_arr[last_bitfield_index] = - ((1ULL << extra_pages) - 1) << - (PAGES_PER_BITFIELD_ELEM - extra_pages); + ((1ULL << extra_pages) - 1) << (PAGES_PER_BITFIELD_ELEM - extra_pages); } platform_set_drvdata(pdev, &epma_dev->pma_dev); - dev_info(&pdev->dev, - "Protected memory allocator probed successfully\n"); - dev_info(&pdev->dev, "Protected memory region: base=%llx num pages=%zu\n", - (unsigned long long)rmem_base, rmem_size); + dev_info(&pdev->dev, "Protected memory allocator probed successfully\n"); + dev_info(&pdev->dev, "Protected memory region: base=%pK num pages=%zu\n", (void *)rmem_base, + rmem_size); return 0; } static int protected_memory_allocator_remove(struct platform_device *pdev) { - struct protected_memory_allocator_device *pma_dev = - platform_get_drvdata(pdev); + struct protected_memory_allocator_device *pma_dev = platform_get_drvdata(pdev); struct simple_pma_device *epma_dev; struct device *dev; @@ -527,15 +509,14 @@ static int protected_memory_allocator_remove(struct platform_device *pdev) if (epma_dev->num_free_pages < epma_dev->rmem_size) { dev_warn(&pdev->dev, "Leaking %zu pages of protected memory\n", - epma_dev->rmem_size - epma_dev->num_free_pages); + epma_dev->rmem_size - epma_dev->num_free_pages); } platform_set_drvdata(pdev, NULL); devm_kfree(dev, epma_dev->allocated_pages_bitfield_arr); devm_kfree(dev, epma_dev); - dev_info(&pdev->dev, - "Protected memory allocator removed successfully\n"); + dev_info(&pdev->dev, "Protected memory allocator removed successfully\n"); return 0; } @@ -546,14 +527,14 @@ static const struct of_device_id protected_memory_allocator_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, protected_memory_allocator_dt_ids); -static struct platform_driver protected_memory_allocator_driver = { - .probe = protected_memory_allocator_probe, - .remove = protected_memory_allocator_remove, - .driver = { - .name = "simple_protected_memory_allocator", - .of_match_table = 
of_match_ptr(protected_memory_allocator_dt_ids), - } -}; +static struct platform_driver + protected_memory_allocator_driver = { .probe = protected_memory_allocator_probe, + .remove = protected_memory_allocator_remove, + .driver = { + .name = "simple_protected_memory_allocator", + .of_match_table = of_match_ptr( + protected_memory_allocator_dt_ids), + } }; module_platform_driver(protected_memory_allocator_driver); diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index 9cadda188fbc..957f412d6547 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g18p0-01eac0"' +MALI_RELEASE_NAME ?= '"g21p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 @@ -149,6 +149,7 @@ bifrost_kbase-y := \ mali_kbase_cache_policy.o \ mali_kbase_ccswe.o \ mali_kbase_mem.o \ + mali_kbase_reg_track.o \ mali_kbase_mem_migrate.o \ mali_kbase_mem_pool_group.o \ mali_kbase_native_mgm.o \ @@ -157,7 +158,6 @@ bifrost_kbase-y := \ mali_kbase_pm.o \ mali_kbase_config.o \ mali_kbase_kinstr_prfcnt.o \ - mali_kbase_vinstr.o \ mali_kbase_softjobs.o \ mali_kbase_hw.o \ mali_kbase_debug.o \ @@ -173,7 +173,6 @@ bifrost_kbase-y := \ mali_kbase_mem_pool.o \ mali_kbase_mem_pool_debugfs.o \ mali_kbase_debugfs_helper.o \ - mali_kbase_strings.o \ mali_kbase_as_fault_debugfs.o \ mali_kbase_regs_history_debugfs.o \ mali_kbase_dvfs_debugfs.o \ @@ -190,6 +189,10 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_sync_file.o \ mali_kbase_sync_common.o +bifrost_kbase-$(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) += \ + mali_power_gpu_work_period_trace.o \ + mali_kbase_gpu_metrics.o + ifneq ($(CONFIG_MALI_CSF_SUPPORT),y) bifrost_kbase-y += \ mali_kbase_jm.o \ @@ -217,6 +220,7 @@ INCLUDE_SUBDIR = \ $(src)/tl/Kbuild \ $(src)/hwcnt/Kbuild \ $(src)/gpu/Kbuild \ + $(src)/hw_access/Kbuild \ 
$(src)/thirdparty/Kbuild \ $(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index ca3da57cffd3..f32b107949ba 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -63,11 +63,18 @@ config MALI_BIFROST_NO_MALI All calls to the simulated hardware will complete immediately as if the hardware completed the task. +config MALI_NO_MALI_DEFAULT_GPU + string "Default GPU for No Mali" + depends on MALI_BIFROST_NO_MALI + default "tMIx" + help + This option sets the default GPU to identify as for No Mali builds. + endchoice menu "Platform specific options" -source "drivers/gpu/arm/bifrost/platform/Kconfig" +source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/platform/Kconfig" endmenu config MALI_CSF_SUPPORT @@ -163,32 +170,36 @@ menuconfig MALI_BIFROST_EXPERT if MALI_BIFROST_EXPERT -config LARGE_PAGE_ALLOC_OVERRIDE - bool "Override default setting of 2MB pages" +config LARGE_PAGE_SUPPORT + bool "Support for 2MB page allocations" depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n + default y help - An override config for LARGE_PAGE_ALLOC config. - When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be - enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be - enabled when GPU HW satisfies requirements. + Rather than allocating all GPU memory page-by-page, allow the system + to decide whether to attempt to allocate 2MB pages from the kernel. + This reduces TLB pressure. - If in doubt, say N + Note that this option only enables the support for the module parameter + and does not necessarily mean that 2MB pages will be used automatically. + This depends on GPU support. -config LARGE_PAGE_ALLOC - bool "Attempt to allocate 2MB pages" + If in doubt, say Y. 
+ +config PAGE_MIGRATION_SUPPORT + bool "Enable support for page migration" depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n + default y + default n if ANDROID help - Rather than allocating all GPU memory page-by-page, attempt to - allocate 2MB pages from the kernel. This reduces TLB pressure and - helps to prevent memory fragmentation. + Compile in support for page migration. + If set to disabled ('n') then page migration cannot + be enabled at all, and related symbols are not compiled in. + If not set, page migration is compiled in by default, and + if not explicitly enabled or disabled with the insmod parameter, + page migration becomes automatically enabled with large pages. - Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config - is enabled and enabling this on a GPU HW that does not satisfy - requirements can cause serious problem. - - If in doubt, say N + If in doubt, say Y. To strip out page migration symbols and support, + say N. config MALI_MEMORY_FULLY_BACKED bool "Enable memory fully physically-backed" @@ -383,7 +394,16 @@ config MALI_ARBITRATION virtualization setup for Mali If unsure, say N. +config MALI_TRACE_POWER_GPU_WORK_PERIOD + bool "Enable per-application GPU metrics tracepoints" + depends on MALI_BIFROST + default y + help + This option enables per-application GPU metrics tracepoints. -# source "drivers/gpu/arm/bifrost/tests/Kconfig" + If unsure, say N. 
+ + +# source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/tests/Kconfig" endif diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 39df298ff01c..f0f5e6072193 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -20,6 +20,7 @@ KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build KDIR ?= $(KERNEL_SRC) +M ?= $(shell pwd) ifeq ($(KDIR),) $(error Must specify KDIR to point to the kernel to target)) @@ -31,158 +32,169 @@ endif # Dependency resolution is done through statements as Kconfig # is not supported for out-of-tree builds. # +CONFIGS := +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + CONFIG_MALI_BIFROST ?= m + ifeq ($(CONFIG_MALI_BIFROST),m) + CONFIG_MALI_PLATFORM_NAME ?= "devicetree" + CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD ?= y + CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y + CONFIG_MALI_ARBITRATION ?= n + CONFIG_MALI_PARTITION_MANAGER ?= n + CONFIG_MALI_64BIT_HW_ACCESS ?= n -CONFIG_MALI_BIFROST ?= m -ifeq ($(CONFIG_MALI_BIFROST),m) - CONFIG_MALI_PLATFORM_NAME ?= "devicetree" - CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y - CONFIG_MALI_ARBITRATION ?= n - CONFIG_MALI_PARTITION_MANAGER ?= n - - ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) - # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI - CONFIG_MALI_REAL_HW ?= y - CONFIG_MALI_CORESIGHT = n - endif - - ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) - # Prevent misuse when CONFIG_MALI_BIFROST_DVFS=y - CONFIG_MALI_BIFROST_DEVFREQ ?= n - else - CONFIG_MALI_BIFROST_DEVFREQ ?= y - endif - - ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) - # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y - CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n - endif - - ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) - CONFIG_MALI_CORESIGHT ?= n - endif - - # - # Expert/Debug/Test released configurations - # - ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) - ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) - CONFIG_MALI_REAL_HW = n + ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) + # Prevent misuse when 
CONFIG_MALI_BIFROST_NO_MALI=y + CONFIG_MALI_REAL_HW ?= y + CONFIG_MALI_CORESIGHT = n + endif + ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) + # Prevent misuse when CONFIG_MALI_BIFROST_DVFS=y + CONFIG_MALI_BIFROST_DEVFREQ ?= n else - # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n - CONFIG_MALI_REAL_HW = y - CONFIG_MALI_BIFROST_ERROR_INJECT = n + CONFIG_MALI_BIFROST_DEVFREQ ?= y endif - - ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) - # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) + # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n endif - ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) - CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y - CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_CORESIGHT ?= n + endif + + # + # Expert/Debug/Test released configurations + # + ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) + ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) + CONFIG_MALI_REAL_HW = n + CONFIG_MALI_NO_MALI_DEFAULT_GPU ?= "tMIx" - ifeq ($(CONFIG_SYNC_FILE), y) - CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y else + # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_BIFROST_ERROR_INJECT = n + endif + + + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) + # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + endif + + ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) + CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y + CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y + + ifeq ($(CONFIG_SYNC_FILE), y) + CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y + else + CONFIG_MALI_BIFROST_FENCE_DEBUG = n + endif + else + # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n + CONFIG_MALI_BIFROST_ENABLE_TRACE = n + CONFIG_MALI_BIFROST_SYSTEM_TRACE = n CONFIG_MALI_BIFROST_FENCE_DEBUG = n endif else - # Prevent misuse when 
CONFIG_MALI_BIFROST_DEBUG=n + # Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n + CONFIG_MALI_CORESTACK = n + CONFIG_LARGE_PAGE_SUPPORT = y + CONFIG_MALI_PWRSOFT_765 = n + CONFIG_MALI_MEMORY_FULLY_BACKED = n + CONFIG_MALI_JOB_DUMP = n + CONFIG_MALI_BIFROST_NO_MALI = n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_BIFROST_ERROR_INJECT = n + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n + CONFIG_MALI_BIFROST_DEBUG = n CONFIG_MALI_BIFROST_ENABLE_TRACE = n CONFIG_MALI_BIFROST_SYSTEM_TRACE = n CONFIG_MALI_BIFROST_FENCE_DEBUG = n endif - else - # Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n - CONFIG_MALI_CORESTACK = n - CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n - CONFIG_LARGE_PAGE_ALLOC = n - CONFIG_MALI_PWRSOFT_765 = n - CONFIG_MALI_MEMORY_FULLY_BACKED = n - CONFIG_MALI_JOB_DUMP = n - CONFIG_MALI_BIFROST_NO_MALI = n - CONFIG_MALI_REAL_HW = y - CONFIG_MALI_BIFROST_ERROR_INJECT = n - CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n - CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n - CONFIG_MALI_BIFROST_DEBUG = n - CONFIG_MALI_BIFROST_ENABLE_TRACE = n - CONFIG_MALI_BIFROST_SYSTEM_TRACE = n - CONFIG_MALI_BIFROST_FENCE_DEBUG = n - endif - ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) - CONFIG_MALI_KUTF ?= y - ifeq ($(CONFIG_MALI_KUTF), y) - CONFIG_MALI_KUTF_IRQ_TEST ?= y - CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y + ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) + CONFIG_MALI_KUTF ?= y + ifeq ($(CONFIG_MALI_KUTF), y) + CONFIG_MALI_KUTF_IRQ_TEST ?= y + CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y + ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ), y) + ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) + CONFIG_MALI_KUTF_IPA_UNIT_TEST ?= y + endif + endif + + else + # Prevent misuse when CONFIG_MALI_KUTF=n + CONFIG_MALI_KUTF_IRQ_TEST = n + CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + 
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + endif else - # Prevent misuse when CONFIG_MALI_KUTF=n + # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n + CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif else - # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n + # Prevent misuse when CONFIG_MALI_BIFROST=n + CONFIG_MALI_ARBITRATION = n CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif -else - # Prevent misuse when CONFIG_MALI_BIFROST=n - CONFIG_MALI_ARBITRATION = n - CONFIG_MALI_KUTF = n - CONFIG_MALI_KUTF_IRQ_TEST = n - CONFIG_MALI_KUTF_CLK_RATE_TRACE = n - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + + # All Mali CONFIG should be listed here + CONFIGS += \ + CONFIG_MALI_BIFROST \ + CONFIG_MALI_CSF_SUPPORT \ + CONFIG_MALI_BIFROST_GATOR_SUPPORT \ + CONFIG_MALI_ARBITER_SUPPORT \ + CONFIG_MALI_ARBITRATION \ + CONFIG_MALI_PARTITION_MANAGER \ + CONFIG_MALI_REAL_HW \ + CONFIG_MALI_BIFROST_DEVFREQ \ + CONFIG_MALI_BIFROST_DVFS \ + CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ + CONFIG_MALI_BIFROST_EXPERT \ + CONFIG_MALI_CORESTACK \ + CONFIG_LARGE_PAGE_SUPPORT \ + CONFIG_MALI_PWRSOFT_765 \ + CONFIG_MALI_MEMORY_FULLY_BACKED \ + CONFIG_MALI_JOB_DUMP \ + CONFIG_MALI_BIFROST_NO_MALI \ + CONFIG_MALI_BIFROST_ERROR_INJECT \ + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ + CONFIG_MALI_PRFCNT_SET_PRIMARY \ + CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY \ + CONFIG_MALI_PRFCNT_SET_TERTIARY \ + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ + CONFIG_MALI_BIFROST_DEBUG \ + CONFIG_MALI_BIFROST_ENABLE_TRACE \ + CONFIG_MALI_BIFROST_SYSTEM_TRACE \ + CONFIG_MALI_BIFROST_FENCE_DEBUG \ + CONFIG_MALI_KUTF \ + CONFIG_MALI_KUTF_IRQ_TEST \ + CONFIG_MALI_KUTF_CLK_RATE_TRACE \ + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ + CONFIG_MALI_XEN \ + 
CONFIG_MALI_CORESIGHT \ + CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD + + endif -# All Mali CONFIG should be listed here -CONFIGS := \ - CONFIG_MALI_BIFROST \ - CONFIG_MALI_CSF_SUPPORT \ - CONFIG_MALI_BIFROST_GATOR_SUPPORT \ - CONFIG_MALI_ARBITER_SUPPORT \ - CONFIG_MALI_ARBITRATION \ - CONFIG_MALI_PARTITION_MANAGER \ - CONFIG_MALI_REAL_HW \ - CONFIG_MALI_BIFROST_DEVFREQ \ - CONFIG_MALI_BIFROST_DVFS \ - CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ - CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ - CONFIG_MALI_BIFROST_EXPERT \ - CONFIG_MALI_CORESTACK \ - CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ - CONFIG_LARGE_PAGE_ALLOC \ - CONFIG_MALI_PWRSOFT_765 \ - CONFIG_MALI_MEMORY_FULLY_BACKED \ - CONFIG_MALI_JOB_DUMP \ - CONFIG_MALI_BIFROST_NO_MALI \ - CONFIG_MALI_BIFROST_ERROR_INJECT \ - CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ - CONFIG_MALI_PRFCNT_SET_PRIMARY \ - CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY \ - CONFIG_MALI_PRFCNT_SET_TERTIARY \ - CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ - CONFIG_MALI_BIFROST_DEBUG \ - CONFIG_MALI_BIFROST_ENABLE_TRACE \ - CONFIG_MALI_BIFROST_SYSTEM_TRACE \ - CONFIG_MALI_BIFROST_FENCE_DEBUG \ - CONFIG_MALI_KUTF \ - CONFIG_MALI_KUTF_IRQ_TEST \ - CONFIG_MALI_KUTF_CLK_RATE_TRACE \ - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN \ - CONFIG_MALI_CORESIGHT - - THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) -include $(THIS_DIR)/../arbitration/Makefile @@ -197,7 +209,9 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \ $(value config)=$(value $(value config)), \ $(value config)=n)) -MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +endif # # EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build @@ -209,71 +223,78 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ $(if $(filter y m,$(value $(value config))), \ -D$(value config)=1)) -EXTRA_CFLAGS += 
-DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME='\"$(CONFIG_MALI_PLATFORM_NAME)\"' + EXTRA_CFLAGS += -DCONFIG_MALI_NO_MALI_DEFAULT_GPU='\"$(CONFIG_MALI_NO_MALI_DEFAULT_GPU)\"' +endif # # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # +BASE_SYMBOLS = $(M)/../../base/arm/Module.symvers -KBUILD_CFLAGS += -Wall -Werror +EXTRA_SYMBOLS += \ + $(BASE_SYMBOLS) + +CFLAGS_MODULE += -Wall -Werror # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter -KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-prototypes -KBUILD_CFLAGS += -Wold-style-definition +CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter +CFLAGS_MODULE += -Wmissing-declarations +CFLAGS_MODULE += -Wmissing-format-attribute +CFLAGS_MODULE += -Wmissing-prototypes +CFLAGS_MODULE += -Wold-style-definition # The -Wmissing-include-dirs cannot be enabled as the path to some of the # included directories change depending on whether it is an in-tree or # out-of-tree build. 
-KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable) +CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable) +CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned) +CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-shift-negative-value +CFLAGS_MODULE += -Wno-sign-compare +CFLAGS_MODULE += -Wno-shift-negative-value # This flag is needed to avoid build errors on older kernels -KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) +CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 # The following were added to align with W=2 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wdisabled-optimization +CFLAGS_MODULE += -Wdisabled-optimization # The -Wshadow flag cannot be enabled unless upstream kernels are # patched to fix redefinitions of certain built-in functions and # global variables. 
-KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -KBUILD_CFLAGS += -Wmissing-field-initializers +CFLAGS_MODULE += $(call cc-option, -Wlogical-op) +CFLAGS_MODULE += -Wmissing-field-initializers # -Wtype-limits must be disabled due to build failures on kernel 5.x -KBUILD_CFLAGS += -Wno-type-limit -KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) -KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) +CFLAGS_MODULE += -Wno-type-limits +CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) +CFLAGS_MODULE += $(call cc-option, -Wunused-macros) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions -KBUILD_CFLAGS += -Wno-ignored-qualifiers +CFLAGS_MODULE += -Wno-ignored-qualifiers ifeq ($(CONFIG_GCOV_KERNEL),y) - KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) - KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + CFLAGS_MODULE += $(call cc-option, -ftest-coverage) + CFLAGS_MODULE += $(call cc-option, -fprofile-arcs) EXTRA_CFLAGS += -DGCOV_PROFILE=1 endif ifeq ($(CONFIG_MALI_KCOV),y) - KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) + CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp) EXTRA_CFLAGS += -DKCOV=1 EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 endif all: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules modules_install: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) modules_install clean: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) clean diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c index b5d3cd685ba5..728f013f293d 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c +++ 
b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,13 +32,11 @@ /* Arbiter interface version against which was implemented this module */ #define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 -#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ - MALI_ARBITER_INTERFACE_VERSION +#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != MALI_ARBITER_INTERFACE_VERSION #error "Unsupported Mali Arbiter interface version." #endif -static void on_max_config(struct device *dev, uint32_t max_l2_slices, - uint32_t max_core_mask) +static void on_max_config(struct device *dev, uint32_t max_l2_slices, uint32_t max_core_mask) { struct kbase_device *kbdev; @@ -54,9 +52,7 @@ static void on_max_config(struct device *dev, uint32_t max_l2_slices, } if (!max_l2_slices || !max_core_mask) { - dev_dbg(dev, - "%s(): max_config ignored as one of the fields is zero", - __func__); + dev_dbg(dev, "%s(): max_config ignored as one of the fields is zero", __func__); return; } @@ -187,8 +183,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "%s\n", __func__); - arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, - "arbiter_if", 0); + arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter_if", 0); if (!arbiter_if_node) { dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); /* no arbiter interface defined in device tree */ @@ -230,10 +225,9 @@ int kbase_arbif_init(struct kbase_device *kbdev) /* register kbase arbiter_if callbacks */ if (arb_if->vm_ops.vm_arb_register_dev) { - err = arb_if->vm_ops.vm_arb_register_dev(arb_if, - kbdev->dev, &ops); + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, 
kbdev->dev, &ops); if (err) { - dev_err(&pdev->dev, "Failed to register with arbiter\n"); + dev_err(&pdev->dev, "Failed to register with arbiter. (err = %d)\n", err); module_put(pdev->dev.driver->owner); put_device(&pdev->dev); if (err != -EPROBE_DEFER) diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c index 667552c561fb..4498b469300e 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,19 +36,19 @@ #define GPU_REQUEST_TIMEOUT 1000 #define KHZ_TO_HZ 1000 -#define MAX_L2_SLICES_MASK 0xFF +#define MAX_L2_SLICES_MASK 0xFF /* Maximum time in ms, before deferring probe incase * GPU_GRANTED message is not received */ static int gpu_req_timeout = 1; module_param(gpu_req_timeout, int, 0644); -MODULE_PARM_DESC(gpu_req_timeout, +MODULE_PARM_DESC( + gpu_req_timeout, "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); -static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( - struct kbase_device *kbdev); +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev); /** * kbase_arbiter_pm_vm_state_str() - Helper function to get string @@ -57,8 +57,7 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( * * Return: string representation of Kbase_vm_state */ -static inline const char *kbase_arbiter_pm_vm_state_str( - enum kbase_vm_state state) +static inline const char 
*kbase_arbiter_pm_vm_state_str(enum kbase_vm_state state) { switch (state) { case KBASE_VM_STATE_INITIALIZING: @@ -98,8 +97,7 @@ static inline const char *kbase_arbiter_pm_vm_state_str( * * Return: String representation of Kbase_arbif_event */ -static inline const char *kbase_arbiter_pm_vm_event_str( - enum kbase_arbif_evt evt) +static inline const char *kbase_arbiter_pm_vm_event_str(enum kbase_arbif_evt evt) { switch (evt) { case KBASE_VM_GPU_INITIALIZED_EVT: @@ -131,19 +129,18 @@ static inline const char *kbase_arbiter_pm_vm_event_str( * * This function sets the new state for the VM */ -static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, - enum kbase_vm_state new_state) +static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, enum kbase_vm_state new_state) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; dev_dbg(kbdev->dev, "VM set_state %s -> %s", - kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), - kbase_arbiter_pm_vm_state_str(new_state)); + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), + kbase_arbiter_pm_vm_state_str(new_state)); lockdep_assert_held(&arb_vm_state->vm_state_lock); arb_vm_state->vm_state = new_state; if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && - new_state != KBASE_VM_STATE_INITIALIZING) + new_state != KBASE_VM_STATE_INITIALIZING) KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state); wake_up(&arb_vm_state->vm_state_wait); } @@ -157,21 +154,18 @@ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, */ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) { - struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, - struct kbase_arbiter_vm_state, - vm_suspend_work); + struct kbase_arbiter_vm_state *arb_vm_state = + container_of(data, struct kbase_arbiter_vm_state, vm_suspend_work); struct kbase_device *kbdev = arb_vm_state->kbdev; mutex_lock(&arb_vm_state->vm_state_lock); dev_dbg(kbdev->dev, ">%s\n", __func__); if 
(arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE || - arb_vm_state->vm_state == - KBASE_VM_STATE_STOPPING_ACTIVE || - arb_vm_state->vm_state == - KBASE_VM_STATE_SUSPEND_PENDING) { + arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE || + arb_vm_state->vm_state == KBASE_VM_STATE_SUSPEND_PENDING) { mutex_unlock(&arb_vm_state->vm_state_lock); dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n"); - kbase_pm_driver_suspend(kbdev); + WARN_ON_ONCE(kbase_pm_driver_suspend(kbdev)); dev_dbg(kbdev->dev, "vm_state_lock); } @@ -188,9 +182,8 @@ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) */ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) { - struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, - struct kbase_arbiter_vm_state, - vm_resume_work); + struct kbase_arbiter_vm_state *arb_vm_state = + container_of(data, struct kbase_arbiter_vm_state, vm_resume_work); struct kbase_device *kbdev = arb_vm_state->kbdev; mutex_lock(&arb_vm_state->vm_state_lock); @@ -222,15 +215,15 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) */ static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) { - struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, - struct kbase_arbiter_vm_state, vm_request_timer); + struct kbase_arbiter_vm_state *arb_vm_state = + container_of(timer, struct kbase_arbiter_vm_state, vm_request_timer); KBASE_DEBUG_ASSERT(arb_vm_state); KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); dev_warn(arb_vm_state->kbdev->dev, - "Still waiting for GPU to be granted from Arbiter after %d ms\n", - GPU_REQUEST_TIMEOUT); + "Still waiting for GPU to be granted from Arbiter after %d ms\n", + GPU_REQUEST_TIMEOUT); return HRTIMER_NORESTART; } @@ -246,9 +239,8 @@ static void start_request_timer(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; - hrtimer_start(&arb_vm_state->vm_request_timer, - HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), - 
HRTIMER_MODE_REL); + hrtimer_start(&arb_vm_state->vm_request_timer, HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), + HRTIMER_MODE_REL); } /** @@ -280,8 +272,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) int err; struct kbase_arbiter_vm_state *arb_vm_state = NULL; - arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), - GFP_KERNEL); + arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL); if (arb_vm_state == NULL) return -ENOMEM; @@ -290,8 +281,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) mutex_init(&arb_vm_state->vm_state_lock); init_waitqueue_head(&arb_vm_state->vm_state_wait); - arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", - WQ_HIGHPRI); + arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", WQ_HIGHPRI); if (!arb_vm_state->vm_arb_wq) { dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); kfree(arb_vm_state); @@ -301,15 +291,13 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); arb_vm_state->vm_arb_starting = false; atomic_set(&kbdev->pm.gpu_users_waiting, 0); - hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - arb_vm_state->vm_request_timer.function = - request_timer_callback; + hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + arb_vm_state->vm_request_timer.function = request_timer_callback; kbdev->pm.arb_vm_state = arb_vm_state; err = kbase_arbif_init(kbdev); if (err) { - dev_err(kbdev->dev, "Failed to initialise arbif module\n"); + dev_err(kbdev->dev, "Failed to initialise arbif module. 
(err = %d)\n", err); goto arbif_init_fail; } @@ -318,21 +306,20 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); err = wait_event_timeout(arb_vm_state->vm_state_wait, - arb_vm_state->vm_state == - KBASE_VM_STATE_INITIALIZING_WITH_GPU, - msecs_to_jiffies(gpu_req_timeout)); + arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU, + msecs_to_jiffies(gpu_req_timeout)); if (!err) { dev_dbg(kbdev->dev, - "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", - gpu_req_timeout); + "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", + gpu_req_timeout); err = -ENODEV; goto arbif_timeout; } - dev_dbg(kbdev->dev, - "Waiting for initial GPU assignment - done\n"); + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment - done\n"); } return 0; @@ -423,9 +410,8 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) lockdep_assert_held(&arb_vm_state->vm_state_lock); if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 && - arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_ACTIVE); + arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE); dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); @@ -438,8 +424,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STOPPING_ACTIVE: request_gpu = true; - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); break; case KBASE_VM_STATE_STOPPING_IDLE: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED); @@ -448,8 +433,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); break; default: - 
dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", - arb_vm_state->vm_state); + dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", arb_vm_state->vm_state); break; } @@ -459,8 +443,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) start_request_timer(kbdev); } -void kbase_arbiter_set_max_config(struct kbase_device *kbdev, - uint32_t max_l2_slices, +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, uint32_t max_l2_slices, uint32_t max_core_mask) { struct kbase_arbiter_vm_state *arb_vm_state; @@ -544,8 +527,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) cancel_request_timer(kbdev); switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_INITIALIZING: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_INITIALIZING_WITH_GPU); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_INITIALIZING_WITH_GPU); break; case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); @@ -557,8 +539,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) */ kbase_gpuprops_req_curr_config_update(kbdev); /* curr_config will be updated while resuming the PM. 
*/ - queue_work(arb_vm_state->vm_arb_wq, - &arb_vm_state->vm_resume_work); + queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_resume_work); break; case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: kbase_pm_set_gpu_lost(kbdev, false); @@ -572,10 +553,8 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) * without a frequency update */ if (!freq_updated) - dev_warn(kbdev->dev, - "GPU_GRANTED when not expected - state %s\n", - kbase_arbiter_pm_vm_state_str( - arb_vm_state->vm_state)); + dev_warn(kbdev->dev, "GPU_GRANTED when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); break; } } @@ -599,31 +578,25 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_IDLE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_IDLE); - queue_work(arb_vm_state->vm_arb_wq, - &arb_vm_state->vm_suspend_work); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_IDLE); + queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_suspend_work); break; case KBASE_VM_STATE_ACTIVE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_ACTIVE); - queue_work(arb_vm_state->vm_arb_wq, - &arb_vm_state->vm_suspend_work); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE); + queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_suspend_work); break; case KBASE_VM_STATE_STARTING: dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING."); - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_ACTIVE); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE); if (arb_vm_state->vm_arb_starting) - queue_work(arb_vm_state->vm_arb_wq, - &arb_vm_state->vm_suspend_work); + queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_suspend_work); break; case KBASE_VM_STATE_SUSPEND_PENDING: /* Suspend finishes with a stop so nothing else to do */ break; default: dev_warn(kbdev->dev, "GPU_STOP when not expected - 
state %s\n", - kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); break; } } @@ -646,7 +619,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev) case KBASE_VM_STATE_ACTIVE: case KBASE_VM_STATE_IDLE: dev_warn(kbdev->dev, "GPU lost in state %s", - kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); kbase_arbiter_pm_vm_gpu_stop(kbdev); handle_gpu_lost = true; break; @@ -689,8 +662,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev) * * Return: True if its ready to be suspended else False. */ -static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( - struct kbase_device *kbdev) +static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(struct kbase_device *kbdev) { switch (kbdev->pm.arb_vm_state->vm_state) { case KBASE_VM_STATE_SUSPENDED: @@ -718,8 +690,7 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) lockdep_assert_held(&arb_vm_state->vm_state_lock); if (kbdev->arb.arb_if) { - if (kbdev->pm.arb_vm_state->vm_state == - KBASE_VM_STATE_SUSPENDED) + if (kbdev->pm.arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED) return; } /* Block suspend OS function until we are in a stable state @@ -730,17 +701,15 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STOPPING_ACTIVE: case KBASE_VM_STATE_STOPPING_IDLE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_SUSPEND_PENDING); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPEND_PENDING); break; case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); break; case KBASE_VM_STATE_STARTING: if (!arb_vm_state->vm_arb_starting) { kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_SUSPEND_PENDING); + 
KBASE_VM_STATE_SUSPEND_PENDING); kbase_arbiter_pm_vm_stopped(kbdev); } break; @@ -748,24 +717,21 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) break; } mutex_unlock(&arb_vm_state->vm_state_lock); - wait_event(arb_vm_state->vm_state_wait, - arb_vm_state->vm_state != prev_state); + wait_event(arb_vm_state->vm_state_wait, arb_vm_state->vm_state != prev_state); mutex_lock(&arb_vm_state->vm_state_lock); } switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STOPPED: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_SUSPENDED); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); break; case KBASE_VM_STATE_IDLE: case KBASE_VM_STATE_ACTIVE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_SUSPEND_PENDING); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPEND_PENDING); mutex_unlock(&arb_vm_state->vm_state_lock); /* Ensure resume has completed fully before starting suspend */ flush_work(&arb_vm_state->vm_resume_work); - kbase_pm_driver_suspend(kbdev); + WARN_ON_ONCE(kbase_pm_driver_suspend(kbdev)); mutex_lock(&arb_vm_state->vm_state_lock); break; case KBASE_VM_STATE_SUSPENDED: @@ -789,12 +755,10 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; lockdep_assert_held(&arb_vm_state->vm_state_lock); - KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == - KBASE_VM_STATE_SUSPENDED, - "Unexpected state to resume"); + KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED, + "Unexpected state to resume"); - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); start_request_timer(kbdev); @@ -816,8 +780,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) * The state machine function. 
Receives events and transitions states * according the event received and the current state */ -void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, - enum kbase_arbif_evt evt) +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt evt) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -825,10 +788,9 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, return; mutex_lock(&arb_vm_state->vm_state_lock); - dev_dbg(kbdev->dev, "%s %s\n", __func__, - kbase_arbiter_pm_vm_event_str(evt)); + dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_event_str(evt)); if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && - arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING) + arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING) KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt); switch (evt) { case KBASE_VM_GPU_GRANTED_EVT: @@ -850,8 +812,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, case KBASE_VM_GPU_IDLE_EVENT: switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_ACTIVE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_IDLE); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_IDLE); kbase_arbif_gpu_idle(kbdev); break; default: @@ -863,13 +824,11 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STARTING: case KBASE_VM_STATE_IDLE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_ACTIVE); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_ACTIVE); kbase_arbif_gpu_active(kbdev); break; case KBASE_VM_STATE_STOPPING_IDLE: - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_ACTIVE); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE); break; default: break; @@ -881,12 +840,10 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, case KBASE_VM_STATE_INITIALIZING_WITH_GPU: lockdep_assert_held(&kbdev->pm.lock); if (kbdev->pm.active_count > 0) { - 
kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_ACTIVE); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_ACTIVE); kbase_arbif_gpu_active(kbdev); } else { - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_IDLE); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_IDLE); kbase_arbif_gpu_idle(kbdev); } break; @@ -916,8 +873,8 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n"); wait_event(arb_vm_state->vm_state_wait, - arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || - arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); + arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); } @@ -929,8 +886,7 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) * * Return: true if GPU is assigned, else false. */ -static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( - struct kbase_device *kbdev) +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -953,7 +909,7 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( * Return: 0 on success else 1 suspend handler isn not possible. 
*/ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) + enum kbase_pm_suspend_handler suspend_handler) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; int res = 0; @@ -962,23 +918,18 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, mutex_lock(&arb_vm_state->vm_state_lock); while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { /* Update VM state since we have GPU work to do */ - if (arb_vm_state->vm_state == - KBASE_VM_STATE_STOPPING_IDLE) + if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPING_ACTIVE); - else if (arb_vm_state->vm_state == - KBASE_VM_STATE_STOPPED) { + KBASE_VM_STATE_STOPPING_ACTIVE); + else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPED) { kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); start_request_timer(kbdev); - } else if (arb_vm_state->vm_state == - KBASE_VM_STATE_INITIALIZING_WITH_GPU) + } else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) break; - if (suspend_handler != - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { - + if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { /* In case of GPU lost, even if * active_count > 0, we no longer have GPU * access @@ -1024,8 +975,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, * @arb_freq: Pointer to struchture holding GPU clock frequenecy data * @freq: New frequency value in KHz */ -void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, - uint32_t freq) +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, uint32_t freq) { struct kbase_gpu_clk_notifier_data ndata; @@ -1037,8 +987,7 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, arb_freq->arb_freq = freq; 
arb_freq->freq_updated = true; if (arb_freq->nb) - arb_freq->nb->notifier_call(arb_freq->nb, - POST_RATE_CHANGE, &ndata); + arb_freq->nb->notifier_call(arb_freq->nb, POST_RATE_CHANGE, &ndata); } mutex_unlock(&arb_freq->arb_freq_lock); @@ -1052,8 +1001,7 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, * Return: Pointer to structure holding GPU clock frequency data reported from * arbiter, only index 0 is valid. */ -static void *get_arb_gpu_clk(struct kbase_device *kbdev, - unsigned int index) +static void *get_arb_gpu_clk(struct kbase_device *kbdev, unsigned int index) { if (index == 0) return &kbdev->arb.arb_freq; @@ -1067,12 +1015,10 @@ static void *get_arb_gpu_clk(struct kbase_device *kbdev, * * Return: The GPU clock frequency value saved when gpu is granted from arbiter */ -static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, - void *gpu_clk_handle) +static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, void *gpu_clk_handle) { uint32_t freq; - struct kbase_arbiter_freq *arb_dev_freq = - (struct kbase_arbiter_freq *) gpu_clk_handle; + struct kbase_arbiter_freq *arb_dev_freq = (struct kbase_arbiter_freq *)gpu_clk_handle; mutex_lock(&arb_dev_freq->arb_freq_lock); /* Convert from KHz to Hz */ @@ -1092,12 +1038,11 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, * * Return: 0 on success, negative error code otherwise. 
*/ -static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, - void *gpu_clk_handle, struct notifier_block *nb) +static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, void *gpu_clk_handle, + struct notifier_block *nb) { int ret = 0; - struct kbase_arbiter_freq *arb_dev_freq = - (struct kbase_arbiter_freq *)gpu_clk_handle; + struct kbase_arbiter_freq *arb_dev_freq = (struct kbase_arbiter_freq *)gpu_clk_handle; if (!arb_dev_freq->nb) arb_dev_freq->nb = nb; @@ -1117,16 +1062,14 @@ static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, * was previously registered to get notified of a frequency change of the * clock corresponding to @gpu_clk_handle. */ -static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, - void *gpu_clk_handle, struct notifier_block *nb) +static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, void *gpu_clk_handle, + struct notifier_block *nb) { - struct kbase_arbiter_freq *arb_dev_freq = - (struct kbase_arbiter_freq *)gpu_clk_handle; + struct kbase_arbiter_freq *arb_dev_freq = (struct kbase_arbiter_freq *)gpu_clk_handle; if (arb_dev_freq->nb == nb) { arb_dev_freq->nb = NULL; } else { - dev_err(kbdev->dev, "%s - notifier did not match\n", - __func__); + dev_err(kbdev->dev, "%s - notifier did not match\n", __func__); } } diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h index f863f88601e6..3734d32b6e2b 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -114,8 +114,7 @@ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); * The state machine function. Receives events and transitions states * according the event received and the current state */ -void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, - enum kbase_arbif_evt event); +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt event); /** * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for @@ -131,8 +130,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, * Return: 0 if success, 1 if failure due to system suspending/suspended */ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler); - + enum kbase_pm_suspend_handler suspend_handler); /** * kbase_arbiter_pm_vm_stopped() - Handle stop event for the VM @@ -152,8 +150,7 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); * This function handles a stop event for the VM. * It will update the VM state and forward the stop event to the driver. 
*/ -void kbase_arbiter_set_max_config(struct kbase_device *kbdev, - uint32_t max_l2_slices, +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, uint32_t max_l2_slices, uint32_t max_core_mask); /** @@ -190,7 +187,6 @@ struct kbase_arbiter_freq { * * Updates the GPU frequency and triggers any notifications */ -void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, - uint32_t freq); +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, uint32_t freq); #endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index efebc8a544d1..c3db14217c6d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -22,7 +22,6 @@ bifrost_kbase-y += \ backend/gpu/mali_kbase_cache_policy_backend.o \ backend/gpu/mali_kbase_gpuprops_backend.o \ backend/gpu/mali_kbase_irq_linux.o \ - backend/gpu/mali_kbase_js_backend.o \ backend/gpu/mali_kbase_pm_backend.o \ backend/gpu/mali_kbase_pm_driver.o \ backend/gpu/mali_kbase_pm_metrics.o \ @@ -40,7 +39,8 @@ ifeq ($(MALI_USE_CSF),0) backend/gpu/mali_kbase_jm_as.o \ backend/gpu/mali_kbase_debug_job_fault_backend.o \ backend/gpu/mali_kbase_jm_hw.o \ - backend/gpu/mali_kbase_jm_rb.o + backend/gpu/mali_kbase_jm_rb.o \ + backend/gpu/mali_kbase_js_backend.o endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h index 6924fdb8a608..f0368c2e59b9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,4 +27,3 @@ #define _KBASE_BACKEND_CONFIG_H_ #endif /* _KBASE_BACKEND_CONFIG_H_ */ - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c index 7c0abbaf860f..aa8436420234 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,71 +22,43 @@ #include "backend/gpu/mali_kbase_cache_policy_backend.h" #include -/** - * kbasep_amba_register_present() - Check AMBA_<> register is present - * in the GPU. - * @kbdev: Device pointer - * - * Note: Only for arch version 12.x.1 onwards. - * - * Return: true if AMBA_FEATURES/ENABLE registers are present. 
- */ -static bool kbasep_amba_register_present(struct kbase_device *kbdev) +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { - return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >= - GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1)); -} -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode) -{ kbdev->current_gpu_coherency_mode = mode; - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); +#if MALI_USE_CSF + if (kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(12, 0, 1)) { + /* AMBA_ENABLE present from 12.0.1 */ + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE)); val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - } else - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); -} - -u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) -{ - u32 coherency_features; - - if (kbasep_amba_register_present(kbdev)) - coherency_features = - kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES)); - else - coherency_features = kbase_reg_read( - kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); - - return coherency_features; -} - -void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, - bool enable) -{ - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); - - val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val); } else { - WARN(1, "memory_cache_support not supported"); + /* Fallback to COHERENCY_ENABLE for older versions */ + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode); } +#else /* MALI_USE_CSF */ + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode); +#endif /* MALI_USE_CSF */ } -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) +void kbase_amba_set_shareable_cache_support(struct kbase_device 
*kbdev) { - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); +#if MALI_USE_CSF - val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - } else { - WARN(1, "invalidate_hint not supported"); + /* AMBA registers only present from 12.0.1 */ + if (kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(12, 0, 1)) + return; + + if (kbdev->system_coherency != COHERENCY_NONE) { + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_FEATURES)); + + if (AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_GET(val)) { + val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE)); + val = AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SET(val, 1); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val); + } } +#endif /* MALI_USE_CSF */ } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h index 758e3be08c16..d95aa37d8950 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,35 +31,14 @@ * @kbdev: Device pointer * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE */ -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode); +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode); /** - * kbase_cache_get_coherency_features() - Get the coherency features - * in the GPU. + * kbase_amba_set_shareable_cache_support() - Sets AMBA shareable cache support + * in the GPU. 
* @kbdev: Device pointer * - * Return: Register value to be returned - */ -u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); - -/** - * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support - * in the GPU. - * @kbdev: Device pointer - * @enable: true for enable. - * * Note: Only for arch version 12.x.1 onwards. */ -void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, - bool enable); -/** - * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint - * in the GPU. - * @kbdev: Device pointer - * @enable: true for enable. - * - * Note: Only for arch version 12.x.1 onwards. - */ -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); +void kbase_amba_set_shareable_cache_support(struct kbase_device *kbdev); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index ddd03ca23de6..e47dd440bff2 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) return callbacks; - arbiter_if_node = - of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ if (arbiter_if_node) callbacks = &arb_clk_rate_trace_ops; @@ -72,8 +74,7 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) return callbacks; } -static int gpu_clk_rate_change_notifier(struct notifier_block *nb, - unsigned long event, void *data) +static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct kbase_gpu_clk_notifier_data *ndata = data; struct kbase_clk_data *clk_data = @@ -86,10 +87,9 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb, spin_lock_irqsave(&clk_rtm->lock, flags); if (event == POST_RATE_CHANGE) { - if (!clk_rtm->gpu_idle && - (clk_data->clock_val != ndata->new_rate)) { - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, ndata->new_rate); + if (!clk_rtm->gpu_idle && (clk_data->clock_val != ndata->new_rate)) { + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, + ndata->new_rate); } clk_data->clock_val = ndata->new_rate; @@ -99,8 +99,7 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb, return NOTIFY_DONE; } -static int gpu_clk_data_init(struct kbase_device *kbdev, - void *gpu_clk_handle, unsigned int index) +static int gpu_clk_data_init(struct kbase_device *kbdev, void *gpu_clk_handle, unsigned int index) { struct kbase_clk_rate_trace_op_conf *callbacks; struct kbase_clk_data 
*clk_data; @@ -109,44 +108,42 @@ static int gpu_clk_data_init(struct kbase_device *kbdev, callbacks = get_clk_rate_trace_callbacks(kbdev); - if (WARN_ON(!callbacks) || - WARN_ON(!gpu_clk_handle) || + if (WARN_ON(!callbacks) || WARN_ON(!gpu_clk_handle) || WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) return -EINVAL; clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); if (!clk_data) { - dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); + dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", + index); return -ENOMEM; } clk_data->index = (u8)index; clk_data->gpu_clk_handle = gpu_clk_handle; /* Store the initial value of clock */ - clk_data->clock_val = - callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); + clk_data->clock_val = callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); { /* At the initialization time, GPU is powered off. */ unsigned long flags; spin_lock_irqsave(&clk_rtm->lock, flags); - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, 0); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, 0); spin_unlock_irqrestore(&clk_rtm->lock, flags); } clk_data->clk_rtm = clk_rtm; clk_rtm->clks[index] = clk_data; - clk_data->clk_rate_change_nb.notifier_call = - gpu_clk_rate_change_notifier; + clk_data->clk_rate_change_nb.notifier_call = gpu_clk_rate_change_notifier; if (callbacks->gpu_clk_notifier_register) - ret = callbacks->gpu_clk_notifier_register(kbdev, - gpu_clk_handle, &clk_data->clk_rate_change_nb); + ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle, + &clk_data->clk_rate_change_nb); if (ret) { - dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); + dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", + index); kfree(clk_data); } @@ -174,8 +171,7 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) clk_rtm->gpu_idle = true; for (i = 0; i < 
BASE_MAX_NR_CLOCKS_REGULATORS; i++) { - void *gpu_clk_handle = - callbacks->enumerate_gpu_clk(kbdev, i); + void *gpu_clk_handle = callbacks->enumerate_gpu_clk(kbdev, i); if (!gpu_clk_handle) break; @@ -200,8 +196,8 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) error: while (i--) { clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( - kbdev, clk_rtm->clks[i]->gpu_clk_handle, - &clk_rtm->clks[i]->clk_rate_change_nb); + kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); kfree(clk_rtm->clks[i]); } @@ -223,9 +219,9 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) break; if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) - clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister - (kbdev, clk_rtm->clks[i]->gpu_clk_handle, - &clk_rtm->clks[i]->clk_rate_change_nb); + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( + kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); kfree(clk_rtm->clks[i]); } @@ -252,8 +248,8 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) if (unlikely(!clk_data->clock_val)) continue; - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, clk_data->clock_val); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, + clk_data->clock_val); } clk_rtm->gpu_idle = false; @@ -280,18 +276,15 @@ void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) if (unlikely(!clk_data->clock_val)) continue; - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, 0); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, 0); } clk_rtm->gpu_idle = true; spin_unlock_irqrestore(&clk_rtm->lock, flags); } -void kbase_clk_rate_trace_manager_notify_all( - struct kbase_clk_rate_trace_manager *clk_rtm, - u32 clk_index, - unsigned long new_rate) +void kbase_clk_rate_trace_manager_notify_all(struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clk_index, 
unsigned long new_rate) { struct kbase_clk_rate_listener *pos; struct kbase_device *kbdev; @@ -300,8 +293,8 @@ void kbase_clk_rate_trace_manager_notify_all( kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); - dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", - __func__, clk_index, new_rate, current->pid); + dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", __func__, clk_index, + new_rate, current->pid); /* Raise standard `power/gpu_frequency` ftrace event */ { diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h index 35b3b8d06d96..81a1e15f920b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -90,9 +90,9 @@ void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); * * kbase_clk_rate_trace_manager:lock must be held by the caller. */ -static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( - struct kbase_clk_rate_trace_manager *clk_rtm, - struct kbase_clk_rate_listener *listener) +static inline void +kbase_clk_rate_trace_manager_subscribe_no_lock(struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) { lockdep_assert_held(&clk_rtm->lock); list_add(&listener->node, &clk_rtm->listeners); @@ -104,15 +104,14 @@ static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( * @clk_rtm: Clock rate manager instance. 
* @listener: Listener handle */ -static inline void kbase_clk_rate_trace_manager_subscribe( - struct kbase_clk_rate_trace_manager *clk_rtm, - struct kbase_clk_rate_listener *listener) +static inline void +kbase_clk_rate_trace_manager_subscribe(struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) { unsigned long flags; spin_lock_irqsave(&clk_rtm->lock, flags); - kbase_clk_rate_trace_manager_subscribe_no_lock( - clk_rtm, listener); + kbase_clk_rate_trace_manager_subscribe_no_lock(clk_rtm, listener); spin_unlock_irqrestore(&clk_rtm->lock, flags); } @@ -122,9 +121,9 @@ static inline void kbase_clk_rate_trace_manager_subscribe( * @clk_rtm: Clock rate manager instance. * @listener: Listener handle */ -static inline void kbase_clk_rate_trace_manager_unsubscribe( - struct kbase_clk_rate_trace_manager *clk_rtm, - struct kbase_clk_rate_listener *listener) +static inline void +kbase_clk_rate_trace_manager_unsubscribe(struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) { unsigned long flags; @@ -145,10 +144,7 @@ static inline void kbase_clk_rate_trace_manager_unsubscribe( * This function is exported to be used by clock rate trace test * portal. 
*/ -void kbase_clk_rate_trace_manager_notify_all( - struct kbase_clk_rate_trace_manager *clk_rtm, - u32 clock_index, - unsigned long new_rate); +void kbase_clk_rate_trace_manager_notify_all(struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clock_index, unsigned long new_rate); #endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c index e121b417f80d..bd3622e7d44b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,71 +21,47 @@ #include #include +#include #include "mali_kbase_debug_job_fault.h" #if IS_ENABLED(CONFIG_DEBUG_FS) /*GPU_CONTROL_REG(r)*/ -static int gpu_control_reg_snapshot[] = { - GPU_ID, - SHADER_READY_LO, - SHADER_READY_HI, - TILER_READY_LO, - TILER_READY_HI, - L2_READY_LO, - L2_READY_HI -}; +static int gpu_control_reg_snapshot[] = { GPU_CONTROL_ENUM(GPU_ID), GPU_CONTROL_ENUM(SHADER_READY), + GPU_CONTROL_ENUM(TILER_READY), + GPU_CONTROL_ENUM(L2_READY) }; /* JOB_CONTROL_REG(r) */ -static int job_control_reg_snapshot[] = { - JOB_IRQ_MASK, - JOB_IRQ_STATUS -}; +static int job_control_reg_snapshot[] = { JOB_CONTROL_ENUM(JOB_IRQ_MASK), + JOB_CONTROL_ENUM(JOB_IRQ_STATUS) }; /* JOB_SLOT_REG(n,r) */ -static int job_slot_reg_snapshot[] = { - JS_HEAD_LO, - JS_HEAD_HI, - JS_TAIL_LO, - JS_TAIL_HI, - JS_AFFINITY_LO, - JS_AFFINITY_HI, - JS_CONFIG, - JS_STATUS, - JS_HEAD_NEXT_LO, - JS_HEAD_NEXT_HI, - JS_AFFINITY_NEXT_LO, - 
JS_AFFINITY_NEXT_HI, - JS_CONFIG_NEXT -}; +static int job_slot_reg_snapshot[] = { JOB_SLOT_ENUM(0, HEAD) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, TAIL) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, AFFINITY) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, CONFIG) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, STATUS) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, HEAD_NEXT) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, AFFINITY_NEXT) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, CONFIG_NEXT) - JOB_SLOT_BASE_ENUM(0) }; -/*MMU_REG(r)*/ -static int mmu_reg_snapshot[] = { - MMU_IRQ_MASK, - MMU_IRQ_STATUS -}; +/*MMU_CONTROL_REG(r)*/ +static int mmu_reg_snapshot[] = { MMU_CONTROL_ENUM(IRQ_MASK), MMU_CONTROL_ENUM(IRQ_STATUS) }; /* MMU_AS_REG(n,r) */ -static int as_reg_snapshot[] = { - AS_TRANSTAB_LO, - AS_TRANSTAB_HI, - AS_TRANSCFG_LO, - AS_TRANSCFG_HI, - AS_MEMATTR_LO, - AS_MEMATTR_HI, - AS_FAULTSTATUS, - AS_FAULTADDRESS_LO, - AS_FAULTADDRESS_HI, - AS_STATUS -}; +static int as_reg_snapshot[] = { MMU_AS_ENUM(0, TRANSTAB) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, TRANSCFG) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, MEMATTR) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, FAULTSTATUS) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, FAULTADDRESS) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, STATUS) - MMU_AS_BASE_ENUM(0) }; -bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, - int reg_range) +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, int reg_range) { - int i, j; + uint i, j; int offset = 0; - int slot_number; - int as_number; + uint slot_number; + uint as_number; if (kctx->reg_dump == NULL) return false; @@ -94,51 +70,61 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, as_number = kctx->kbdev->gpu_props.num_address_spaces; /* get the GPU control registers*/ - for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); - offset += 2; + for (i = 0; i < 
ARRAY_SIZE(gpu_control_reg_snapshot); i++) { + kctx->reg_dump[offset] = gpu_control_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } /* get the Job control registers*/ - for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - JOB_CONTROL_REG(job_control_reg_snapshot[i]); - offset += 2; + for (i = 0; i < ARRAY_SIZE(job_control_reg_snapshot); i++) { + kctx->reg_dump[offset] = job_control_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } /* get the Job Slot registers*/ - for (j = 0; j < slot_number; j++) { - for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); - offset += 2; + for (j = 0; j < slot_number; j++) { + for (i = 0; i < ARRAY_SIZE(job_slot_reg_snapshot); i++) { + kctx->reg_dump[offset] = JOB_SLOT_BASE_OFFSET(j) + job_slot_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } } /* get the MMU registers*/ - for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); - offset += 2; + for (i = 0; i < ARRAY_SIZE(mmu_reg_snapshot); i++) { + kctx->reg_dump[offset] = mmu_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } /* get the Address space registers*/ for (j = 0; j < as_number; j++) { - for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - MMU_AS_REG(j, as_reg_snapshot[i]); - offset += 2; + for (i = 0; i < ARRAY_SIZE(as_reg_snapshot); i++) { + kctx->reg_dump[offset] = MMU_AS_BASE_OFFSET(j) + as_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } } - WARN_ON(offset >= (reg_range*2/4)); + WARN_ON(offset >= (reg_range * 2 / 4)); /* set the 
termination flag*/ kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; - dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", - offset); + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", offset); return true; } @@ -146,18 +132,32 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) { int offset = 0; + int reg_enum; + u64 val64; if (kctx->reg_dump == NULL) return false; while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { - kctx->reg_dump[offset+1] = - kbase_reg_read(kctx->kbdev, - kctx->reg_dump[offset]); - offset += 2; + reg_enum = kctx->reg_dump[offset]; + /* Get register offset from enum */ + kbase_reg_get_offset(kctx->kbdev, reg_enum, &kctx->reg_dump[offset]); + + if (kbase_reg_is_size64(kctx->kbdev, reg_enum)) { + val64 = kbase_reg_read64(kctx->kbdev, reg_enum); + + /* offset computed offset to get _HI offset */ + kctx->reg_dump[offset + 2] = kctx->reg_dump[offset] + 4; + + kctx->reg_dump[offset + 1] = (u32)(val64 & 0xFFFFFFFF); + kctx->reg_dump[offset + 3] = (u32)(val64 >> 32); + offset += 4; + } else { + kctx->reg_dump[offset + 1] = kbase_reg_read32(kctx->kbdev, reg_enum); + offset += 2; + } } return true; } - #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c index 02fb00da365c..6b29228765cd 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -91,8 +91,8 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) return voltage; } -void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, - u64 *core_mask, unsigned long *freqs, unsigned long *volts) +void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask, + unsigned long *freqs, unsigned long *volts) { unsigned int i; @@ -102,10 +102,8 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, *core_mask = kbdev->devfreq_table[i].core_mask; for (j = 0; j < kbdev->nr_clocks; j++) { - freqs[j] = - kbdev->devfreq_table[i].real_freqs[j]; - volts[j] = - kbdev->devfreq_table[i].opp_volts[j]; + freqs[j] = kbdev->devfreq_table[i].real_freqs[j]; + volts[j] = kbdev->devfreq_table[i].opp_volts[j]; } break; @@ -118,7 +116,7 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, if (i == kbdev->num_opps) { unsigned long voltage = get_voltage(kbdev, freq); - *core_mask = kbdev->gpu_props.props.raw_props.shader_present; + *core_mask = kbdev->gpu_props.shader_present; for (i = 0; i < kbdev->nr_clocks; i++) { freqs[i] = freq; @@ -245,8 +243,7 @@ restore_voltage: return ret; } -static int -kbase_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +static int kbase_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { struct kbase_device *kbdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; @@ -280,8 +277,7 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq) kbase_devfreq_target(kbdev->dev, &target_freq, 0); } -static int -kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) +static int kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) { struct kbase_device *kbdev = dev_get_drvdata(dev); @@ 
-290,8 +286,7 @@ kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) return 0; } -static int -kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +static int kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) { struct kbase_device *kbdev = dev_get_drvdata(dev); struct kbasep_pm_metrics diff; @@ -311,8 +306,7 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) return 0; } -static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, - struct devfreq_dev_profile *dp) +static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, struct devfreq_dev_profile *dp) { int count; int i = 0; @@ -329,8 +323,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, if (count < 0) return count; - dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), - GFP_KERNEL); + dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), GFP_KERNEL); if (!dp->freq_table) return -ENOMEM; @@ -352,8 +345,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, #endif if (count != i) - dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", - count, i); + dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", count, i); dp->max_state = i; @@ -398,8 +390,7 @@ static void kbase_devfreq_exit(struct device *dev) kbase_devfreq_term_freq_table(kbdev); } -static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, - struct device_node *node) +static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, struct device_node *node) { u64 freq = 0; int err = 0; @@ -428,8 +419,7 @@ static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, return; kbdev->pm.backend.gpu_clock_suspend_freq = freq; - dev_info(kbdev->dev, - "suspend clock %llu by opp-mali-errata-1485982", freq); + dev_info(kbdev->dev, "suspend clock %llu by opp-mali-errata-1485982", freq); } static int kbase_devfreq_init_core_mask_table(struct 
kbase_device *kbdev) @@ -444,12 +434,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) */ return 0; #else - struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, - "operating-points-v2", 0); + struct device_node *opp_node = + of_parse_phandle(kbdev->dev->of_node, "operating-points-v2", 0); struct device_node *node; - int i = 0; + unsigned int i = 0; int count; - u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; + u64 shader_present = kbdev->gpu_props.shader_present; if (!opp_node) return 0; @@ -457,15 +447,13 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) return 0; count = dev_pm_opp_get_opp_count(kbdev->dev); - kbdev->devfreq_table = kmalloc_array(count, - sizeof(struct kbase_devfreq_opp), GFP_KERNEL); + kbdev->devfreq_table = kmalloc_array(count, sizeof(struct kbase_devfreq_opp), GFP_KERNEL); if (!kbdev->devfreq_table) return -ENOMEM; for_each_available_child_of_node(opp_node, node) { const void *core_count_p; - u64 core_mask, opp_freq, - real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 core_mask, opp_freq, real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; int err; #if IS_ENABLED(CONFIG_REGULATOR) u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; @@ -477,30 +465,28 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u64(node, "opp-hz", &opp_freq); if (err) { - dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", - err); + dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", err); continue; } #if BASE_MAX_NR_CLOCKS_REGULATORS > 1 - err = of_property_read_u64_array(node, "opp-hz-real", - real_freqs, kbdev->nr_clocks); + err = of_property_read_u64_array(node, "opp-hz-real", real_freqs, kbdev->nr_clocks); #else WARN_ON(kbdev->nr_clocks != 1); err = of_property_read_u64(node, "opp-hz-real", real_freqs); #endif if (err < 0) { dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", - 
err); + err); continue; } #if IS_ENABLED(CONFIG_REGULATOR) - err = of_property_read_u32_array(node, - "opp-microvolt", opp_volts, kbdev->nr_regulators); + err = of_property_read_u32_array(node, "opp-microvolt", opp_volts, + kbdev->nr_regulators); if (err < 0) { - dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n", - err); + dev_warn(kbdev->dev, + "Failed to read opp-microvolt property with error %d\n", err); continue; } #endif @@ -509,15 +495,16 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) core_mask = shader_present; if (core_mask != shader_present && corestack_driver_control) { - dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", - opp_freq); + dev_warn( + kbdev->dev, + "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + opp_freq); continue; } core_count_p = of_get_property(node, "opp-core-count", NULL); if (core_count_p) { - u64 remaining_core_mask = - kbdev->gpu_props.props.raw_props.shader_present; + u64 remaining_core_mask = kbdev->gpu_props.shader_present; int core_count = be32_to_cpup(core_count_p); core_mask = 0; @@ -530,8 +517,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) return -ENODEV; } - core_mask |= (1ull << (core-1)); - remaining_core_mask &= ~(1ull << (core-1)); + core_mask |= (1ull << (core - 1)); + remaining_core_mask &= ~(1ull << (core - 1)); } } @@ -543,24 +530,22 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) kbdev->devfreq_table[i].opp_freq = opp_freq; kbdev->devfreq_table[i].core_mask = core_mask; if (kbdev->nr_clocks > 0) { - int j; + unsigned int j; for (j = 0; j < kbdev->nr_clocks; j++) - kbdev->devfreq_table[i].real_freqs[j] = - real_freqs[j]; + kbdev->devfreq_table[i].real_freqs[j] = real_freqs[j]; } #if IS_ENABLED(CONFIG_REGULATOR) if (kbdev->nr_regulators > 0) { - int j; + unsigned int j; for (j = 0; j < kbdev->nr_regulators; j++) - 
kbdev->devfreq_table[i].opp_volts[j] = - opp_volts[j]; + kbdev->devfreq_table[i].opp_volts[j] = opp_volts[j]; } #endif - dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", - i, opp_freq, core_mask); + dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", i, opp_freq, + core_mask); i++; } @@ -593,10 +578,9 @@ static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) { - struct kbase_devfreq_queue_info *info = container_of(work, - struct kbase_devfreq_queue_info, work); - struct kbase_device *kbdev = container_of(info, struct kbase_device, - devfreq_queue); + struct kbase_devfreq_queue_info *info = + container_of(work, struct kbase_devfreq_queue_info, work); + struct kbase_device *kbdev = container_of(info, struct kbase_device, devfreq_queue); unsigned long flags; enum kbase_devfreq_work_type type, acted_type; @@ -606,8 +590,7 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) acted_type = kbdev->devfreq_queue.acted_type; dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n", - kbase_devfreq_req_type_name(type), - kbase_devfreq_req_type_name(acted_type)); + kbase_devfreq_req_type_name(type), kbase_devfreq_req_type_name(acted_type)); switch (type) { case DEVFREQ_WORK_SUSPEND: case DEVFREQ_WORK_RESUME: @@ -627,8 +610,7 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) } } -void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, - enum kbase_devfreq_work_type work_type) +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type) { unsigned long flags; @@ -637,12 +619,10 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, /* Skip enqueuing a work if workqueue has already been terminated. 
*/ if (likely(kbdev->devfreq_queue.workq)) { kbdev->devfreq_queue.req_type = work_type; - queue_work(kbdev->devfreq_queue.workq, - &kbdev->devfreq_queue.work); + queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", - kbase_devfreq_req_type_name(work_type)); + dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", kbase_devfreq_req_type_name(work_type)); } static int kbase_devfreq_work_init(struct kbase_device *kbdev) @@ -654,8 +634,7 @@ static int kbase_devfreq_work_init(struct kbase_device *kbdev) if (!kbdev->devfreq_queue.workq) return -ENOMEM; - INIT_WORK(&kbdev->devfreq_queue.work, - kbase_devfreq_suspend_resume_worker); + INIT_WORK(&kbdev->devfreq_queue.work, kbase_devfreq_suspend_resume_worker); return 0; } @@ -699,10 +678,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) for (i = 0; i < kbdev->nr_clocks; i++) { if (kbdev->clocks[i]) - kbdev->current_freqs[i] = - clk_get_rate(kbdev->clocks[i]); - else - kbdev->current_freqs[i] = 0; + kbdev->current_freqs[i] = clk_get_rate(kbdev->clocks[i]); } if (strstr(__clk_get_name(kbdev->clocks[0]), "scmi")) kbdev->opp_info.scmi_clk = kbdev->clocks[0]; @@ -727,9 +703,9 @@ int kbase_devfreq_init(struct kbase_device *kbdev) if (dp->max_state > 0) { /* Record the maximum frequency possible */ - kbdev->gpu_props.props.core_props.gpu_freq_khz_max = - dp->freq_table[0] / 1000; - }; + kbdev->gpu_props.gpu_freq_khz_max = dp->freq_table[0] / 1000; + } + err = kbase_devfreq_init_core_mask_table(kbdev); if (err) goto init_core_mask_table_failed; @@ -738,8 +714,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) &ondemand_data.upthreshold); of_property_read_u32(np, "downdifferential", &ondemand_data.downdifferential); - kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, - "simple_ondemand", &ondemand_data); + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", NULL); if 
(IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); kbdev->devfreq = NULL; @@ -764,8 +739,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); if (err) { - dev_err(kbdev->dev, - "Failed to register OPP notifier (%d)", err); + dev_err(kbdev->dev, "Failed to register OPP notifier (%d)", err); goto opp_notifier_failed; } @@ -823,14 +797,10 @@ int kbase_devfreq_init(struct kbase_device *kbdev) } kbdev->devfreq_cooling = of_devfreq_cooling_register_power( - kbdev->dev->of_node, - kbdev->devfreq, - &kbase_ipa_power_model_ops); + kbdev->dev->of_node, kbdev->devfreq, &kbase_ipa_power_model_ops); if (IS_ERR(kbdev->devfreq_cooling)) { err = PTR_ERR(kbdev->devfreq_cooling); - dev_err(kbdev->dev, - "Failed to register cooling device (%d)\n", - err); + dev_err(kbdev->dev, "Failed to register cooling device (%d)", err); goto cooling_reg_failed; } } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h index 123c58cb030f..61342d47addf 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,14 @@ #ifndef _BASE_DEVFREQ_H_ #define _BASE_DEVFREQ_H_ +/** + * kbase_devfreq_init - Initialize kbase device for DevFreq. + * @kbdev: Device pointer + * + * This function must be called only when a kbase device is initialized. + * + * Return: 0 on success. 
+ */ int kbase_devfreq_init(struct kbase_device *kbdev); void kbase_devfreq_term(struct kbase_device *kbdev); @@ -40,8 +48,7 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); * @kbdev: Device pointer * @work_type: The type of the devfreq work item, i.e. suspend or resume */ -void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, - enum kbase_devfreq_work_type work_type); +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type); /** * kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree @@ -58,6 +65,6 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, * untranslated frequency (and corresponding voltage) and all cores enabled. * The voltages returned are in micro Volts (uV). */ -void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, - u64 *core_mask, unsigned long *freqs, unsigned long *volts); +void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask, + unsigned long *freqs, unsigned long *volts); #endif /* _BASE_DEVFREQ_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c index 10e92ec94d3a..443a1466e0e0 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,175 +25,112 @@ #include #include -#include -#include #include +#include -int kbase_backend_gpuprops_get(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprops_regdump *regdump) { int i; - struct kbase_gpuprops_regdump registers = { 0 }; - /* Fill regdump with the content of the relevant registers */ - registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + /* regdump is zero initialized, individual entries do not need to be explicitly set */ + regdump->gpu_id = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)); - registers.l2_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_FEATURES)); + regdump->shader_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT)); + regdump->tiler_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_PRESENT)); + regdump->l2_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PRESENT)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(AS_PRESENT))) + regdump->as_present = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AS_PRESENT)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(STACK_PRESENT))) + regdump->stack_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(STACK_PRESENT)); - registers.tiler_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_FEATURES)); - registers.mem_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MEM_FEATURES)); - registers.mmu_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MMU_FEATURES)); - registers.as_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(AS_PRESENT)); #if !MALI_USE_CSF - registers.js_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_PRESENT)); -#else /* !MALI_USE_CSF */ - registers.js_present = 0; + regdump->js_present = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JS_PRESENT)); + /* Not a valid register
on TMIX */ + + /* TGOx specific register */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_TLS_ALLOC)) + regdump->thread_tls_alloc = + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_TLS_ALLOC)); #endif /* !MALI_USE_CSF */ + regdump->thread_max_threads = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_THREADS)); + regdump->thread_max_workgroup_size = + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_WORKGROUP_SIZE)); + regdump->thread_max_barrier_size = + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_BARRIER_SIZE)); + regdump->thread_features = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_FEATURES)); + + /* Feature Registers */ + /* AMBA_FEATURES enum is mapped to COHERENCY_FEATURES enum */ + regdump->coherency_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES)); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) + regdump->core_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(CORE_FEATURES)); + +#if MALI_USE_CSF + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(GPU_FEATURES))) + regdump->gpu_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_FEATURES)); +#endif /* MALI_USE_CSF */ + + regdump->tiler_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(TILER_FEATURES)); + regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); + regdump->mem_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MEM_FEATURES)); + regdump->mmu_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MMU_FEATURES)); + +#if !MALI_USE_CSF for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) -#if !MALI_USE_CSF - registers.js_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_FEATURES_REG(i))); -#else /* !MALI_USE_CSF */ - registers.js_features[i] = 0; + regdump->js_features[i] = kbase_reg_read32(kbdev, GPU_JS_FEATURES_OFFSET(i)); #endif /* !MALI_USE_CSF */ - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - registers.texture_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); - - 
registers.thread_max_threads = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_THREADS)); - registers.thread_max_workgroup_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); - registers.thread_max_barrier_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); - registers.thread_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_FEATURES)); - registers.thread_tls_alloc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_TLS_ALLOC)); - - registers.shader_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_LO)); - registers.shader_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_HI)); - - registers.tiler_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_LO)); - registers.tiler_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_HI)); - - registers.l2_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_LO)); - registers.l2_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_HI)); - - registers.stack_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_LO)); - registers.stack_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_HI)); - - if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) { - registers.gpu_features_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FEATURES_LO)); - registers.gpu_features_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FEATURES_HI)); - } else { - registers.gpu_features_lo = 0; - registers.gpu_features_hi = 0; +#if MALI_USE_CSF +#endif /* MALI_USE_CSF */ + { + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + regdump->texture_features[i] = + kbase_reg_read32(kbdev, GPU_TEXTURE_FEATURES_OFFSET(i)); } - if (!kbase_is_gpu_removed(kbdev)) { - *regdump = registers; - return 0; - } else + if (kbase_is_gpu_removed(kbdev)) return -EIO; + return 0; } int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, - struct kbase_current_config_regdump 
*curr_config_regdump) + struct kbase_current_config_regdump *curr_config_regdump) { if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) return -EINVAL; - curr_config_regdump->mem_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MEM_FEATURES)); - - curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_LO)); - curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_HI)); - - curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_LO)); - curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_HI)); + curr_config_regdump->mem_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MEM_FEATURES)); + curr_config_regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); + curr_config_regdump->shader_present = + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT)); + curr_config_regdump->l2_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PRESENT)); if (kbase_is_gpu_removed(kbdev)) return -EIO; return 0; - -} - -int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) -{ - u32 coherency_features; - int error = 0; - - /* Ensure we can access the GPU registers */ - kbase_pm_register_access_enable(kbdev); - - coherency_features = kbase_cache_get_coherency_features(kbdev); - - if (kbase_is_gpu_removed(kbdev)) - error = -EIO; - - regdump->coherency_features = coherency_features; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) - regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); - else - regdump->core_features = 0; - - kbase_pm_register_access_disable(kbdev); - - return error; } int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) + struct kbasep_gpuprops_regdump *regdump) { if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { - u32 
l2_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_FEATURES)); - u32 l2_config = - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); - u32 asn_hash[ASN_HASH_COUNT] = { - 0, - }; - int i; + regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); + regdump->l2_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { - for (i = 0; i < ASN_HASH_COUNT; i++) - asn_hash[i] = kbase_reg_read( - kbdev, GPU_CONTROL_REG(ASN_HASH(i))); +#if MALI_USE_CSF + if (kbase_hw_has_l2_slice_hash_feature(kbdev)) { + int i; + for (i = 0; i < GPU_L2_SLICE_HASH_COUNT; i++) + regdump->l2_slice_hash[i] = + kbase_reg_read32(kbdev, GPU_L2_SLICE_HASH_OFFSET(i)); } +#endif /* MALI_USE_CSF */ if (kbase_is_gpu_removed(kbdev)) return -EIO; - - regdump->l2_features = l2_features; - regdump->l2_config = l2_config; - for (i = 0; i < ASN_HASH_COUNT; i++) - regdump->l2_asn_hash[i] = asn_hash[i]; } return 0; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c index 53578ded58b9..3d61081e0f84 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include #include @@ -34,8 +34,8 @@ static int wait_prfcnt_ready(struct kbase_device *kbdev) u32 loops; for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { - const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_PRFCNT_ACTIVE; + const u32 prfcnt_active = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & + GPU_STATUS_PRFCNT_ACTIVE; if (!prfcnt_active) return 0; } @@ -44,9 +44,8 @@ static int wait_prfcnt_ready(struct kbase_device *kbdev) return -EBUSY; } -int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_instr_hwcnt_enable *enable) +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_instr_hwcnt_enable *enable) { unsigned long flags; int err = -EINVAL; @@ -74,9 +73,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, } /* Enable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | - PRFCNT_SAMPLE_COMPLETED); + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask | PRFCNT_SAMPLE_COMPLETED); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; @@ -89,8 +88,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* Configure */ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS - prfcnt_config |= kbdev->hwcnt.backend.override_counter_set - << PRFCNT_CONFIG_SETSELECT_SHIFT; + prfcnt_config |= kbdev->hwcnt.backend.override_counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #else prfcnt_config |= enable->counter_set << 
PRFCNT_CONFIG_SETSELECT_SHIFT; #endif @@ -100,32 +98,25 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, if (err) return err; - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF); /* Wait until prfcnt is disabled before writing configuration registers */ err = wait_prfcnt_ready(kbdev); if (err) return err; - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - enable->dump_buffer & 0xFFFFFFFF); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - enable->dump_buffer >> 32); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), enable->dump_buffer); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - enable->fe_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_JM_EN), enable->fe_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - enable->shader_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - enable->mmu_l2_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_SHADER_EN), enable->shader_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_MMU_L2_EN), enable->mmu_l2_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_TILER_EN), enable->tiler_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -151,15 +142,16 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) return; /* Disable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); + 
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED); /* Wait until prfcnt config register can be written, then disable the counters. * Return value is ignored as we are disabling anyway. */ wait_prfcnt_ready(kbdev); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), 0); kbdev->hwcnt.kctx = NULL; kbdev->hwcnt.addr = 0ULL; @@ -206,8 +198,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Ongoing dump/setup - wait for its completion */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); + wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); } kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; @@ -218,8 +209,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", - kctx); + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx); return 0; } @@ -261,28 +251,22 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) goto unlock; /* Reconfigure the dump address */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - kbdev->hwcnt.addr & 0xFFFFFFFF); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - kbdev->hwcnt.addr >> 32); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), kbdev->hwcnt.addr); /* Start dumping */ - KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, - kbdev->hwcnt.addr); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, kbdev->hwcnt.addr); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_SAMPLE); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE); dev_dbg(kbdev->dev, "HW counters dumping done for 
context %pK", kctx); - unlock: +unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); -bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, - bool * const success) +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, bool *const success) { unsigned long flags; bool complete = false; @@ -335,8 +319,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) int err; /* Wait for dump & cache clean to complete */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); + wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -347,8 +330,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) err = -EIO; } else { /* Dump done */ - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_IDLE); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE); err = 0; } @@ -368,8 +350,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) /* Check it's the context previously set up and we're not in IDLE * state. 
*/ - if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_IDLE) + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) goto unlock; if (kbase_is_gpu_removed(kbdev)) { @@ -384,8 +365,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) /* Clear the counters */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_CLEAR); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR); unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -443,8 +423,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->hwcnt.backend.wait); - kbdev->hwcnt.backend.triggered = 0; - #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS /* Use the build time option for the override default. */ #if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) @@ -474,8 +452,7 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) * Valid inputs are the values accepted bythe SET_SELECT bits of the * PRFCNT_CONFIG register as defined in the architecture specification. */ - debugfs_create_u8("hwcnt_set_select", 0644, - kbdev->mali_debugfs_directory, + debugfs_create_u8("hwcnt_set_select", 0644, kbdev->mali_debugfs_directory, (u8 *)&kbdev->hwcnt.backend.override_counter_set); } #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h index 66cda8c0b647..1429c01a1bd2 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,11 +37,10 @@ void kbase_release_interrupts(struct kbase_device *kbdev); */ void kbase_synchronize_irqs(struct kbase_device *kbdev); -int kbasep_common_test_interrupt_handlers( - struct kbase_device * const kbdev); +int kbasep_common_test_interrupt_handlers(struct kbase_device *const kbdev); irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, int irq_type); +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + int irq_type); #endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index eb63b2c56c3d..6474f27dafcc 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,12 @@ static void *kbase_tag(void *ptr, u32 tag) { - return (void *)(((uintptr_t) ptr) | tag); + return (void *)(((uintptr_t)ptr) | tag); } static void *kbase_untag(void *ptr) { - return (void *)(((uintptr_t) ptr) & ~3); + return (void *)(((uintptr_t)ptr) & ~3); } static irqreturn_t kbase_job_irq_handler(int irq, void *data) @@ -56,12 +56,12 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)); #ifdef CONFIG_MALI_BIFROST_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", __func__, + irq, val); #endif /* CONFIG_MALI_BIFROST_DEBUG */ if (!val) { @@ -99,12 +99,12 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS)); #ifdef CONFIG_MALI_BIFROST_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", __func__, + irq, val); #endif /* CONFIG_MALI_BIFROST_DEBUG */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -126,7 +126,8 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) { unsigned long flags; struct kbase_device *kbdev = kbase_untag(data); - u32 val; + u32 gpu_irq_status; + irqreturn_t irq_state = IRQ_NONE; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -136,23 +137,25 @@ static irqreturn_t 
kbase_gpu_irq_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + gpu_irq_status = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_STATUS)); #ifdef CONFIG_MALI_BIFROST_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); + if (!kbdev->pm.backend.driver_ready_for_irqs) { + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", __func__, + irq, gpu_irq_status); + } #endif /* CONFIG_MALI_BIFROST_DEBUG */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (!val) - return IRQ_NONE; + if (gpu_irq_status) { + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, gpu_irq_status); + kbase_gpu_interrupt(kbdev, gpu_irq_status); - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + irq_state = IRQ_HANDLED; + } - kbase_gpu_interrupt(kbdev, val); - - return IRQ_HANDLED; + return irq_state; } static irq_handler_t kbase_handler_table[] = { @@ -162,14 +165,14 @@ static irq_handler_t kbase_handler_table[] = { }; #ifdef CONFIG_MALI_BIFROST_DEBUG -#define JOB_IRQ_HANDLER JOB_IRQ_TAG -#define GPU_IRQ_HANDLER GPU_IRQ_TAG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG /** * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() * @irq: IRQ number * @data: Data associated with this IRQ (i.e. kbdev) - * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) + * @val: Value of the GPU_CONTROL_ENUM(GPU_IRQ_STATUS) * * Handle the GPU device interrupt source requests reflected in the * given source bit-pattern. 
The test code caller is responsible for @@ -206,33 +209,30 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); * * Return: 0 case success, error code otherwise */ -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type) +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + int irq_type) { int result = 0; irq_handler_t requested_irq_handler = NULL; - KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && - (irq_type <= GPU_IRQ_HANDLER)); + KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && (irq_type <= GPU_IRQ_HANDLER)); /* Release previous handler */ if (kbdev->irqs[irq_type].irq) free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); - requested_irq_handler = (custom_handler != NULL) ? - custom_handler : - kbase_handler_table[irq_type]; + requested_irq_handler = (custom_handler != NULL) ? custom_handler : + kbase_handler_table[irq_type]; if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, - kbdev->irqs[irq_type].flags | IRQF_SHARED, - dev_name(kbdev->dev), + kbdev->irqs[irq_type].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, irq_type)) != 0) { result = -EINVAL; dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", - kbdev->irqs[irq_type].irq, irq_type); + kbdev->irqs[irq_type].irq, irq_type); #if IS_ENABLED(CONFIG_SPARSE_IRQ) - dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); + dev_err(kbdev->dev, + "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); #endif /* CONFIG_SPARSE_IRQ */ } @@ -251,7 +251,7 @@ struct kbasep_irq_test { static struct kbasep_irq_test kbasep_irq_test_data; -#define IRQ_TEST_TIMEOUT 500 +#define IRQ_TEST_TIMEOUT 500 static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) { @@ -267,7 +267,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) return 
IRQ_NONE; } - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -279,7 +279,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -310,15 +310,14 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), val); return IRQ_HANDLED; } static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) { - struct kbasep_irq_test *test_data = container_of(timer, - struct kbasep_irq_test, timer); + struct kbasep_irq_test *test_data = container_of(timer, struct kbasep_irq_test, timer); test_data->timeout = 1; test_data->triggered = 1; @@ -326,8 +325,7 @@ static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int kbasep_common_test_interrupt( - struct kbase_device * const kbdev, u32 tag) +static int kbasep_common_test_interrupt(struct kbase_device *const kbdev, u32 tag) { int err = 0; irq_handler_t test_handler; @@ -339,13 +337,13 @@ static int kbasep_common_test_interrupt( switch (tag) { case JOB_IRQ_TAG: test_handler = kbase_job_irq_test_handler; - rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); - mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + 
rawstat_offset = JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_ENUM(JOB_IRQ_MASK); break; case MMU_IRQ_TAG: test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_REG(MMU_IRQ_MASK); + rawstat_offset = MMU_CONTROL_ENUM(IRQ_RAWSTAT); + mask_offset = MMU_CONTROL_ENUM(IRQ_MASK); break; case GPU_IRQ_TAG: /* already tested by pm_driver - bail out */ @@ -354,9 +352,9 @@ static int kbasep_common_test_interrupt( } /* store old mask */ - old_mask_val = kbase_reg_read(kbdev, mask_offset); + old_mask_val = kbase_reg_read32(kbdev, mask_offset); /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0); + kbase_reg_write32(kbdev, mask_offset, 0x0); if (kbdev->irqs[tag].irq) { /* release original handler and install test handler */ @@ -364,36 +362,33 @@ static int kbasep_common_test_interrupt( err = -EINVAL; } else { kbasep_irq_test_data.timeout = 0; - hrtimer_init(&kbasep_irq_test_data.timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - kbasep_irq_test_data.timer.function = - kbasep_test_interrupt_timeout; + hrtimer_init(&kbasep_irq_test_data.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = kbasep_test_interrupt_timeout; /* trigger interrupt */ - kbase_reg_write(kbdev, mask_offset, 0x1); - kbase_reg_write(kbdev, rawstat_offset, 0x1); + kbase_reg_write32(kbdev, mask_offset, 0x1); + kbase_reg_write32(kbdev, rawstat_offset, 0x1); hrtimer_start(&kbasep_irq_test_data.timer, - HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), HRTIMER_MODE_REL); - wait_event(kbasep_irq_test_data.wait, - kbasep_irq_test_data.triggered != 0); + wait_event(kbasep_irq_test_data.wait, kbasep_irq_test_data.triggered != 0); if (kbasep_irq_test_data.timeout != 0) { dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", - kbdev->irqs[tag].irq, tag); + kbdev->irqs[tag].irq, tag); err = -EINVAL; } else { dev_dbg(kbdev->dev, "Interrupt %d (index 
%d) reached CPU.\n", - kbdev->irqs[tag].irq, tag); + kbdev->irqs[tag].irq, tag); } hrtimer_cancel(&kbasep_irq_test_data.timer); kbasep_irq_test_data.triggered = 0; /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0); + kbase_reg_write32(kbdev, mask_offset, 0x0); /* release test handler */ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); @@ -401,21 +396,20 @@ static int kbasep_common_test_interrupt( /* restore original interrupt */ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], - kbdev->irqs[tag].flags | IRQF_SHARED, - dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + kbdev->irqs[tag].flags | IRQF_SHARED, dev_name(kbdev->dev), + kbase_tag(kbdev, tag))) { dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", - kbdev->irqs[tag].irq, tag); + kbdev->irqs[tag].irq, tag); err = -EINVAL; } } /* restore old mask */ - kbase_reg_write(kbdev, mask_offset, old_mask_val); + kbase_reg_write32(kbdev, mask_offset, old_mask_val); return err; } -int kbasep_common_test_interrupt_handlers( - struct kbase_device * const kbdev) +int kbasep_common_test_interrupt_handlers(struct kbase_device *const kbdev) { int err; @@ -427,19 +421,21 @@ int kbasep_common_test_interrupt_handlers( err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); if (err) { - dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, + "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); goto out; } err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); if (err) { - dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, + "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); goto out; } dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); - out: +out: kbase_pm_context_idle(kbdev); return err; @@ -454,14 +450,14 @@ int kbase_install_interrupts(struct kbase_device *kbdev) for (i = 0; i < nr; i++) { err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], - kbdev->irqs[i].flags | IRQF_SHARED, - dev_name(kbdev->dev), - kbase_tag(kbdev, i)); + kbdev->irqs[i].flags | IRQF_SHARED, dev_name(kbdev->dev), + kbase_tag(kbdev, i)); if (err) { dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", - kbdev->irqs[i].irq, i); + kbdev->irqs[i].irq, i); #if IS_ENABLED(CONFIG_SPARSE_IRQ) - dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); + dev_err(kbdev->dev, + "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); #endif /* CONFIG_SPARSE_IRQ */ goto release; } @@ -469,7 +465,7 @@ int kbase_install_interrupts(struct kbase_device *kbdev) return 0; - release: +release: while (i-- > 0) free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c index 258dc6dac6c5..960ac9905343 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + CSTD_UNUSED(current_as); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -88,8 +90,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_contex return false; } -void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, struct kbase_context *kctx) { int as_nr = kctx->as_nr; @@ -111,13 +112,14 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, kbase_js_runpool_dec_context_count(kbdev, kctx); } -void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, struct kbase_context *kctx) { + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); } -int kbase_backend_find_and_release_free_address_space( - struct kbase_device *kbdev, struct kbase_context *kctx) +int kbase_backend_find_and_release_free_address_space(struct kbase_device *kbdev, + struct kbase_context *kctx) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -146,12 +148,11 @@ int kbase_backend_find_and_release_free_address_space( * descheduled. 
*/ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && - atomic_read(&as_kctx->refcount) == 1) { + atomic_read(&as_kctx->refcount) == 1) { if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { WARN(1, "Failed to retain active context\n"); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -168,7 +169,6 @@ int kbase_backend_find_and_release_free_address_space( mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - /* Release context from address space */ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); @@ -176,9 +176,7 @@ int kbase_backend_find_and_release_free_address_space( kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, - as_kctx, - true); + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, as_kctx, true); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); @@ -206,9 +204,7 @@ int kbase_backend_find_and_release_free_address_space( return KBASEP_AS_NR_INVALID; } -bool kbase_backend_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int as_nr) +bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr) { struct kbasep_js_device_data *js_devdata; struct kbase_as *new_address_space = NULL; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h index 136aa526d41e..e9dbe825d47f 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -109,21 +109,21 @@ struct kbase_backend_data { atomic_t reset_gpu; /* The GPU reset isn't pending */ -#define KBASE_RESET_GPU_NOT_PENDING 0 +#define KBASE_RESET_GPU_NOT_PENDING 0 /* kbase_prepare_to_reset_gpu has been called */ -#define KBASE_RESET_GPU_PREPARED 1 +#define KBASE_RESET_GPU_PREPARED 1 /* kbase_reset_gpu has been called - the reset will now definitely happen * within the timeout period */ -#define KBASE_RESET_GPU_COMMITTED 2 +#define KBASE_RESET_GPU_COMMITTED 2 /* The GPU reset process is currently occuring (timeout has expired or * kbasep_try_reset_gpu_early was called) */ -#define KBASE_RESET_GPU_HAPPENING 3 +#define KBASE_RESET_GPU_HAPPENING 3 /* Reset the GPU silently, used when resetting the GPU as part of normal * behavior (e.g. when exiting protected mode). 
*/ -#define KBASE_RESET_GPU_SILENT 4 +#define KBASE_RESET_GPU_SILENT 4 struct workqueue_struct *reset_workq; struct work_struct reset_work; wait_queue_head_t reset_wait; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index c7257117e98a..8f06058bbdb4 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include @@ -41,8 +41,8 @@ #include static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); -static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, - const u64 affinity, const u64 limited_core_mask); +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, + const u64 limited_core_mask); static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, unsigned int js, const u64 limited_core_mask) @@ -50,33 +50,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req u64 affinity; bool skip_affinity_check = false; - if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == - BASE_JD_REQ_T) { + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { /* Tiler-only atom, affinity value can be programed as 0 */ affinity = 0; skip_affinity_check = true; - } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { - unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; - struct mali_base_gpu_coherent_group_info *coherency_info = - &kbdev->gpu_props.props.coherency_info; + } else if ((core_req & + (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; - affinity = kbdev->pm.backend.shaders_avail & - kbdev->pm.debug_core_mask[js]; - - /* 
JS2 on a dual core group system targets core group 1. All - * other cases target core group 0. + /* Bifrost onwards GPUs only have 1 coherent group which is equal to + * shader_present */ - if (js == 2 && num_core_groups > 1) - affinity &= coherency_info->group[1].core_mask; - else if (num_core_groups > 1) - affinity &= coherency_info->group[0].core_mask; - else - affinity &= kbdev->gpu_props.curr_config.shader_present; + affinity &= kbdev->gpu_props.curr_config.shader_present; } else { /* Use all cores */ - affinity = kbdev->pm.backend.shaders_avail & - kbdev->pm.debug_core_mask[js]; + affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; } if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { @@ -86,8 +74,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req if (unlikely(!affinity && !skip_affinity_check)) { #ifdef CONFIG_MALI_BIFROST_DEBUG - u64 shaders_ready = - kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); #endif @@ -96,7 +83,8 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. 
*/ - affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + affinity = + kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); #ifdef CONFIG_MALI_BIFROST_DEBUG /* affinity should never be 0 */ @@ -105,10 +93,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req } } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), - affinity & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), - affinity >> 32); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(js, AFFINITY_NEXT), affinity); return affinity; } @@ -140,8 +125,8 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[katom->renderpass_id]; /* We can read a subset of renderpass state without holding @@ -182,9 +167,8 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) break; } - dev_dbg(kctx->kbdev->dev, - "Selected job chain 0x%llx for end atom %pK in state %d\n", - jc, (void *)katom, (int)rp->state); + dev_dbg(kctx->kbdev->dev, "Selected job chain 0x%llx for end atom %pK in state %d\n", jc, + (void *)katom, (int)rp->state); katom->jc = jc; return jc; @@ -199,7 +183,7 @@ static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned i /* wait for the JS_COMMAND_NEXT register to reach the given status value */ do { - if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT))) + if (!kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT))) return true; diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); @@ -227,16 +211,12 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, if (!kbasep_jm_wait_js_free(kbdev, js, kctx)) return -EPERM; - 
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", - jc_head, (void *)katom); + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, + (void *)katom); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), - jc_head & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), - jc_head >> 32); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT), jc_head); - affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, - kctx->limited_core_mask); + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, kctx->limited_core_mask); /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start @@ -244,7 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, cfg = kctx->as_nr; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && - !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) { @@ -267,7 +247,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && - !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) cfg |= JS_CONFIG_END_FLUSH_CLEAN; @@ -289,11 +269,10 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, ptr_slot_rb->job_chain_flag = false; } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, CONFIG_NEXT), cfg); if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), - katom->flush_id); + kbase_reg_write32(kbdev, 
JOB_SLOT_OFFSET(js, FLUSH_ID_NEXT), katom->flush_id); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. @@ -302,27 +281,18 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, /* GO ! */ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", - katom, kctx, js, jc_head); + katom, kctx, js, jc_head); - KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, - (u32)affinity); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32)affinity); - KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, - js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, js, kbase_jd_atom_id(kctx, katom), + TL_JS_EVENT_START); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, - affinity, cfg); - KBASE_TLSTREAM_TL_RET_CTX_LPU( - kbdev, - kctx, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, affinity, cfg); + KBASE_TLSTREAM_TL_RET_CTX_LPU(kbdev, kctx, &kbdev->gpu_props.js_features[katom->slot_nr]); KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); - KBASE_TLSTREAM_TL_RET_ATOM_LPU( - kbdev, - katom, - &kbdev->gpu_props.props.raw_props.js_features[js], - "ctx_nr,atom_nr"); + KBASE_TLSTREAM_TL_RET_ATOM_LPU(kbdev, katom, &kbdev->gpu_props.js_features[js], + "ctx_nr,atom_nr"); kbase_kinstr_jm_atom_hw_submit(katom); /* Update the slot's last katom submission kctx */ @@ -333,19 +303,16 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, /* If this is the only job on the slot, trace it as starting */ char js_string[16]; - trace_gpu_sched_switch( - kbasep_make_job_slot_string(js, js_string, - sizeof(js_string)), - ktime_to_ns(katom->start_timestamp), - (u32)katom->kctx->id, 0, katom->work_id); + 
trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(katom->start_timestamp), (u32)katom->kctx->id, 0, + katom->work_id); } #endif - trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, - kbase_jd_atom_id(kctx, katom), js); + trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom), js); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_START); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), JS_COMMAND_START); return 0; } @@ -372,8 +339,7 @@ static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbd /* Checking the HEAD position for the job slot */ katom = kbase_gpu_inspect(kbdev, js, 0); if (katom != NULL) { - timestamp_diff = ktime_sub(end_timestamp, - katom->start_timestamp); + timestamp_diff = ktime_sub(end_timestamp, katom->start_timestamp); if (ktime_to_ns(timestamp_diff) >= 0) { /* Only update the timestamp if it's a better estimate * than what's currently stored. This is because our @@ -396,9 +362,7 @@ static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbd */ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) { - KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( - kbdev, - &kbdev->gpu_props.props.raw_props.js_features[js]); + KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(kbdev, &kbdev->gpu_props.js_features[js]); } void kbase_job_done(struct kbase_device *kbdev, u32 done) @@ -427,40 +391,32 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) do { int nr_done; u32 active; - u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u32 completion_code = BASE_JD_EVENT_DONE; /* assume OK */ u64 job_tail = 0; if (failed & (1u << i)) { /* read out the job slot status code if the job * slot reported failure */ - completion_code = kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_STATUS)); + completion_code = + kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(i, STATUS)); if (completion_code == BASE_JD_EVENT_STOPPED) 
{ u64 job_head; - KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( - kbdev, NULL, - i, 0, TL_JS_EVENT_SOFT_STOP); + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, i, 0, + TL_JS_EVENT_SOFT_STOP); - kbasep_trace_tl_event_lpu_softstop( - kbdev, i); + kbasep_trace_tl_event_lpu_softstop(kbdev, i); /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can * be resumed */ - job_tail = (u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_LO)) | - ((u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_HI)) - << 32); - job_head = (u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_HEAD_LO)) | - ((u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_HEAD_HI)) - << 32); + job_tail = + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(i, TAIL)); + job_head = + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(i, HEAD)); /* For a soft-stopped job chain js_tail should * same as the js_head, but if not then the * job chain was incorrectly marked as @@ -471,15 +427,13 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if (job_tail != job_head) completion_code = BASE_JD_EVENT_UNKNOWN; - } else if (completion_code == - BASE_JD_EVENT_NOT_STARTED) { + } else if (completion_code == BASE_JD_EVENT_NOT_STARTED) { /* PRLAM-10673 can cause a TERMINATED * job to come back as NOT_STARTED, * but the error interrupt helps us * detect it */ - completion_code = - BASE_JD_EVENT_TERMINATED; + completion_code = BASE_JD_EVENT_TERMINATED; } kbase_gpu_irq_evict(kbdev, i, completion_code); @@ -492,20 +446,17 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { if (kbase_prepare_to_reset_gpu_locked( - kbdev, - RESET_FLAGS_NONE)) + kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } } - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), - done & ((1 << i) | (1 << (i + 16)))); - active = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); + kbase_reg_write32(kbdev, 
JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), + done & ((1 << i) | (1 << (i + 16)))); + active = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_JS_STATE)); - if (((active >> i) & 1) == 0 && - (((done >> (i + 16)) & 1) == 0)) { + if (((active >> i) & 1) == 0 && (((done >> (i + 16)) & 1) == 0)) { /* There is a potential race we must work * around: * @@ -546,8 +497,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * have prevented any futher jobs from starting * execution. */ - u32 rawstat = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + u32 rawstat = + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)); if ((rawstat >> (i + 16)) & 1) { /* There is a failed job that we've @@ -557,16 +508,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } } - dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", - completion_code); + dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", completion_code); nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); nr_done -= (active >> i) & 1; nr_done -= (active >> (i + 16)) & 1; if (nr_done <= 0) { - dev_warn(kbdev->dev, "Spurious interrupt on slot %d", - i); + dev_warn(kbdev->dev, "Spurious interrupt on slot %d", i); goto spurious; } @@ -574,11 +523,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) count += nr_done; while (nr_done) { - if (nr_done == 1) { - kbase_gpu_complete_hw(kbdev, i, - completion_code, - job_tail, - &end_timestamp); + if (likely(nr_done == 1)) { + kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, + &end_timestamp); kbase_jm_try_kick_all(kbdev); } else { /* More than one job has completed. @@ -589,16 +536,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * slot to complete until the failed job * is cleared from the IRQ status. 
*/ - kbase_gpu_complete_hw(kbdev, i, - BASE_JD_EVENT_DONE, - 0, - &end_timestamp); + kbase_gpu_complete_hw(kbdev, i, BASE_JD_EVENT_DONE, 0, + &end_timestamp); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /* Increment the end timestamp value by 1 ns to + * avoid having the same value for 'start_time_ns' + * and 'end_time_ns' for the 2nd atom whose job + * completion IRQ got merged with the 1st atom. + */ + end_timestamp = ktime_add(end_timestamp, ns_to_ktime(1)); +#endif } nr_done--; } - spurious: - done = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); +spurious: + done = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)); failed = done >> 16; finished = (done & 0xFFFF) | failed; @@ -606,12 +558,10 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp = ktime_get_raw(); } while (finished & (1 << i)); - kbasep_job_slot_update_head_start_timestamp(kbdev, i, - end_timestamp); + kbasep_job_slot_update_head_start_timestamp(kbdev, i, end_timestamp); } - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_COMMITTED) { + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { /* If we're trying to reset the GPU then we might be able to do * it early (without waiting for a timeout) because some jobs * have completed @@ -633,21 +583,19 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns WARN_ON(action & (~JS_COMMAND_MASK)); /* Check the head pointer */ - job_in_head_before = ((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_LO))) - | (((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_HI))) - << 32); - status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); + job_in_head_before = kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD)); + status_reg_before = kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, STATUS)); #endif if (action == JS_COMMAND_SOFT_STOP) { if (kbase_jd_katom_is_protected(target_katom)) { #ifdef 
CONFIG_MALI_BIFROST_DEBUG dev_dbg(kbdev->dev, - "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x", - (unsigned int)core_reqs); -#endif /* CONFIG_MALI_BIFROST_DEBUG */ + "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x", + (unsigned int)core_reqs); +#else + CSTD_UNUSED(core_reqs); +#endif /* CONFIG_MALI_BIFROST_DEBUG */ return; } @@ -659,23 +607,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns /* Mark the point where we issue the soft-stop command */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; + action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; } else if (action == JS_COMMAND_HARD_STOP) { target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; + action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? 
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND), action); #if KBASE_KTRACE_ENABLE - status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); + status_reg_after = kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, STATUS)); if (status_reg_after == BASE_JD_EVENT_ACTIVE) { struct kbase_jd_atom *head; struct kbase_context *head_kctx; @@ -688,7 +634,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, + job_in_head_before, js); else KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); @@ -697,19 +644,23 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); break; case JS_COMMAND_SOFT_STOP_0: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, + js); break; case JS_COMMAND_SOFT_STOP_1: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, + js); break; case JS_COMMAND_HARD_STOP: KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); break; case JS_COMMAND_HARD_STOP_0: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, + js); break; case JS_COMMAND_HARD_STOP_1: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, + js); break; 
default: WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, @@ -718,7 +669,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns } } else { if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + job_in_head_before, js); else KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); @@ -762,7 +714,7 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) } void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, - struct kbase_jd_atom *target_katom) + struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; unsigned int target_js = target_katom->slot_nr; @@ -783,9 +735,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (kbase_js_atom_runs_before(kbdev, target_katom, slot_katom, KBASE_ATOM_ORDERING_FLAG_SEQNR)) { if (!stop_sent) - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( - kbdev, - target_katom); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(kbdev, target_katom); kbase_job_slot_softstop(kbdev, target_js, slot_katom); stop_sent = true; @@ -793,8 +743,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, } } -static int softstop_start_rp_nolock( - struct kbase_context *kctx, struct kbase_va_region *reg) +static int softstop_start_rp_nolock(struct kbase_context *kctx, struct kbase_va_region *reg) { struct kbase_device *const kbdev = kctx->kbdev; struct kbase_jd_atom *katom; @@ -810,33 +759,30 @@ static int softstop_start_rp_nolock( } if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { - dev_dbg(kctx->kbdev->dev, - "Atom %pK on job slot is not start RP\n", (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK on job slot is not start RP\n", (void *)katom); return -EPERM; } compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - 
ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[katom->renderpass_id]; - if (WARN_ON(rp->state != KBASE_JD_RP_START && - rp->state != KBASE_JD_RP_RETRY)) + if (WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", - (int)rp->state, (void *)reg); + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg); if (WARN_ON(katom != rp->start_katom)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", - (void *)reg, (void *)&rp->oom_reg_list); + dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg, + (void *)&rp->oom_reg_list); list_move_tail(®->link, &rp->oom_reg_list); dev_dbg(kctx->kbdev->dev, "Added region to list\n"); - rp->state = (rp->state == KBASE_JD_RP_START ? - KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); + rp->state = (rp->state == KBASE_JD_RP_START ? 
KBASE_JD_RP_PEND_OOM : + KBASE_JD_RP_RETRY_PEND_OOM); kbase_job_slot_softstop(kbdev, 1, katom); @@ -844,7 +790,7 @@ static int softstop_start_rp_nolock( } int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, - struct kbase_va_region *const reg) + struct kbase_va_region *const reg) { struct kbase_device *const kbdev = kctx->kbdev; int err; @@ -862,14 +808,11 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) struct kbase_device *kbdev = kctx->kbdev; unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); - timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, - kctx->jctx.job_nr == 0, timeout); + timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0, timeout); if (timeout != 0) - timeout = wait_event_timeout( - kctx->jctx.sched_info.ctx.is_scheduled_wait, - !kbase_ctx_flag(kctx, KCTX_SCHEDULED), - timeout); + timeout = wait_event_timeout(kctx->jctx.sched_info.ctx.is_scheduled_wait, + !kbase_ctx_flag(kctx, KCTX_SCHEDULED), timeout); /* Neither wait timed out; all done! 
*/ if (timeout != 0) @@ -901,8 +844,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { mutex_lock(&kbdev->pm.lock); if (kbdev->pm.backend.gpu_powered) - flush_id = kbase_reg_read(kbdev, - GPU_CONTROL_REG(LATEST_FLUSH)); + flush_id = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(LATEST_FLUSH)); mutex_unlock(&kbdev->pm.lock); } @@ -927,7 +869,6 @@ void kbase_job_slot_term(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_job_slot_term); - /** * kbase_job_slot_softstop_swflags - Soft-stop a job with flags * @kbdev: The kbase device @@ -946,8 +887,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { - dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", - target_katom, sw_flags, js); + dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, + js); if (sw_flags & JS_COMMAND_MASK) { WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, @@ -955,11 +896,10 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js sw_flags &= ~((u32)JS_COMMAND_MASK); } kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, - JS_COMMAND_SOFT_STOP | sw_flags); + JS_COMMAND_SOFT_STOP | sw_flags); } -void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom) +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom) { kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); } @@ -970,23 +910,23 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, struct kbase_device *kbdev = kctx->kbdev; bool stopped; - stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, - target_katom, - JS_COMMAND_HARD_STOP); + stopped = 
kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, + JS_COMMAND_HARD_STOP); CSTD_UNUSED(stopped); } void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { u32 hw_action = action & JS_COMMAND_MASK; + CSTD_UNUSED(core_reqs); + /* For soft-stop, don't enter if soft-stop not allowed, or isn't * causing disjoint. */ - if (hw_action == JS_COMMAND_SOFT_STOP && - (kbase_jd_katom_is_protected(target_katom) || - (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) + if (hw_action == JS_COMMAND_SOFT_STOP && (kbase_jd_katom_is_protected(target_katom) || + (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) return; /* Nothing to do if already logged disjoint state on this atom */ @@ -997,8 +937,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, kbase_disjoint_state_up(kbdev); } -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom) +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom) { if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; @@ -1008,28 +947,33 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); return -EINVAL; } int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); return -EINVAL; } void kbase_reset_gpu_allow(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); } void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); } void 
kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); } @@ -1041,32 +985,32 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS))); dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_JS_STATE))); for (i = 0; i < 3; i++) { - dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); + dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD=0x%016llx", i, + kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(i, STATUS)), i, + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(i, HEAD))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); + kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)), + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK)), + kbase_reg_read32(kbdev, 
MMU_CONTROL_ENUM(IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE0)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE1))); dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG))); dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG))); } static void kbasep_reset_timeout_worker(struct work_struct *data) @@ -1078,13 +1022,11 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) bool silent = false; u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - kbdev = container_of(data, struct kbase_device, - hwaccess.backend.reset_work); + kbdev = container_of(data, struct kbase_device, hwaccess.backend.reset_work); js_devdata = &kbdev->js_data; - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_SILENT) + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_SILENT) silent = true; if (kbase_is_quick_reset_enabled(kbdev)) @@ -1104,12 +1046,11 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { /* This would re-activate the GPU. 
Since it's already idle, * there's no need to reset it */ - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1150,8 +1091,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* Ensure that L2 is not transitioning when we send the reset * command */ - while (--max_loops && kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2)) + while (--max_loops && kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) ; WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); @@ -1169,8 +1109,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) */ if (!silent) - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", - RESET_TIMEOUT); + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); /* Output the state of some interesting registers to help in the * debugging of GPU resets @@ -1226,8 +1165,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) mutex_unlock(&kbdev->pm.lock); - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); wake_up(&kbdev->hwaccess.backend.reset_wait); if (!silent) @@ -1254,15 +1192,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) { - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - hwaccess.backend.reset_timer); + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, hwaccess.backend.reset_timer); /* Reset still pending? 
*/ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == - KBASE_RESET_GPU_COMMITTED) + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, + KBASE_RESET_GPU_HAPPENING) == KBASE_RESET_GPU_COMMITTED) queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); + &kbdev->hwaccess.backend.reset_work); return HRTIMER_NORESTART; } @@ -1296,15 +1233,13 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) * been called), and that no other thread beat this thread to starting * the reset */ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != - KBASE_RESET_GPU_COMMITTED) { + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, + KBASE_RESET_GPU_HAPPENING) != KBASE_RESET_GPU_COMMITTED) { /* Reset has already occurred */ return; } - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); + queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work); } static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) @@ -1329,8 +1264,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. 
*/ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, - unsigned int flags) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int flags) { int i; @@ -1346,10 +1280,8 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) kbase_instr_hwcnt_on_unrecoverable_error(kbdev); - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_PREPARED) != - KBASE_RESET_GPU_NOT_PENDING) { + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_PREPARED) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ return false; } @@ -1392,16 +1324,15 @@ void kbase_reset_gpu(struct kbase_device *kbdev) */ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) return; - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); if (!kbase_is_quick_reset_enabled(kbdev)) - dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", - kbdev->reset_timeout_ms); + dev_err(kbdev->dev, + "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, - HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL); /* Try resetting early */ kbasep_try_reset_gpu_early(kbdev); @@ -1415,15 +1346,14 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) */ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) return; - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); if 
(!kbase_is_quick_reset_enabled(kbdev)) - dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", - kbdev->reset_timeout_ms); + dev_err(kbdev->dev, + "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, - HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL); /* Try resetting early */ kbasep_try_reset_gpu_early_locked(kbdev); @@ -1431,26 +1361,22 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) int kbase_reset_gpu_silent(struct kbase_device *kbdev) { - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_SILENT) != - KBASE_RESET_GPU_NOT_PENDING) { + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_SILENT) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ return -EAGAIN; } kbase_disjoint_state_up(kbdev); - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); + queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work); return 0; } bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) { - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_NOT_PENDING) + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING) return false; return true; @@ -1464,8 +1390,7 @@ bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) int kbase_reset_gpu_wait(struct kbase_device *kbdev) { wait_event(kbdev->hwaccess.backend.reset_wait, - atomic_read(&kbdev->hwaccess.backend.reset_gpu) - == KBASE_RESET_GPU_NOT_PENDING); + atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING); return 0; } @@ -1473,18 +1398,14 @@ 
KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); int kbase_reset_gpu_init(struct kbase_device *kbdev) { - kbdev->hwaccess.backend.reset_workq = alloc_workqueue( - "Mali reset workqueue", 0, 1); + kbdev->hwaccess.backend.reset_workq = alloc_workqueue("Mali reset workqueue", 0, 1); if (kbdev->hwaccess.backend.reset_workq == NULL) return -ENOMEM; - INIT_WORK(&kbdev->hwaccess.backend.reset_work, - kbasep_reset_timeout_worker); + INIT_WORK(&kbdev->hwaccess.backend.reset_work, kbasep_reset_timeout_worker); - hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - kbdev->hwaccess.backend.reset_timer.function = - kbasep_reset_timer_callback; + hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->hwaccess.backend.reset_timer.function = kbasep_reset_timer_callback; return 0; } @@ -1494,17 +1415,15 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev) destroy_workqueue(kbdev->hwaccess.backend.reset_workq); } -static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, - const u64 affinity, const u64 limited_core_mask) +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, + const u64 limited_core_mask) { const u64 result = affinity & limited_core_mask; #ifdef CONFIG_MALI_BIFROST_DEBUG dev_dbg(kbdev->dev, - "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", - (unsigned long)affinity, - (unsigned long)result, - (unsigned long)limited_core_mask); + "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", + (unsigned long)affinity, (unsigned long)result, (unsigned long)limited_core_mask); #else CSTD_UNUSED(kbdev); #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h index bfd55a6e2160..2b95e97d0081 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +++ 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h @@ -41,13 +41,13 @@ * @job_tail: Job tail address reported by GPU * @end_timestamp: Timestamp of job completion */ -void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, - u64 job_tail, ktime_t *end_timestamp); +void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, + ktime_t *end_timestamp); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%u", js); + (void)scnprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif @@ -74,8 +74,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * on the specified atom * @kbdev: Device pointer * @js: Job slot to stop on - * @action: The action to perform, either JSn_COMMAND_HARD_STOP or - * JSn_COMMAND_SOFT_STOP + * @action: The action to perform, either JS_COMMAND_HARD_STOP or + * JS_COMMAND_SOFT_STOP * @core_reqs: Core requirements of atom to stop * @target_katom: Atom to stop * @@ -94,8 +94,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns * @kctx: Context pointer. May be NULL * @katom: Specific atom to stop. May be NULL * @js: Job slot to hard stop - * @action: The action to perform, either JSn_COMMAND_HARD_STOP or - * JSn_COMMAND_SOFT_STOP + * @action: The action to perform, either JS_COMMAND_HARD_STOP or + * JS_COMMAND_SOFT_STOP * * If no context is provided then all jobs on the slot will be soft or hard * stopped. 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index 7db2b353b67a..50cf19d876c5 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -32,6 +32,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include +#endif #include #include #include @@ -44,7 +47,7 @@ * * Note: HW access lock must be held */ -#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) /** * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. @@ -55,9 +58,8 @@ */ #define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) -static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - ktime_t *end_timestamp); +static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp); /** * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer @@ -66,8 +68,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, * * Context: Caller must hold the HW access lock */ -static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; @@ -168,8 +169,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - if (katom && (katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED)) + if (katom && (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)) nr++; } @@ -220,10 +220,9 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned i */ static bool check_secure_atom(struct 
kbase_jd_atom *katom, bool secure) { - if (katom->gpu_rb_state >= - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && - ((kbase_jd_katom_is_protected(katom) && secure) || - (!kbase_jd_katom_is_protected(katom) && !secure))) + if (katom->gpu_rb_state >= KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) return true; return false; @@ -239,8 +238,7 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) * * Return: true if any atoms are in the given state, false otherwise */ -static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, - bool secure) +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { unsigned int js; @@ -248,8 +246,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, int i; for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, i); + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); if (katom) { if (check_secure_atom(katom, secure)) @@ -265,8 +262,7 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != - KBASE_RESET_GPU_NOT_PENDING) { + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_NOT_PENDING) { /* The GPU is being reset - so prevent submission */ return 0; } @@ -274,10 +270,63 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); } +/** + * trace_atom_completion_for_gpu_metrics - Report the completion of atom for the + * purpose of emitting power/gpu_work_period + * tracepoint. + * + * @katom: Pointer to the atom that completed execution on GPU. + * @end_timestamp: Pointer to the timestamp of atom completion. May be NULL, in + * which case current time will be used. 
+ * + * The function would also report the start for an atom that was in the HEAD_NEXT + * register. + * + * Note: Caller must hold the HW access lock. + */ +static inline void trace_atom_completion_for_gpu_metrics(struct kbase_jd_atom *const katom, + ktime_t *end_timestamp) +{ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + u64 complete_ns; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *queued = kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); -static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - ktime_t *end_timestamp) +#ifdef CONFIG_MALI_BIFROST_DEBUG + WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0)); +#endif + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (unlikely(queued == katom)) + return; + + /* A protected atom and a non-protected atom cannot be in the RB_SUBMITTED + * state at the same time in the job slot ringbuffer. Atom submission state + * machine prevents the submission of a non-protected atom until all + * protected atoms have completed and GPU has exited the protected mode. + * This implies that if the queued atom is in RB_SUBMITTED state, it shall + * be a protected atom and so we can return early. 
+ */ + if (unlikely(kbase_jd_katom_is_protected(katom))) + return; + + if (likely(end_timestamp)) + complete_ns = ktime_to_ns(*end_timestamp); + else + complete_ns = ktime_get_raw_ns(); + + kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns); +#else + CSTD_UNUSED(katom); + CSTD_UNUSED(end_timestamp); +#endif +} + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp) { struct kbase_context *kctx = katom->kctx; @@ -290,6 +339,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: + trace_atom_completion_for_gpu_metrics(katom, end_timestamp); kbase_kinstr_jm_atom_hw_release(katom); /* Inform power management at start/finish of atom so it can * update its GPU utilisation metrics. Mark atom as not @@ -305,12 +355,10 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, kbase_pm_release_gpu_cycle_counter_nolock(kbdev); KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, - &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); + &kbdev->gpu_props.js_features[katom->slot_nr]); KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, - &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); + &kbdev->gpu_props.js_features[katom->slot_nr]); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ fallthrough; @@ -322,30 +370,23 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK) && - (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_CHECK) && + (katom->protected_state.enter != 
KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { kbase_pm_protected_override_disable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); } if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) kbase_pm_protected_entry_override_disable(kbdev); if (!kbase_jd_katom_is_protected(katom) && - (katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) && - (katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { + (katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) && + (katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { kbase_pm_protected_override_disable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); } - if (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK || - katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) + if (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means @@ -379,18 +420,15 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbdev->protected_mode_hwcnt_disabled = false; } } if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { - if (katom->atom_flags & - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + if (katom->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { kbase_pm_protected_l2_override(kbdev, false); - katom->atom_flags &= - ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } } @@ -410,14 +448,12 @@ static void kbase_gpu_release_atom(struct 
kbase_device *kbdev, katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; } -static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, - katom->kctx, katom, katom->jc, - katom->slot_nr, katom->event_code); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, katom->kctx, katom, + katom->jc, katom->slot_nr, katom->event_code); kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -438,8 +474,7 @@ static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) if (slot == js) continue; - if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, - KBASE_ATOM_GPU_RB_SUBMITTED)) + if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, KBASE_ATOM_GPU_RB_SUBMITTED)) return true; } @@ -471,16 +506,14 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, - "Cannot enter protected mode: protected callbacks not specified.\n"); + "Cannot enter protected mode: protected callbacks not specified.\n"); if (kbdev->protected_ops) { /* Switch GPU to protected mode */ - err = kbdev->protected_ops->protected_mode_enable( - kbdev->protected_dev); + err = kbdev->protected_ops->protected_mode_enable(kbdev->protected_dev); if (err) { - dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", - err); + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", err); } else { kbdev->protected_mode = true; kbase_ipa_protection_mode_switch_event(kbdev); @@ -495,7 +528,7 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, - "Cannot exit protected mode: protected callbacks not 
specified.\n"); + "Cannot exit protected mode: protected callbacks not specified.\n"); if (!kbdev->protected_ops) return -EINVAL; @@ -505,8 +538,8 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) return kbase_reset_gpu_silent(kbdev); } -static int kbase_jm_protected_entry(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_jm_protected_entry(struct kbase_device *kbdev, struct kbase_jd_atom **katom, + int idx, int js) { int err = 0; @@ -541,8 +574,7 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, * already removed - as atoms must be returned * in order. */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } @@ -556,14 +588,13 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; return err; } -static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, + int idx, int js) { int err = 0; @@ -583,8 +614,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, */ WARN_ON(kbdev->protected_mode_hwcnt_disabled); - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_HWCNT; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_HWCNT; kbdev->protected_mode_transition = true; @@ -594,16 +624,14 @@ static int kbase_jm_enter_protected_mode(struct 
kbase_device *kbdev, /* See if we can get away with disabling hwcnt atomically */ kbdev->protected_mode_hwcnt_desired = false; if (!kbdev->protected_mode_hwcnt_disabled) { - if (kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)) + if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) kbdev->protected_mode_hwcnt_disabled = true; } /* We couldn't disable atomically, so kick off a worker */ if (!kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_queue_work( - kbdev->hwcnt_gpu_ctx, - &kbdev->protected_mode_hwcnt_disable_work); + kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, + &kbdev->protected_mode_hwcnt_disable_work); return -EAGAIN; } @@ -619,8 +647,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Entering protected mode requires us to power down the L2, * and drop out of fully coherent mode. */ - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; kbase_pm_protected_override_enable(kbdev); /* @@ -645,11 +672,9 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, return -EAGAIN; } - if (kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2) || - kbase_is_gpu_removed(kbdev)) { + if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_is_gpu_removed(kbdev)) { /* * The L2 is still powered, wait for all * the users to finish with it before doing @@ -659,8 +684,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, } } - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -685,12 +709,10 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Set the flag on the atom that additional * 
L2 references are taken. */ - katom[idx]->atom_flags |= - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + katom[idx]->atom_flags |= KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_FINISHED; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) return -EAGAIN; @@ -708,10 +730,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Remove additional L2 reference and reset * the atom flag which denotes it. */ - if (katom[idx]->atom_flags & - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { - kbase_pm_protected_l2_override(kbdev, - false); + if (katom[idx]->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, false); katom[idx]->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } @@ -737,8 +757,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, return 0; } -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, + int idx, int js) { int err = 0; @@ -759,8 +779,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, * needs to be powered down to ensure it's not active when the * reset is issued. 
*/ - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; kbdev->protected_mode_transition = true; kbase_pm_protected_override_enable(kbdev); @@ -776,8 +795,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, */ return -EAGAIN; } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_RESET; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -804,8 +822,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, /* Only return if head atom or previous atom * already removed - as atoms must be returned in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || + katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } @@ -816,16 +834,14 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbdev->protected_mode_hwcnt_disabled = false; } return -EINVAL; } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -849,8 +865,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_reset_gpu_is_active(kbdev) || - kbase_is_gpu_removed(kbdev)) + if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_removed(kbdev)) #else if (kbase_reset_gpu_is_active(kbdev)) #endif @@ -866,6 +881,9 @@ void kbase_backend_slot_update(struct 
kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + bool trace_atom_submit_for_gpu_metrics = true; +#endif int ret; if (!katom[idx]) @@ -887,15 +905,12 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: - if (kbase_gpu_check_secure_atoms(kbdev, - !kbase_jd_katom_is_protected( - katom[idx]))) + if (kbase_gpu_check_secure_atoms( + kbdev, !kbase_jd_katom_is_protected(katom[idx]))) break; - if ((idx == 1) && (kbase_jd_katom_is_protected( - katom[0]) != - kbase_jd_katom_is_protected( - katom[1]))) + if ((idx == 1) && (kbase_jd_katom_is_protected(katom[0]) != + kbase_jd_katom_is_protected(katom[1]))) break; if (kbdev->protected_mode_transition) @@ -917,22 +932,19 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) */ if (!kbase_gpu_in_protected_mode(kbdev) && - kbase_jd_katom_is_protected(katom[idx])) { + kbase_jd_katom_is_protected(katom[idx])) { /* Atom needs to transition into protected mode. */ - ret = kbase_jm_enter_protected_mode(kbdev, - katom, idx, js); + ret = kbase_jm_enter_protected_mode(kbdev, katom, idx, js); if (ret) break; } else if (kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx])) { + !kbase_jd_katom_is_protected(katom[idx])) { /* Atom needs to transition out of protected mode. */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); + ret = kbase_jm_exit_protected_mode(kbdev, katom, idx, js); if (ret) break; } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* Atom needs no protected mode transition. 
*/ @@ -943,62 +955,64 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) fallthrough; case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: if (katom[idx]->will_fail_event_code) { - kbase_gpu_mark_atom_for_return(kbdev, - katom[idx]); + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Set EVENT_DONE so this atom will be * completed, not unpulled. */ - katom[idx]->event_code = - BASE_JD_EVENT_DONE; + katom[idx]->event_code = BASE_JD_EVENT_DONE; /* Only return if head atom or previous * atom already removed - as atoms must * be returned in order. */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } break; } - cores_ready = kbase_pm_cores_requested(kbdev, - true); + cores_ready = kbase_pm_cores_requested(kbdev, true); if (!cores_ready) break; - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { + enum kbase_atom_gpu_rb_state atom_0_gpu_rb_state = + katom[0]->gpu_rb_state; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + trace_atom_submit_for_gpu_metrics = + (atom_0_gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB); +#endif + /* Only submit if head atom or previous * atom already submitted */ - if ((katom[0]->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + if ((atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + atom_0_gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; /* If intra-slot serialization in use * then don't submit atom to NEXT slot */ - if (kbdev->serialize_jobs & - KBASE_SERIALIZE_INTRA_SLOT) + if (kbdev->serialize_jobs & KBASE_SERIALIZE_INTRA_SLOT) break; } /* If inter-slot serialization in use 
then don't * submit atom if any other slots are in use */ - if ((kbdev->serialize_jobs & - KBASE_SERIALIZE_INTER_SLOT) && - other_slots_busy(kbdev, js)) + if ((kbdev->serialize_jobs & KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) break; /* Check if this job needs the cycle counter @@ -1015,10 +1029,17 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) * metrics. */ kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); + &katom[idx]->start_timestamp); /* Inform platform at start/finish of atom */ kbasep_platform_event_atom_submit(katom[idx]); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (likely(trace_atom_submit_for_gpu_metrics && + !kbase_jd_katom_is_protected(katom[idx]))) + kbase_gpu_metrics_ctx_start_activity( + katom[idx]->kctx, + ktime_to_ns(katom[idx]->start_timestamp)); +#endif } else { if (katom[idx]->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); @@ -1036,11 +1057,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) * already removed - as atoms must be returned * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || + katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, - katom[idx]); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); } break; } @@ -1048,9 +1068,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) } } - -void kbase_backend_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); @@ -1069,20 +1087,33 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, * * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. 
*/ -static inline bool -kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, - const struct kbase_jd_atom *katom_b) +static inline bool kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, + const struct kbase_jd_atom *katom_b) { if (katom_a->kctx != katom_b->kctx) return false; - return (katom_b->pre_dep || - (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | - KBASE_KATOM_FLAG_FAIL_BLOCKER))); + return (katom_b->pre_dep || (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | + KBASE_KATOM_FLAG_FAIL_BLOCKER))); +} + +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + u32 action, bool disjoint) +{ + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, katom->sched_priority); + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); } /** - * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is - * related to a failed JSn_HEAD atom + * kbase_gpu_irq_evict - evict a slot's JS_HEAD_NEXT atom from the HW if it is + * related to a failed JS_HEAD atom * @kbdev: kbase device * @js: job slot to check * @completion_code: completion code of the failed atom @@ -1091,18 +1122,18 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, * unlike other failure codes we _can_ re-run them. * * This forms step 1 in a 2-step process of removing any related atoms from a - * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have - * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). + * slot's JS_HEAD_NEXT (ringbuffer index 1), should there have + * been a 'failure' on an atom in JS_HEAD (ringbuffer index 0). * * This step only removes the atoms from the HW, and marks them as * (potentially) ready to run again. 
* - * Step 2 is on marking the JSn_HEAD atom as complete + * Step 2 is on marking the JS_HEAD atom as complete * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS * as appropriate, or re-submit them. * * Hence, this function must evict at a minimum the atoms related to the atom - * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable + * in JS_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as * the next kbase_backend_slot_update() will resubmit any remaining. * @@ -1122,29 +1153,28 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple } next_katom = kbase_gpu_inspect(kbdev, js, 1); - if (next_katom && - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + if (next_katom && next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && (kbase_rb_atom_might_depend(katom, next_katom) || kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_NOP); - next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0) { + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), JS_COMMAND_NOP); if (completion_code == BASE_JD_EVENT_STOPPED) { - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, - &kbdev->gpu_props.props.raw_props.js_features - [next_katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as - [next_katom->kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, - &kbdev->gpu_props.props.raw_props.js_features - [next_katom->slot_nr]); - } + kbase_gpu_remove_atom(kbdev, next_katom, JS_COMMAND_SOFT_STOP, false); + KBASE_TLSTREAM_TL_NRET_ATOM_LPU( + kbdev, next_katom, + 
&kbdev->gpu_props.js_features[next_katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, + &kbdev->as[next_katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU( + kbdev, next_katom->kctx, + &kbdev->gpu_props.js_features[next_katom->slot_nr]); + } else { + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - if (next_katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + } /* On evicting the next_katom, the last submission kctx on the * given job slot then reverts back to the one that owns katom. @@ -1161,24 +1191,24 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple } /** - * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD + * kbase_gpu_complete_hw - complete the atom in a slot's JS_HEAD * @kbdev: kbase device * @js: job slot to check * @completion_code: completion code of the completed atom - * @job_tail: value read from JSn_TAIL, for STOPPED atoms + * @job_tail: value read from JS_TAIL, for STOPPED atoms * @end_timestamp: pointer to approximate ktime value when the katom completed * * Among other operations, this also executes step 2 of a 2-step process of - * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), - * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index + * removing any related atoms from a slot's JS_HEAD_NEXT (ringbuffer index 1), + * should there have been a 'failure' on an atom in JS_HEAD (ringbuffer index * 0). The first step is done in kbase_gpu_irq_evict(). * * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but * unlike other failure codes we _can_ re-run them. 
* - * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue + * When the JS_HEAD atom is considered to be 'failed', then this will dequeue * and return to the JS some (usually all) of the atoms evicted from the HW - * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an + * during the kbase_gpu_irq_evict() for that JS_HEAD atom. If it dequeues an * atom, that atom must not have been running or must already be evicted, as * otherwise we would be in the incorrect state of having an atom both running * on the HW and returned to the JS. @@ -1197,8 +1227,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp kctx = katom->kctx; - dev_dbg(kbdev->dev, - "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", (void *)katom, completion_code, job_tail, js); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1209,15 +1238,13 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { if (completion_code == BASE_JD_EVENT_STOPPED && - (katom->atom_flags & - KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { + (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { completion_code = BASE_JD_EVENT_TERMINATED; } } if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && - completion_code != BASE_JD_EVENT_DONE && - !(completion_code & BASE_JD_SW_EVENT)) { + completion_code != BASE_JD_EVENT_DONE && !(completion_code & BASE_JD_SW_EVENT)) { /* When a job chain fails, on a T60x or when * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not * flushed. 
To prevent future evictions causing possible memory @@ -1230,8 +1257,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); if (completion_code == BASE_JD_EVENT_STOPPED) { - struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, - 0); + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, 0); /* * Dequeue next atom from ringbuffers on same slot if required. @@ -1239,10 +1265,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that * the atoms on this slot are returned in the correct order. */ - if (next_katom && - kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { - WARN_ON(next_katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED); + if (next_katom && kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { + WARN_ON(next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED); kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); kbase_jm_return_atom_to_js(kbdev, next_katom); } @@ -1252,9 +1276,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", - js, completion_code, - kbase_gpu_exception_name( - completion_code)); + js, completion_code, kbase_gpu_exception_name(completion_code)); } @@ -1270,66 +1292,53 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp * atom. 
*/ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { - struct kbase_jd_atom *katom_idx0 = - kbase_gpu_inspect(kbdev, i, 0); - struct kbase_jd_atom *katom_idx1 = - kbase_gpu_inspect(kbdev, i, 1); + struct kbase_jd_atom *katom_idx0 = kbase_gpu_inspect(kbdev, i, 0); + struct kbase_jd_atom *katom_idx1 = kbase_gpu_inspect(kbdev, i, 1); - if (katom_idx0 && - kbase_rb_atom_might_depend(katom, katom_idx0) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx0 && kbase_rb_atom_might_depend(katom, katom_idx0) && + katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx0 from ringbuffer */ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - if (katom_idx1 && kbase_rb_atom_might_depend( - katom, katom_idx1) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx1 && kbase_rb_atom_might_depend(katom, katom_idx1) && + katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx1 from ringbuffer */ - kbase_gpu_dequeue_atom(kbdev, i, - end_timestamp); + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - katom_idx1->event_code = - BASE_JD_EVENT_STOPPED; - kbase_jm_return_atom_to_js(kbdev, - katom_idx1); + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); } katom_idx0->event_code = BASE_JD_EVENT_STOPPED; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - } else if (katom_idx1 && kbase_rb_atom_might_depend( - katom, katom_idx1) && - katom_idx1->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + } else if (katom_idx1 && kbase_rb_atom_might_depend(katom, katom_idx1) && + katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Can not dequeue this atom yet - will be * dequeued when atom at idx0 completes */ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; - kbase_gpu_mark_atom_for_return(kbdev, - katom_idx1); + kbase_gpu_mark_atom_for_return(kbdev, katom_idx1); } } } - KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, 
katom->jc, js, completion_code); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, + completion_code); if (job_tail != 0 && job_tail != katom->jc) { /* Some of the job has been executed */ - dev_dbg(kbdev->dev, - "Update job chain address of atom %pK to resume from 0x%llx\n", + dev_dbg(kbdev->dev, "Update job chain address of atom %pK to resume from 0x%llx\n", (void *)katom, job_tail); katom->jc = job_tail; - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, - katom, job_tail, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, katom, job_tail, js); } /* Only update the event code for jobs that weren't cancelled */ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) katom->event_code = (enum base_jd_event_code)completion_code; - /* Complete the job, and start new ones + /* Complete the job, and start new ones * * Also defer remaining work onto the workqueue: * - Re-queue Soft-stopped jobs @@ -1340,19 +1349,15 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) { /* The atom in the HEAD */ - struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, - 0); + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, 0); - if (next_katom && next_katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (next_katom && next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { char js_string[16]; - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - ktime_to_ns(*end_timestamp), - (u32)next_katom->kctx->id, 0, - next_katom->work_id); + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(*end_timestamp), + (u32)next_katom->kctx->id, 0, next_katom->work_id); } else { char js_string[16]; @@ -1372,8 +1377,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp katom = kbase_jm_complete(kbdev, katom, end_timestamp); 
if (katom) { - dev_dbg(kbdev->dev, - "Cross-slot dependency %pK has become runnable.\n", + dev_dbg(kbdev->dev, "Cross-slot dependency %pK has become runnable.\n", (void *)katom); /* Check if there are lower priority jobs to soft stop */ @@ -1405,14 +1409,12 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) int idx; for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, atom_idx); + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, atom_idx); bool keep_in_jm_rb = false; if (!katom) break; - if (katom->protected_state.exit == - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { + if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { /* protected mode sanity checks */ WARN(kbase_jd_katom_is_protected(katom) != kbase_gpu_in_protected_mode(kbdev), @@ -1476,9 +1478,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * should_stop_next_atom - given a soft/hard stop action, determine if the next * atom on a slot should be stopped * @kbdev: kbase devices - * @head_katom: atom currently in the JSn_HEAD - * @next_katom: atom currently in the JSn_HEAD_NEXT - * @action: JS_COMMAND_<...> action for soft/hard-stop + * @head_katom: atom currently in the JS_HEAD + * @next_katom: atom currently in the JS_HEAD_NEXT + * @action: COMMAND_<...> action for soft/hard-stop * * This is used in cases where @head_katom is the target of the soft/hard-stop. 
* It only makes sense to call this when @head_katom and @next_katom are from @@ -1489,16 +1491,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) */ static bool should_stop_next_atom(struct kbase_device *kbdev, const struct kbase_jd_atom *head_katom, - const struct kbase_jd_atom *next_katom, - u32 action) + const struct kbase_jd_atom *next_katom, u32 action) { bool ret = false; u32 hw_action = action & JS_COMMAND_MASK; switch (hw_action) { case JS_COMMAND_SOFT_STOP: - ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, - 0u); + ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, 0u); break; case JS_COMMAND_HARD_STOP: /* Unlike soft-stop, a hard-stop targeting a particular atom @@ -1524,39 +1524,17 @@ static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int u32 hw_action = action & JS_COMMAND_MASK; kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); - kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, - katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, katom->core_req, katom); kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } -static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - u32 action, - bool disjoint) -{ - struct kbase_context *kctx = katom->kctx; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_gpu_mark_atom_for_return(kbdev, katom); - kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, - katom->sched_priority); - - if (disjoint) - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, - katom); -} - static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) { if (katom->x_post_dep) { struct kbase_jd_atom *dep_atom = katom->x_post_dep; - if (dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && - dep_atom->gpu_rb_state != - 
KBASE_ATOM_GPU_RB_RETURN_TO_JS) + if (dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_RETURN_TO_JS) return dep_atom->slot_nr; } return -1; @@ -1601,13 +1579,12 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); } - /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided - * to stop, but we're stopping the JSn_HEAD atom, see if they are + /* If there's an atom in JS_HEAD_NEXT that we haven't already decided + * to stop, but we're stopping the JS_HEAD atom, see if they are * related/ordered in some way that would require the same stop action */ if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) - katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, - katom_idx1, action); + katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, katom_idx1, action); if (katom_idx0_valid) stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); @@ -1620,78 +1597,59 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ kbase_gpu_dequeue_atom(kbdev, js, NULL); if (katom_idx1_valid) { kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom_idx1->event_code = - BASE_JD_EVENT_REMOVED_FROM_NEXT; + katom_idx1->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx1); - kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, - prio_idx1); + kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, prio_idx1); } - katom_idx0->event_code = - BASE_JD_EVENT_REMOVED_FROM_NEXT; + katom_idx0->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, - prio_idx0); + kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, prio_idx0); } else { /* katom_idx0 is on GPU */ - if (katom_idx1_valid && katom_idx1->gpu_rb_state == - 
KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx1_valid && + katom_idx1->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { /* katom_idx0 and katom_idx1 are on GPU */ - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT)) == 0) { + if (kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT)) == + 0) { /* idx0 has already completed - stop * idx1 if needed */ if (katom_idx1_valid) { - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } } else { /* idx1 is in NEXT registers - attempt * to remove */ - kbase_reg_write(kbdev, - JOB_SLOT_REG(js, - JS_COMMAND_NEXT), - JS_COMMAND_NOP); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), + JS_COMMAND_NOP); - if (kbase_reg_read(kbdev, - JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO)) - != 0 || - kbase_reg_read(kbdev, - JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI)) - != 0) { + if (kbase_reg_read64(kbdev, + JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0) { /* idx1 removed successfully, * will be handled in IRQ */ - kbase_gpu_remove_atom(kbdev, - katom_idx1, - action, true); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + true); /* Revert the last_context. 
*/ kbdev->hwaccess.backend.slot_rb[js] .last_kctx_tagged = SLOT_RB_TAG_KCTX(katom_idx0->kctx); stop_x_dep_idx1 = - should_stop_x_dep_slot(katom_idx1); + should_stop_x_dep_slot(katom_idx1); /* stop idx0 if still on GPU */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx0, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; } else if (katom_idx1_valid) { /* idx0 has already completed, * stop idx1 if needed */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } } @@ -1699,8 +1657,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ /* idx1 not on GPU but must be dequeued*/ /* idx1 will be handled in IRQ */ - kbase_gpu_remove_atom(kbdev, katom_idx1, action, - false); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); /* stop idx0 */ /* This will be repeated for anything removed * from the next registers, since their normal @@ -1709,14 +1666,12 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ * don't actually do a hard stop on the head * atom */ - kbase_gpu_stop_atom(kbdev, js, katom_idx0, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; } else { /* no atom in idx1 */ /* just stop idx0 */ - kbase_gpu_stop_atom(kbdev, js, katom_idx0, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; } } @@ -1724,77 +1679,60 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Mark for return */ /* idx1 will be returned once idx0 completes */ - kbase_gpu_remove_atom(kbdev, katom_idx1, action, - false); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); } else { /* idx1 is on GPU */ - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT)) == 0) { + if (kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT)) == 0) { /* idx0 has already completed - stop 
idx1 */ - kbase_gpu_stop_atom(kbdev, js, katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } else { /* idx1 is in NEXT registers - attempt to * remove */ - kbase_reg_write(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT), - JS_COMMAND_NOP); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), + JS_COMMAND_NOP); - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO)) != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI)) != 0) { + if (kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0) { /* idx1 removed successfully, will be * handled in IRQ once idx0 completes */ - kbase_gpu_remove_atom(kbdev, katom_idx1, - action, - false); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); /* Revert the last_context, or mark as purged */ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : - SLOT_RB_TAG_PURGED; + SLOT_RB_TAG_PURGED; } else { /* idx0 has already completed - stop * idx1 */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } } } } - if (stop_x_dep_idx0 != -1) - kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, - NULL, action); + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, NULL, action); if (stop_x_dep_idx1 != -1) - kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, - NULL, action); + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, NULL, action); return ret; } -void kbase_backend_cache_clean(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->need_cache_flush_cores_retained) { - kbase_gpu_start_cache_clean(kbdev, - GPU_COMMAND_CACHE_CLN_INV_FULL); + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_FULL); kbase_gpu_wait_cache_clean(kbdev); katom->need_cache_flush_cores_retained = 
false; } } -void kbase_backend_complete_wq(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +void kbase_backend_complete_wq(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { /* * If cache flush required due to HW workaround then perform the flush @@ -1803,9 +1741,10 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, kbase_backend_cache_clean(kbdev, katom); } -void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req) +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req) { + CSTD_UNUSED(core_req); + if (!kbdev->pm.active_count) { kbase_pm_lock(kbdev); kbase_pm_update_active(kbdev); @@ -1826,9 +1765,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) int idx; for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, - idx); + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, idx); if (katom) dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index cbc88f91a400..c40ffbf9e089 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,34 +28,23 @@ #include #include #include +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include +#endif -#if !MALI_USE_CSF /* * Hold the runpool_mutex for this */ -static inline bool timer_callback_should_run(struct kbase_device *kbdev) +static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs) { - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - int nr_running_ctxs; - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - /* Timer must stop if we are suspending */ - if (backend->suspend_timer) - return false; - - /* nr_contexts_pullable is updated with the runpool_mutex. However, the - * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading - */ - nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); - #ifdef CONFIG_MALI_BIFROST_DEBUG if (kbdev->js_data.softstop_always) { /* Debug support for allowing soft-stop on a single context */ return true; } -#endif /* CONFIG_MALI_BIFROST_DEBUG */ +#endif /* CONFIG_MALI_BIFROST_DEBUG */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { /* Timeouts would have to be 4x longer (due to micro- @@ -69,19 +58,16 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev) * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). 
*/ { - int nr_compute_ctxs = - kbasep_js_ctx_attr_count_on_runpool(kbdev, - KBASEP_JS_CTX_ATTR_COMPUTE); - int nr_noncompute_ctxs = nr_running_ctxs - - nr_compute_ctxs; + int nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool( + kbdev, KBASEP_JS_CTX_ATTR_COMPUTE); + int nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs; - return (bool) (nr_compute_ctxs >= 2 || - nr_noncompute_ctxs > 0); + return (bool)(nr_compute_ctxs >= 2 || nr_noncompute_ctxs > 0); } } else { /* Run the timer callback whenever you have at least 1 context */ - return (bool) (nr_running_ctxs > 0); + return (bool)(nr_running_ctxs > 0); } } @@ -96,8 +82,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) KBASE_DEBUG_ASSERT(timer != NULL); - backend = container_of(timer, struct kbase_backend_data, - scheduling_timer); + backend = container_of(timer, struct kbase_backend_data, scheduling_timer); kbdev = container_of(backend, struct kbase_device, hwaccess.backend); js_devdata = &kbdev->js_data; @@ -119,26 +104,19 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) u32 ticks = atom->ticks++; #if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) - u32 soft_stop_ticks, hard_stop_ticks, - gpu_reset_ticks; + u32 soft_stop_ticks, hard_stop_ticks, gpu_reset_ticks; if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - soft_stop_ticks = - js_devdata->soft_stop_ticks_cl; - hard_stop_ticks = - js_devdata->hard_stop_ticks_cl; - gpu_reset_ticks = - js_devdata->gpu_reset_ticks_cl; + soft_stop_ticks = js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = js_devdata->gpu_reset_ticks_cl; } else { - soft_stop_ticks = - js_devdata->soft_stop_ticks; + soft_stop_ticks = js_devdata->soft_stop_ticks; if (kbase_is_quick_reset_enabled(kbdev)) { hard_stop_ticks = 2; gpu_reset_ticks = 3; } else { - hard_stop_ticks = - js_devdata->hard_stop_ticks_ss; - gpu_reset_ticks = - js_devdata->gpu_reset_ticks_ss; + hard_stop_ticks = 
js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = js_devdata->gpu_reset_ticks_ss; } } @@ -149,8 +127,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * races between this worker and the thread * changing the timeouts. */ - if (backend->timeouts_updated && - ticks > soft_stop_ticks) + if (backend->timeouts_updated && ticks > soft_stop_ticks) ticks = atom->ticks = soft_stop_ticks; /* Job is Soft-Stoppable */ @@ -162,7 +139,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) */ #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS int disjoint_threshold = - KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; u32 softstop_flags = 0u; dev_dbg(kbdev->dev, "Soft-stop"); @@ -183,13 +160,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * older value and register a disjoint * event when we try soft-stopping */ - if (js_devdata->nr_user_contexts_running - >= disjoint_threshold) - softstop_flags |= - JS_COMMAND_SW_CAUSES_DISJOINT; + if (js_devdata->nr_user_contexts_running >= + disjoint_threshold) + softstop_flags |= JS_COMMAND_SW_CAUSES_DISJOINT; - kbase_job_slot_softstop_swflags(kbdev, - s, atom, softstop_flags); + kbase_job_slot_softstop_swflags(kbdev, s, atom, + softstop_flags); #endif } else if (ticks == hard_stop_ticks) { /* Job has been scheduled for at least @@ -198,15 +174,13 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * now. Hard stop the slot. 
*/ #if !KBASE_DISABLE_SCHEDULING_HARD_STOPS - int ms = - js_devdata->scheduling_period_ns - / 1000000u; + int ms = js_devdata->scheduling_period_ns / 1000000u; if (!kbase_is_quick_reset_enabled(kbdev)) - dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", - (unsigned long)ticks, - (unsigned long)ms); - kbase_job_slot_hardstop(atom->kctx, s, - atom); + dev_warn( + kbdev->dev, + "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, atom); #endif } else if (ticks == gpu_reset_ticks) { /* Job has been scheduled for at least @@ -217,7 +191,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) */ reset_needed = true; } -#else /* !CONFIG_MALI_JOB_DUMP */ +#else /* !CONFIG_MALI_JOB_DUMP */ /* NOTE: During CONFIG_MALI_JOB_DUMP, we use * the alternate timeouts, which makes the hard- * stop and GPU reset timeout much longer. We @@ -230,24 +204,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * CONFIG_MALI_JOB_DUMP, however. */ dev_dbg(kbdev->dev, "Soft-stop"); - } else if (ticks == - js_devdata->hard_stop_ticks_dumping) { + } else if (ticks == js_devdata->hard_stop_ticks_dumping) { /* Job has been scheduled for at least * js_devdata->hard_stop_ticks_dumping * ticks. Hard stop the slot. 
*/ #if !KBASE_DISABLE_SCHEDULING_HARD_STOPS - int ms = - js_devdata->scheduling_period_ns - / 1000000u; - dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", - (unsigned long)ticks, - (unsigned long)ms); - kbase_job_slot_hardstop(atom->kctx, s, - atom); + int ms = js_devdata->scheduling_period_ns / 1000000u; + dev_warn( + kbdev->dev, + "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, atom); #endif - } else if (ticks == - js_devdata->gpu_reset_ticks_dumping) { + } else if (ticks == js_devdata->gpu_reset_ticks_dumping) { /* Job has been scheduled for at least * js_devdata->gpu_reset_ticks_dumping * ticks. It should have left the GPU by @@ -256,16 +226,16 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) */ reset_needed = true; } -#endif /* !CONFIG_MALI_JOB_DUMP */ +#endif /* !CONFIG_MALI_JOB_DUMP */ } } } if (reset_needed) { if (kbase_is_quick_reset_enabled(kbdev)) dev_err(kbdev->dev, "quick reset"); - else { - dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); - } + else + dev_err(kbdev->dev, + "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issuing GPU soft-reset to resolve."); if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); @@ -274,8 +244,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) if (backend->timer_running) hrtimer_start(&backend->scheduling_timer, - HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); backend->timeouts_updated = false; @@ -283,18 +253,19 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -#endif /* !MALI_USE_CSF */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_backend_data *backend = &kbdev->hwaccess.backend; unsigned long flags; + /* Timer must stop if we are suspending */ + const bool suspend_timer = backend->suspend_timer; + const int nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); lockdep_assert_held(&js_devdata->runpool_mutex); - if (!timer_callback_should_run(kbdev)) { + if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; @@ -308,47 +279,70 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) hrtimer_cancel(&backend->scheduling_timer); } - if (timer_callback_should_run(kbdev) && !backend->timer_running) { + if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) && + !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_start(&backend->scheduling_timer, - HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), - HRTIMER_MODE_REL); + 
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (unlikely(suspend_timer)) { + js_devdata->gpu_metrics_timer_needed = false; + /* Cancel the timer as System suspend is happening */ + hrtimer_cancel(&js_devdata->gpu_metrics_timer); + js_devdata->gpu_metrics_timer_running = false; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Explicitly emit the tracepoint on System suspend */ + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } + + if (!nr_running_ctxs) { + /* Just set the flag to not restart the timer on expiry */ + js_devdata->gpu_metrics_timer_needed = false; + return; + } + + /* There are runnable contexts so the timer is needed */ + if (!js_devdata->gpu_metrics_timer_needed) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + js_devdata->gpu_metrics_timer_needed = true; + /* No need to restart the timer if it is already running. 
*/ + if (!js_devdata->gpu_metrics_timer_running) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), + HRTIMER_MODE_REL); + js_devdata->gpu_metrics_timer_running = true; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif } int kbase_backend_timer_init(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ return 0; } void kbase_backend_timer_term(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_cancel(&backend->scheduling_timer); -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ } void kbase_backend_timer_suspend(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c index 9ce50758c240..b0dcf67bdae9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,9 @@ * */ +#include + #include -#include #include #include #include "mali_kbase_l2_mmu_config.h" @@ -61,43 +62,30 @@ struct l2_mmu_config_limit { */ static const struct l2_mmu_config_limit limits[] = { /* GPU, read, write */ - {GPU_ID2_PRODUCT_LBEX, - {0, GENMASK(10, 5), 5}, - {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TBEX, - {0, GENMASK(10, 5), 5}, - {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TBAX, - {0, GENMASK(10, 5), 5}, - {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TTRX, - {0, GENMASK(12, 7), 7}, - {0, GENMASK(17, 13), 13} }, - {GPU_ID2_PRODUCT_TNAX, - {0, GENMASK(12, 7), 7}, - {0, GENMASK(17, 13), 13} }, - {GPU_ID2_PRODUCT_TGOX, - {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, - {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, - {GPU_ID2_PRODUCT_TNOX, - {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, - {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + { GPU_ID_PRODUCT_LBEX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } }, + { GPU_ID_PRODUCT_TBEX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } }, + { GPU_ID_PRODUCT_TBAX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } }, + { GPU_ID_PRODUCT_TTRX, { 0, GENMASK(12, 7), 7 }, { 0, GENMASK(17, 13), 13 } }, + { GPU_ID_PRODUCT_TNAX, { 0, GENMASK(12, 7), 7 }, { 0, GENMASK(17, 13), 13 } }, + { GPU_ID_PRODUCT_TGOX, + { KBASE_3BIT_AID_32, GENMASK(14, 12), 12 }, + { KBASE_3BIT_AID_32, GENMASK(17, 15), 15 } }, + { GPU_ID_PRODUCT_TNOX, + { KBASE_3BIT_AID_32, GENMASK(14, 12), 12 }, + { KBASE_3BIT_AID_32, GENMASK(17, 15), 15 } }, }; int kbase_set_mmu_quirks(struct kbase_device *kbdev) { /* All older GPUs had 2 bits for both fields, this is a default */ - struct l2_mmu_config_limit limit = { - 0, /* Any GPU not in the limits array defined above */ - {KBASE_AID_32, GENMASK(25, 24), 24}, - {KBASE_AID_32, GENMASK(27, 26), 26} - }; - u32 product_model, 
gpu_id; - u32 mmu_config; - int i; + struct l2_mmu_config_limit limit = { 0, /* Any GPU not in the limits array defined above */ + { KBASE_AID_32, GENMASK(25, 24), 24 }, + { KBASE_AID_32, GENMASK(27, 26), 26 } }; + u32 product_model; + u32 mmu_config = 0; + unsigned int i; - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + product_model = kbdev->gpu_props.gpu_id.product_model; /* Limit the GPU bus bandwidth if the platform needs this. */ for (i = 0; i < ARRAY_SIZE(limits); i++) { @@ -107,7 +95,8 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev) } } - mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG))) + mmu_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG)); if (kbase_is_gpu_removed(kbdev)) return -EIO; @@ -115,7 +104,7 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev) mmu_config &= ~(limit.read.mask | limit.write.mask); /* Can't use FIELD_PREP() macro here as the mask isn't constant */ mmu_config |= (limit.read.value << limit.read.shift) | - (limit.write.value << limit.write.shift); + (limit.write.value << limit.write.shift); kbdev->hw_quirks_mmu = mmu_config; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index 6db7031764ea..4c6bb912105e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,7 +63,8 @@ */ #include #include -#include +#include +#include #include #include @@ -80,20 +81,22 @@ static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM]; static bool ipa_control_timer_enabled; #endif -#define LO_MASK(M) ((M) & 0xFFFFFFFF) -#if !MALI_USE_CSF -#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) +#if MALI_USE_CSF +static u32 sysc_alloc_regs[SYSC_ALLOC_COUNT]; #endif +#define LO_MASK(M) ((M)&0xFFFFFFFF) +#define HI_MASK(M) ((M)&0xFFFFFFFF00000000) + /* Construct a value for the THREAD_FEATURES register, *except* the two most - * significant bits, which are set to IMPLEMENTATION_MODEL in + * significant bits, which are set to THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE in * midgard_model_read_reg(). */ #if MALI_USE_CSF -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) #else -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) #endif @@ -109,7 +112,7 @@ struct error_status_t hw_error_status; * @thread_max_barrier_size: Maximum number of threads per barrier * @thread_features: Thread features, NOT INCLUDING the 2 * most-significant bits, which are always set to - * IMPLEMENTATION_MODEL. + * THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE. 
* @core_features: Core features * @tiler_features: Tiler features * @mmu_features: MMU features @@ -144,28 +147,35 @@ struct job_slot { int job_disabled; }; +enum pwr_on_index { + INDEX_L2, + INDEX_TILER, + INDEX_SHADER, + INDEX_STACK, + INDEX_DOMAIN_COUNT +}; + struct dummy_model_t { int reset_completed; int reset_completed_mask; #if !MALI_USE_CSF int prfcnt_sample_completed; #endif /* !MALI_USE_CSF */ - int power_changed_mask; /* 2bits: _ALL,_SINGLE */ - int power_changed; /* 1bit */ + int power_changed_mask; /* 2 bits: _ALL,_SINGLE */ + int power_changed; /* 1 bit */ bool clean_caches_completed; bool clean_caches_completed_irq_enabled; #if MALI_USE_CSF bool flush_pa_range_completed; bool flush_pa_range_completed_irq_enabled; #endif - int power_on; /* 6bits: SHADER[4],TILER,L2 */ - u32 stack_power_on_lo; + uint32_t domain_power_on[INDEX_DOMAIN_COUNT]; u32 coherency_enable; unsigned int job_irq_js_state; struct job_slot slots[NUM_SLOTS]; const struct control_reg_values_t *control_reg_values; u32 l2_config; - void *data; + struct kbase_device *kbdev; }; /* Array associating GPU names with control register values. 
The first @@ -399,7 +409,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .gpu_features_lo = 0xf, .gpu_features_hi = 0, .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, - .stack_present = 0xF, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tTIx", @@ -415,7 +425,23 @@ static const struct control_reg_values_t all_control_reg_values[] = { .gpu_features_lo = 0xf, .gpu_features_hi = 0, .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, - .stack_present = 0xF, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tKRx", + .gpu_id = GPU_ID2_MAKE(13, 8, 1, 0, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x800, + .thread_max_workgroup_size = 0x400, + .thread_max_barrier_size = 0x400, + .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0), + .core_features = 0x1, /* core_1e64fma4tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0xf, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TKRX, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, }; @@ -439,12 +465,21 @@ static struct { u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; #endif /* !MALI_USE_CSF */ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; } performance_counters; +/** + * get_implementation_register - Returns the value of the register + * + * @reg: Register address + * @control_reg_values: Struct containing the implementations of the registers + * + * Registers of the dummy model are implemented in the control_reg_values_t struct + * We are only concerned 
with the lower 32 bits in the dummy model + * + * Return: value of the register for the current control_reg_values_t + */ static u32 get_implementation_register(u32 reg, const struct control_reg_values_t *const control_reg_values) { @@ -457,53 +492,42 @@ static u32 get_implementation_register(u32 reg, return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); case GPU_CONTROL_REG(STACK_PRESENT_LO): return LO_MASK(control_reg_values->stack_present); - - case GPU_CONTROL_REG(SHADER_PRESENT_HI): - case GPU_CONTROL_REG(TILER_PRESENT_HI): - case GPU_CONTROL_REG(L2_PRESENT_HI): - case GPU_CONTROL_REG(STACK_PRESENT_HI): - /* *** FALLTHROUGH *** */ default: return 0; } } + void gpu_device_set_data(void *model, void *data) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; - dummy->data = data; + dummy->kbdev = data; } void *gpu_device_get_data(void *model) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; - return dummy->data; + return dummy->kbdev; } #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 -/* SCons should pass in a default GPU, but other ways of building (e.g. - * in-tree) won't, so define one here in case. 
- */ -#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -#endif - static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); #if MALI_USE_CSF -static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, - u32 cnt_idx, bool is_low_word) +static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx, + bool is_low_word) { u64 *counters_data; u32 core_count = 0; u32 event_index; u64 value = 0; u32 core; + u32 num_cores = 1; unsigned long flags; if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) @@ -512,12 +536,13 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)) return 0; - event_index = - (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; + event_index = (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; - /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= - (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) + if (core_type == KBASE_IPA_CORE_TYPE_SHADER) + num_cores = KBASE_DUMMY_MODEL_MAX_SHADER_CORES; + + if (WARN_ON(event_index >= (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + + KBASE_DUMMY_MODEL_COUNTER_PER_CORE * num_cores))) return 0; /* The actual events start index 4 onwards. 
Spec also says PRFCNT_EN, @@ -628,8 +653,7 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_ if (!(blocks_present & (1 << block_idx))) { #if MALI_USE_CSF /* if CSF dump zeroed out block */ - memset(&prfcnt_base[*out_index], 0, - KBASE_DUMMY_MODEL_BLOCK_SIZE); + memset(&prfcnt_base[*out_index], 0, KBASE_DUMMY_MODEL_BLOCK_SIZE); *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK; #endif /* MALI_USE_CSF */ continue; @@ -637,25 +661,22 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_ /* write the header */ prfcnt_base[*out_index] = performance_counters.time++; - prfcnt_base[*out_index+2] = prfcnt_enable_mask; + prfcnt_base[*out_index + 2] = prfcnt_enable_mask; *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; /* write the counters */ - for (counter = 0; - counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; - counter++) { + for (counter = 0; counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; counter++) { /* HW counter values retrieved through * PRFCNT_SAMPLE request are of 32 bits only. 
*/ counter_value = (u32)values[index++]; if (KBASE_DUMMY_MODEL_COUNTER_ENABLED( - prfcnt_enable_mask, (counter + - KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { - prfcnt_base[*out_index + counter] = - counter_value; + prfcnt_enable_mask, + (counter + KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { + prfcnt_base[*out_index + counter] = counter_value; } } - *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; + *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; } } @@ -672,16 +693,15 @@ static void gpu_model_dump_nolock(void) gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, performance_counters.prfcnt_en.fe, 0x1); #endif /* !MALI_USE_CSF */ - gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, - &index, 1, + gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, &index, 1, performance_counters.prfcnt_en.tiler, DUMMY_IMPLEMENTATION_TILER_PRESENT); gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, performance_counters.prfcnt_en.l2, performance_counters.l2_present); - gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, - &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES, + gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, &index, + KBASE_DUMMY_MODEL_MAX_SHADER_CORES, performance_counters.prfcnt_en.shader, performance_counters.shader_present); @@ -741,10 +761,8 @@ static void init_register_statuses(struct dummy_model_t *dummy) for (i = 0; i < NUM_SLOTS; i++) { hw_error_status.js_status[i] = 0; - hw_error_status.job_irq_rawstat |= - (dummy->slots[i].job_complete_irq_asserted) << i; - hw_error_status.job_irq_status |= - (dummy->slots[i].job_complete_irq_asserted) << i; + hw_error_status.job_irq_rawstat |= (dummy->slots[i].job_complete_irq_asserted) << i; + hw_error_status.job_irq_status |= (dummy->slots[i].job_complete_irq_asserted) << i; } for (i = 0; i < NUM_MMU_AS; i++) { hw_error_status.as_command[i] = 0; @@ -765,21 +783,17 @@ static void 
update_register_statuses(struct dummy_model_t *dummy, unsigned int j if (hw_error_status.js_status[job_slot] == 0) { /* status reg is clean; it can be written */ - switch (hw_error_status.errors_mask & - IS_A_JOB_ERROR) { + switch (hw_error_status.errors_mask & IS_A_JOB_ERROR) { case KBASE_JOB_INTERRUPTED: - hw_error_status.js_status[job_slot] = - JS_STATUS_INTERRUPTED; + hw_error_status.js_status[job_slot] = JS_STATUS_INTERRUPTED; break; case KBASE_JOB_STOPPED: - hw_error_status.js_status[job_slot] = - JS_STATUS_STOPPED; + hw_error_status.js_status[job_slot] = JS_STATUS_STOPPED; break; case KBASE_JOB_TERMINATED: - hw_error_status.js_status[job_slot] = - JS_STATUS_TERMINATED; + hw_error_status.js_status[job_slot] = JS_STATUS_TERMINATED; break; case KBASE_JOB_CONFIG_FAULT: @@ -788,18 +802,15 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_JOB_POWER_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_POWER_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_POWER_FAULT; break; case KBASE_JOB_READ_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_READ_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_READ_FAULT; break; case KBASE_JOB_WRITE_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_WRITE_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_WRITE_FAULT; break; case KBASE_JOB_AFFINITY_FAULT: @@ -808,8 +819,7 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_JOB_BUS_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_BUS_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_BUS_FAULT; break; case KBASE_INSTR_INVALID_PC: @@ -868,14 +878,13 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_UNKNOWN: - hw_error_status.js_status[job_slot] = - JS_STATUS_UNKNOWN; + hw_error_status.js_status[job_slot] = JS_STATUS_UNKNOWN; break; default: model_error_log(KBASE_CORE, - 
"\nAtom Chain 0x%llx: Invalid Error Mask!", - hw_error_status.current_jc); + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); break; } } @@ -883,32 +892,27 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j /* we set JOB_FAIL_ */ hw_error_status.job_irq_rawstat |= - (dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot + 16); + (dummy->slots[job_slot].job_complete_irq_asserted) + << (job_slot + 16); hw_error_status.job_irq_status |= - (((dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot)) & - (dummy->slots[job_slot].job_irq_mask << - job_slot)) << 16; + (((dummy->slots[job_slot].job_complete_irq_asserted) + << (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << job_slot)) + << 16; } else { hw_error_status.job_irq_rawstat |= - (dummy->slots[job_slot].job_complete_irq_asserted) << - job_slot; + (dummy->slots[job_slot].job_complete_irq_asserted) << job_slot; hw_error_status.job_irq_status |= - ((dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot)) & - (dummy->slots[job_slot].job_irq_mask << - job_slot); + ((dummy->slots[job_slot].job_complete_irq_asserted) << (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << job_slot); } } else { hw_error_status.job_irq_rawstat |= - (dummy->slots[job_slot].job_complete_irq_asserted) << - job_slot; + (dummy->slots[job_slot].job_complete_irq_asserted) << job_slot; hw_error_status.job_irq_status |= - ((dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot)) & + ((dummy->slots[job_slot].job_complete_irq_asserted) << (job_slot)) & (dummy->slots[job_slot].job_irq_mask << job_slot); - } /* end of job register statuses */ + } /* end of job register statuses */ if (hw_error_status.errors_mask & IS_A_MMU_ERROR) { int i; @@ -916,27 +920,22 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j for (i = 0; i < NUM_MMU_AS; i++) { if (i == hw_error_status.faulty_mmu_as) { if 
(hw_error_status.as_faultstatus[i] == 0) { - u32 status = - hw_error_status.as_faultstatus[i]; + u32 status = hw_error_status.as_faultstatus[i]; /* status reg is clean; it can be * written */ - switch (hw_error_status.errors_mask & - IS_A_MMU_ERROR) { + switch (hw_error_status.errors_mask & IS_A_MMU_ERROR) { case KBASE_TRANSLATION_FAULT: /* 0xCm means TRANSLATION FAULT * (m is mmu_table_level) */ - status = - ((1 << 7) | (1 << 6) | - hw_error_status.mmu_table_level - ); + status = ((1 << 7) | (1 << 6) | + hw_error_status.mmu_table_level); break; case KBASE_PERMISSION_FAULT: /*0xC8 means PERMISSION FAULT */ - status = ((1 << 7) | (1 << 6) | - (1 << 3)); + status = ((1 << 7) | (1 << 6) | (1 << 3)); break; case KBASE_TRANSTAB_BUS_FAULT: @@ -944,38 +943,34 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j * BUS FAULT (m is * mmu_table_level) */ - status = ((1 << 7) | (1 << 6) | - (1 << 4) | - hw_error_status.mmu_table_level - ); + status = ((1 << 7) | (1 << 6) | (1 << 4) | + hw_error_status.mmu_table_level); break; case KBASE_ACCESS_FLAG: /* 0xD8 means ACCESS FLAG */ - status = ((1 << 7) | (1 << 6) | - (1 << 4) | (1 << 3)); + status = + ((1 << 7) | (1 << 6) | (1 << 4) | (1 << 3)); break; default: - model_error_log(KBASE_CORE, - "\nAtom Chain 0x%llx: Invalid Error Mask!", - hw_error_status.current_jc); + model_error_log( + KBASE_CORE, + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); break; } - hw_error_status.as_faultstatus[i] = - status; + hw_error_status.as_faultstatus[i] = status; } - if (hw_error_status.errors_mask & - KBASE_TRANSTAB_BUS_FAULT) + if (hw_error_status.errors_mask & KBASE_TRANSTAB_BUS_FAULT) hw_error_status.mmu_irq_rawstat |= 1 << (16 + i); /* bus error */ else - hw_error_status.mmu_irq_rawstat |= - 1 << i; /* page fault */ + hw_error_status.mmu_irq_rawstat |= 1 << i; /* page fault */ } } - } /*end of mmu register statuses */ + } /*end of mmu register statuses */ if 
(hw_error_status.errors_mask & IS_A_GPU_ERROR) { if (hw_error_status.gpu_fault_status) { /* not the first GPU error reported */ @@ -988,19 +983,18 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_SHAREABILITY_FAULT: - hw_error_status.gpu_fault_status = (1 << 7) | - (1 << 3); + hw_error_status.gpu_fault_status = (1 << 7) | (1 << 3); break; default: model_error_log(KBASE_CORE, - "\nAtom Chain 0x%llx: Invalid Error Mask!", + "\nAtom Chain 0x%llx: Invalid Error Mask!", hw_error_status.current_jc); break; } } } - hw_error_status.errors_mask = 0; /*clear error mask */ + hw_error_status.errors_mask = 0; /*clear error mask */ } #if !MALI_USE_CSF @@ -1017,22 +1011,19 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) { /* clear the bits we're updating */ - dummy->job_irq_js_state &= ~((1 << (16 + i)) | - (1 << i)); + dummy->job_irq_js_state &= ~((1 << (16 + i)) | (1 << i)); if (hw_error_status.js_status[i]) { - dummy->job_irq_js_state |= next_busy << - (i + 16); + dummy->job_irq_js_state |= next_busy << (i + 16); if (mask & (1 << (i + 16))) { /* clear job slot status */ hw_error_status.js_status[i] = 0; /* continue execution of jobchain */ - dummy->slots[i].job_active = - dummy->slots[i].job_queued; + dummy->slots[i].job_active = dummy->slots[i].job_queued; } } else { /* set bits if needed */ - dummy->job_irq_js_state |= ((slot_active << i) | - (next_busy << (i + 16))); + dummy->job_irq_js_state |= + ((slot_active << i) | (next_busy << (i + 16))); } } } @@ -1070,7 +1061,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp } for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { - const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; + const struct control_reg_values_t *const fcrv = &all_control_reg_values[i]; if (!strcmp(fcrv->name, gpu)) { ret = fcrv; @@ -1081,8 +1072,8 @@ static const 
struct control_reg_values_t *find_control_reg_values(const char *gp if (!ret) { ret = &all_control_reg_values[0]; - pr_warn("Couldn't find control register values for GPU %s; using default %s\n", - gpu, ret->name); + pr_warn("Couldn't find control register values for GPU %s; using default %s\n", gpu, + ret->name); } return ret; @@ -1101,10 +1092,12 @@ void *midgard_model_create(struct kbase_device *kbdev) dummy->job_irq_js_state = 0; init_register_statuses(dummy); dummy->control_reg_values = find_control_reg_values(no_mali_gpu); - performance_counters.l2_present = get_implementation_register( - GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); - performance_counters.shader_present = get_implementation_register( - GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + { + performance_counters.l2_present = get_implementation_register( + GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); + performance_counters.shader_present = get_implementation_register( + GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + } gpu_device_set_data(dummy, kbdev); @@ -1165,7 +1158,7 @@ static void midgard_model_update(void *h) * the head registers - which has not yet been read */ if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) || - (hw_error_status.job_irq_rawstat & (1 << i))) { + (hw_error_status.job_irq_rawstat & (1 << i))) { continue; } @@ -1173,7 +1166,7 @@ static void midgard_model_update(void *h) signal_int(dummy, i); #ifdef CONFIG_MALI_BIFROST_ERROR_INJECT midgard_set_error(i); -#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */ +#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */ update_register_statuses(dummy, i); /*if this job slot returned failures we cannot use it */ if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) { @@ -1186,7 +1179,7 @@ static void midgard_model_update(void *h) if (dummy->slots[i].job_active) { if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) model_error_log(KBASE_CORE, - "\natom %lld running a job on a 
dirty slot", + "\natom %lld running a job on a dirty slot", hw_error_status.current_jc); } } @@ -1202,7 +1195,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) if (dummy->slots[i].job_active) { hw_error_status.job_irq_rawstat |= (1 << (16 + i)); - hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ + hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ } } } @@ -1215,57 +1208,48 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) spin_lock_irqsave(&hw_error_status.access_lock, flags); #if !MALI_USE_CSF - if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && - (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { + if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { unsigned int slot_idx = (addr >> 7) & 0xf; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { - hw_error_status.current_jc &= - ~((u64) (0xFFFFFFFF)); - hw_error_status.current_jc |= (u64) value; + hw_error_status.current_jc &= ~((u64)(0xFFFFFFFF)); + hw_error_status.current_jc |= (u64)value; } if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) { - hw_error_status.current_jc &= (u64) 0xFFFFFFFF; - hw_error_status.current_jc |= - ((u64) value) << 32; + hw_error_status.current_jc &= (u64)0xFFFFFFFF; + hw_error_status.current_jc |= ((u64)value) << 32; } - if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && - value == 1) { + if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == 1) { pr_debug("%s", "start detected"); KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active || - !dummy->slots[slot_idx].job_queued); + !dummy->slots[slot_idx].job_queued); if ((dummy->slots[slot_idx].job_active) || - (hw_error_status.job_irq_rawstat & - (1 << (slot_idx + 16)))) { - pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~" - ); + (hw_error_status.job_irq_rawstat & (1 << (slot_idx + 16)))) { + pr_debug( + "~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending 
~~~~~~~~~"); dummy->slots[slot_idx].job_queued = 1; } else { dummy->slots[slot_idx].job_active = 1; } } - if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == - 0) + if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == 0) dummy->slots[slot_idx].job_queued = 0; if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) && - (value == JS_COMMAND_SOFT_STOP || - value == JS_COMMAND_HARD_STOP)) { + (value == JS_COMMAND_SOFT_STOP || value == JS_COMMAND_HARD_STOP)) { /*dummy->slots[slot_idx].job_active = 0; */ hw_error_status.current_job_slot = slot_idx; if (value == JS_COMMAND_SOFT_STOP) { hw_error_status.errors_mask = KBASE_JOB_STOPPED; - } else { /*value == 3 */ + } else { /*value == 3 */ if (dummy->slots[slot_idx].job_disabled != 0) { - pr_debug("enabling slot after HARD_STOP" - ); + pr_debug("enabling slot after HARD_STOP"); dummy->slots[slot_idx].job_disabled = 0; } - hw_error_status.errors_mask = - KBASE_JOB_TERMINATED; + hw_error_status.errors_mask = KBASE_JOB_TERMINATED; } } } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { @@ -1289,8 +1273,7 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) for (i = 0; i < NUM_SLOTS; i++) dummy->slots[i].job_irq_mask = (value >> i) & 0x01; pr_debug("job irq mask to value %x", value); - } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { -#else /* !MALI_USE_CSF */ +#else /* MALI_USE_CSF */ if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { pr_debug("%s", "job irq cleared"); @@ -1298,8 +1281,8 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) hw_error_status.job_irq_status &= ~(value); } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ - } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { #endif /* !MALI_USE_CSF */ + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { pr_debug("GPU_IRQ_MASK set to 0x%x", value); dummy->reset_completed_mask = (value >> 8) & 0x01; dummy->power_changed_mask = (value >> 9) & 0x03; @@ -1310,14 +1293,14 @@ void 
midgard_model_write_reg(void *h, u32 addr, u32 value) } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { dummy->coherency_enable = value; } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { - if (value & (1 << 8)) { + if (value & RESET_COMPLETED) { pr_debug("%s", "gpu RESET_COMPLETED irq cleared"); dummy->reset_completed = 0; } - if (value & (3 << 9)) + if (value & (POWER_CHANGED_SINGLE | POWER_CHANGED_ALL)) dummy->power_changed = 0; - if (value & (1 << 17)) + if (value & CLEAN_CACHES_COMPLETED) dummy->clean_caches_completed = false; #if MALI_USE_CSF @@ -1376,29 +1359,31 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) #endif } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { dummy->l2_config = value; - } #if MALI_USE_CSF - else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && - addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + - (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { - if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) + } else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && + addr < CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + if (addr == CSF_HW_DOORBELL_PAGE_OFFSET) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + u32 alloc_reg = (addr - GPU_CONTROL_REG(SYSC_ALLOC0)) >> 2; + + sysc_alloc_regs[alloc_reg] = value; + } else if ((addr >= GPU_CONTROL_REG(L2_SLICE_HASH_0)) && + (addr < GPU_CONTROL_REG(L2_SLICE_HASH(L2_SLICE_HASH_COUNT)))) { /* Do nothing */ - } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && - (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { - /* Do nothing */ - } else if (addr == IPA_CONTROL_REG(COMMAND)) { + } else if (addr == IPA_CONTROL_REG(COMMAND) + ) { pr_debug("Received IPA_CONTROL command"); - } else if (addr == IPA_CONTROL_REG(TIMER)) { + } else if (addr == IPA_CONTROL_REG(TIMER) + ) { ipa_control_timer_enabled = value ? 
true : false; } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) && (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) { - enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)( - (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); - bool is_low_word = - !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); + enum kbase_ipa_core_type core_type = + (enum kbase_ipa_core_type)((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); + bool is_low_word = !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); if (is_low_word) { ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX; @@ -1407,87 +1392,72 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) ipa_ctl_select_config[core_type] &= U32_MAX; ipa_ctl_select_config[core_type] |= ((u64)value << 32); } - } #endif - else if (addr == MMU_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { hw_error_status.mmu_irq_mask = value; - } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) && + (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_COMMAND: - switch (value) { - case AS_COMMAND_NOP: - hw_error_status.as_command[mem_addr_space] = - value; + switch (AS_COMMAND_COMMAND_GET(value)) { + case AS_COMMAND_COMMAND_NOP: + hw_error_status.as_command[mem_addr_space] = value; break; - case AS_COMMAND_UPDATE: - hw_error_status.as_command[mem_addr_space] = - value; - if ((hw_error_status.as_faultstatus[ - mem_addr_space]) - && ((hw_error_status.as_transtab[ - mem_addr_space] & 0x3) != 0)) { - model_error_log(KBASE_CORE, - "\n ERROR: AS_COMMAND issued UPDATE on error condition before 
AS_TRANSTAB been set to unmapped\n" - ); - } else if ((hw_error_status.as_faultstatus[ - mem_addr_space]) - && ((hw_error_status.as_transtab[ - mem_addr_space] & 0x3) == 0)) { - + case AS_COMMAND_COMMAND_UPDATE: + hw_error_status.as_command[mem_addr_space] = value; + if ((hw_error_status.as_faultstatus[mem_addr_space]) && + ((hw_error_status.as_transtab[mem_addr_space] & 0x3) != 0)) { + model_error_log( + KBASE_CORE, + "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n"); + } else if ((hw_error_status.as_faultstatus[mem_addr_space]) && + ((hw_error_status.as_transtab[mem_addr_space] & 0x3) == + 0)) { /*invalidate all active jobs */ invalidate_active_jobs(dummy); /* error handled */ - hw_error_status.as_faultstatus[ - mem_addr_space] = 0; + hw_error_status.as_faultstatus[mem_addr_space] = 0; } break; - case AS_COMMAND_LOCK: - case AS_COMMAND_UNLOCK: - hw_error_status.as_command[mem_addr_space] = - value; + case AS_COMMAND_COMMAND_LOCK: + case AS_COMMAND_COMMAND_UNLOCK: + hw_error_status.as_command[mem_addr_space] = value; break; - case AS_COMMAND_FLUSH_PT: - case AS_COMMAND_FLUSH_MEM: - if (hw_error_status.as_command[mem_addr_space] - != AS_COMMAND_LOCK) - model_error_log(KBASE_CORE, - "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n" - ); + case AS_COMMAND_COMMAND_FLUSH_PT: + case AS_COMMAND_COMMAND_FLUSH_MEM: + if (hw_error_status.as_command[mem_addr_space] != + AS_COMMAND_COMMAND_LOCK) + model_error_log( + KBASE_CORE, + "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n"); else /* error handled if any */ - hw_error_status.as_faultstatus[ - mem_addr_space] = 0; - hw_error_status.as_command[mem_addr_space] = - value; + hw_error_status.as_faultstatus[mem_addr_space] = 0; + hw_error_status.as_command[mem_addr_space] = value; break; default: model_error_log(KBASE_CORE, - "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", - value); + "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", + value); break; } break; case 
AS_TRANSTAB_LO: - hw_error_status.as_transtab[mem_addr_space] &= - ~((u64) (0xffffffff)); - hw_error_status.as_transtab[mem_addr_space] |= - (u64) value; + hw_error_status.as_transtab[mem_addr_space] &= ~((u64)(0xffffffff)); + hw_error_status.as_transtab[mem_addr_space] |= (u64)value; break; case AS_TRANSTAB_HI: - hw_error_status.as_transtab[mem_addr_space] &= - (u64) 0xffffffff; - hw_error_status.as_transtab[mem_addr_space] |= - ((u64) value) << 32; + hw_error_status.as_transtab[mem_addr_space] &= (u64)0xffffffff; + hw_error_status.as_transtab[mem_addr_space] |= ((u64)value) << 32; break; case AS_LOCKADDR_LO: @@ -1500,9 +1470,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) break; default: - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n", - mem_addr_space, addr, value); + mem_addr_space, addr, value); break; } } else { @@ -1534,64 +1505,71 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) break; #endif /* !MALI_USE_CSF */ case TILER_PWRON_LO: - dummy->power_on |= (value & 1) << 1; + dummy->domain_power_on[INDEX_TILER] |= value & + DUMMY_IMPLEMENTATION_TILER_PRESENT; /* Also ensure L2 is powered on */ - dummy->power_on |= value & 1; + fallthrough; + case L2_PWRON_LO: + dummy->domain_power_on[INDEX_L2] |= value & DUMMY_IMPLEMENTATION_L2_PRESENT; dummy->power_changed = 1; break; case SHADER_PWRON_LO: - dummy->power_on |= - (value & dummy->control_reg_values->shader_present) << 2; - dummy->power_changed = 1; - break; - case L2_PWRON_LO: - dummy->power_on |= value & 1; + dummy->domain_power_on[INDEX_SHADER] |= + value & dummy->control_reg_values->shader_present; dummy->power_changed = 1; break; case STACK_PWRON_LO: - dummy->stack_power_on_lo |= value; - dummy->power_changed = 1; - break; - case TILER_PWROFF_LO: - dummy->power_on &= ~((value & 1) << 1); - dummy->power_changed = 1; - break; - case SHADER_PWROFF_LO: - dummy->power_on &= - 
~((value & dummy->control_reg_values->shader_present) << 2); - dummy->power_changed = 1; - break; - case L2_PWROFF_LO: - dummy->power_on &= ~(value & 1); - /* Also ensure tiler is powered off */ - dummy->power_on &= ~((value & 1) << 1); - dummy->power_changed = 1; - break; - case STACK_PWROFF_LO: - dummy->stack_power_on_lo &= ~value; + dummy->domain_power_on[INDEX_STACK] |= + value & dummy->control_reg_values->stack_present; dummy->power_changed = 1; break; + case L2_PWROFF_LO: + dummy->domain_power_on[INDEX_L2] &= + ~(value & DUMMY_IMPLEMENTATION_L2_PRESENT); + /* Also ensure tiler is powered off */ + fallthrough; + case TILER_PWROFF_LO: + dummy->domain_power_on[INDEX_TILER] &= + ~(value & DUMMY_IMPLEMENTATION_TILER_PRESENT); + dummy->power_changed = 1; + break; + case SHADER_PWROFF_LO: + dummy->domain_power_on[INDEX_SHADER] &= + ~(value & dummy->control_reg_values->shader_present); + dummy->power_changed = 1; + break; + case STACK_PWROFF_LO: + dummy->domain_power_on[INDEX_STACK] &= + ~(value & dummy->control_reg_values->stack_present); + dummy->power_changed = 1; + break; + + case TILER_PWRON_HI: + case SHADER_PWRON_HI: + case L2_PWRON_HI: case TILER_PWROFF_HI: case SHADER_PWROFF_HI: case L2_PWROFF_HI: case PWR_KEY: case PWR_OVERRIDE0: -#if !MALI_USE_CSF +#if MALI_USE_CSF + case SHADER_PWRFEATURES: + case CSF_CONFIG: +#else /* !MALI_USE_CSF */ case JM_CONFIG: case PRFCNT_CONFIG: -#else /* !MALI_USE_CSF */ - case CSF_CONFIG: -#endif /* !MALI_USE_CSF */ +#endif /* MALI_USE_CSF */ case SHADER_CONFIG: case TILER_CONFIG: case L2_MMU_CONFIG: /* Writes ignored */ break; default: - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n", - addr, value); + addr, value); break; } } @@ -1608,7 +1586,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) spin_lock_irqsave(&hw_error_status.access_lock, flags); - *value = 0; /* 0 by default */ + *value = 0; /* 0 by default */ #if 
!MALI_USE_CSF if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { pr_debug("%s", "JS_ACTIVE being read"); @@ -1626,21 +1604,19 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) { *value = hw_error_status.job_irq_status; pr_debug("JS_IRQ_STATUS being read %x", *value); - } #if !MALI_USE_CSF - else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { int i; *value = 0; for (i = 0; i < NUM_SLOTS; i++) *value |= dummy->slots[i].job_irq_mask << i; pr_debug("JS_IRQ_MASK being read %x", *value); - } #else /* !MALI_USE_CSF */ - else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) - ; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ #endif /* !MALI_USE_CSF */ - else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = (dummy->reset_completed_mask << 8) | ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | #if MALI_USE_CSF @@ -1649,37 +1625,36 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) (dummy->power_changed_mask << 9) | (1 << 7) | 1; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { - *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | - (dummy->reset_completed << 8) | -#if !MALI_USE_CSF - (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -#endif /* !MALI_USE_CSF */ - ((dummy->clean_caches_completed ? 1u : 0u) << 17) | + *value = ((dummy->clean_caches_completed ? 1u : 0u) << 17) | #if MALI_USE_CSF ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) | +#else + (dummy->prfcnt_sample_completed ? 
PRFCNT_SAMPLE_COMPLETED : 0) | #endif hw_error_status.gpu_error_irq; + *value |= (dummy->power_changed << 9) | (dummy->power_changed << 10) | + (dummy->reset_completed << 8); + pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { - *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | - ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | - ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | -#if !MALI_USE_CSF - (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -#endif /* !MALI_USE_CSF */ - (((dummy->clean_caches_completed && + *value = (((dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ? - 1u : - 0u) + 1u : + 0u) << 17) | #if MALI_USE_CSF (((dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ? - 1u : - 0u) + 1u : + 0u) << 20) | +#else + (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | #endif hw_error_status.gpu_error_irq; + *value |= ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | + ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | + ((dummy->reset_completed & dummy->reset_completed_mask) << 8); pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { *value = 0; @@ -1691,18 +1666,17 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = hw_error_status.gpu_fault_status; } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { *value = dummy->l2_config; - } #if MALI_USE_CSF - else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && - (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && + (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + u32 alloc_reg = (addr - GPU_CONTROL_REG(SYSC_ALLOC0)) >> 2; + *value = sysc_alloc_regs[alloc_reg]; + } else if ((addr >= GPU_CONTROL_REG(L2_SLICE_HASH_0)) && + (addr < 
GPU_CONTROL_REG(L2_SLICE_HASH(L2_SLICE_HASH_COUNT)))) { *value = 0; - } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && - (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { - *value = 0; - } #endif - else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && - (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { + } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && + (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { switch (addr) { case GPU_CONTROL_REG(SHADER_PRESENT_LO): case GPU_CONTROL_REG(SHADER_PRESENT_HI): @@ -1715,22 +1689,22 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = get_implementation_register(addr, dummy->control_reg_values); break; case GPU_CONTROL_REG(SHADER_READY_LO): - *value = (dummy->power_on >> 0x02) & + *value = (dummy->domain_power_on[INDEX_SHADER]) & get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); break; case GPU_CONTROL_REG(TILER_READY_LO): - *value = (dummy->power_on >> 0x01) & + *value = (dummy->domain_power_on[INDEX_TILER]) & get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO), dummy->control_reg_values); break; case GPU_CONTROL_REG(L2_READY_LO): - *value = dummy->power_on & + *value = dummy->domain_power_on[INDEX_L2] & get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); break; case GPU_CONTROL_REG(STACK_READY_LO): - *value = dummy->stack_power_on_lo & + *value = dummy->domain_power_on[INDEX_STACK] & get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO), dummy->control_reg_values); break; @@ -1739,38 +1713,33 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) case GPU_CONTROL_REG(TILER_READY_HI): case GPU_CONTROL_REG(L2_READY_HI): case GPU_CONTROL_REG(STACK_READY_HI): - *value = 0; - break; - case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): - case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): - case GPU_CONTROL_REG(TILER_PWRTRANS_LO): - case GPU_CONTROL_REG(TILER_PWRTRANS_HI): case GPU_CONTROL_REG(L2_PWRTRANS_LO): 
case GPU_CONTROL_REG(L2_PWRTRANS_HI): + case GPU_CONTROL_REG(TILER_PWRTRANS_LO): + case GPU_CONTROL_REG(TILER_PWRTRANS_HI): + case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): + case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): case GPU_CONTROL_REG(STACK_PWRTRANS_LO): case GPU_CONTROL_REG(STACK_PWRTRANS_HI): - *value = 0; - break; - case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): - case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): - case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): - case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): case GPU_CONTROL_REG(L2_PWRACTIVE_LO): case GPU_CONTROL_REG(L2_PWRACTIVE_HI): - *value = 0; - break; + case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): + case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): + case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): + case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): -#if !MALI_USE_CSF - case GPU_CONTROL_REG(JM_CONFIG): -#else /* !MALI_USE_CSF */ +#if MALI_USE_CSF + case GPU_CONTROL_REG(SHADER_PWRFEATURES): case GPU_CONTROL_REG(CSF_CONFIG): -#endif /* !MALI_USE_CSF */ - +#else /* !MALI_USE_CSF */ + case GPU_CONTROL_REG(JM_CONFIG): +#endif /* MALI_USE_CSF */ case GPU_CONTROL_REG(SHADER_CONFIG): case GPU_CONTROL_REG(TILER_CONFIG): case GPU_CONTROL_REG(L2_MMU_CONFIG): + case GPU_CONTROL_REG(THREAD_TLS_ALLOC): *value = 0; break; @@ -1781,43 +1750,39 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->coherency_enable; break; - case GPU_CONTROL_REG(THREAD_TLS_ALLOC): - *value = 0; - break; - default: - model_error_log(KBASE_CORE, - "Dummy model register access: Reading unknown control reg 0x%x\n", - addr); + model_error_log( + KBASE_CORE, + "Dummy model register access: Reading unknown control reg 0x%x\n", + addr); break; } #if !MALI_USE_CSF } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && - (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { + (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { int slot_idx = (addr >> 7) & 0xf; int sub_reg = addr & 0x7F; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); switch (sub_reg) { case JS_HEAD_NEXT_LO: - *value 
= (u32) ((hw_error_status.current_jc) & - 0xFFFFFFFF); + *value = (u32)((hw_error_status.current_jc) & 0xFFFFFFFF); break; case JS_HEAD_NEXT_HI: - *value = (u32) (hw_error_status.current_jc >> 32); + *value = (u32)(hw_error_status.current_jc >> 32); break; case JS_STATUS: if (hw_error_status.js_status[slot_idx]) *value = hw_error_status.js_status[slot_idx]; else /* 0x08 means active, 0x00 idle */ - *value = (dummy->slots[slot_idx].job_active) - << 3; + *value = (dummy->slots[slot_idx].job_active) << 3; break; case JS_COMMAND_NEXT: *value = dummy->slots[slot_idx].job_queued; break; - /* The dummy model does not implement these registers + /** + * The dummy model does not implement these registers * avoid printing error messages */ case JS_HEAD_HI: @@ -1828,20 +1793,19 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) break; default: - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: unknown job slot reg 0x%02X being read\n", - sub_reg); + sub_reg); break; } -#endif /* !MALI_USE_CSF */ - } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { - *value = dummy->control_reg_values->as_present; -#if !MALI_USE_CSF } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) { *value = 0x7; #endif /* !MALI_USE_CSF */ + } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { + *value = dummy->control_reg_values->as_present; } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) && - addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { + addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { switch (addr) { case GPU_CONTROL_REG(TEXTURE_FEATURES_0): *value = 0xfffff; @@ -1861,7 +1825,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } #if !MALI_USE_CSF } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) && - addr <= GPU_CONTROL_REG(JS15_FEATURES)) { + addr <= GPU_CONTROL_REG(JS15_FEATURES)) { switch (addr) { case GPU_CONTROL_REG(JS0_FEATURES): *value = 0x20e; @@ -1880,8 +1844,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const 
value) break; } #endif /* !MALI_USE_CSF */ - } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) - && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { + } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { switch (addr) { case GPU_CONTROL_REG(L2_FEATURES): *value = 0x6100206; @@ -1906,12 +1869,12 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->control_reg_values->mmu_features; break; } - } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) - && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { + } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) && + addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { switch (addr) { case GPU_CONTROL_REG(THREAD_FEATURES): - *value = dummy->control_reg_values->thread_features - | (IMPLEMENTATION_MODEL << 30); + *value = dummy->control_reg_values->thread_features | + (THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE << 30); break; case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE): *value = dummy->control_reg_values->thread_max_barrier_size; @@ -1923,25 +1886,20 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->control_reg_values->thread_max_threads; break; } - } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) - && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { + } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) && + addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { *value = 0; - } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) - && addr <= MMU_AS_REG(15, AS_STATUS)) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) && + addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_TRANSTAB_LO: - *value = (u32) - (hw_error_status.as_transtab[mem_addr_space] & - 0xffffffff); + *value = (u32)(hw_error_status.as_transtab[mem_addr_space] & 0xffffffff); break; case 
AS_TRANSTAB_HI: - *value = (u32) - (hw_error_status.as_transtab[mem_addr_space] >> - 32); + *value = (u32)(hw_error_status.as_transtab[mem_addr_space] >> 32); break; case AS_STATUS: @@ -1950,8 +1908,8 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) case AS_FAULTSTATUS: if (mem_addr_space == hw_error_status.faulty_mmu_as) - *value = hw_error_status.as_faultstatus[ - hw_error_status.faulty_mmu_as]; + *value = hw_error_status + .as_faultstatus[hw_error_status.faulty_mmu_as]; else *value = 0; break; @@ -1967,73 +1925,62 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) break; default: - model_error_log(KBASE_CORE, - "Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n", - mem_addr_space, addr); + model_error_log( + KBASE_CORE, + "Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n", + mem_addr_space, addr); *value = 0; break; } - } else if (addr == MMU_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { *value = hw_error_status.mmu_irq_mask; - } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) { *value = hw_error_status.mmu_irq_rawstat; - } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { - *value = hw_error_status.mmu_irq_mask & - hw_error_status.mmu_irq_rawstat; - } + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) { + *value = hw_error_status.mmu_irq_mask & hw_error_status.mmu_irq_rawstat; #if MALI_USE_CSF - else if (addr == IPA_CONTROL_REG(STATUS)) { + } else if (addr == IPA_CONTROL_REG(STATUS) + ) { *value = (ipa_control_timer_enabled << 31); } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); + (addr <= 
IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, - counter_index, is_low_word); + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, counter_index, + is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); + (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, - counter_index, is_low_word); + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, counter_index, + is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); + (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, - counter_index, is_low_word); + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, counter_index, + is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { - 
u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); + (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); + + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, + is_low_word); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, - counter_index, is_low_word); - } #endif - else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { + } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { *value = dummy->control_reg_values->gpu_features_lo; } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { *value = dummy->control_reg_values->gpu_features_hi; } else { - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: Reading unsupported register 0x%x. 
Returning 0\n", - addr); + addr); *value = 0; } @@ -2049,11 +1996,9 @@ static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr lockdep_assert_held(&performance_counters.access_lock); - sample_size = - core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); - if ((usr_data_size >= usr_data_offset) && - (sample_size <= usr_data_size - usr_data_offset)) + if ((usr_data_size >= usr_data_offset) && (sample_size <= usr_data_size - usr_data_offset)) usr_data = usr_data_start + (usr_data_offset / sizeof(u32)); if (!usr_data) @@ -2070,20 +2015,17 @@ static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr return usr_data_offset + sample_size; } -static u32 set_kernel_sample_core_type(u64 *counters, - u64 *usr_data_start, u32 usr_data_offset, - u32 usr_data_size, u32 core_count) +static u32 set_kernel_sample_core_type(u64 *counters, u64 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) { u32 sample_size; u64 *usr_data = NULL; lockdep_assert_held(&performance_counters.access_lock); - sample_size = - core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); - if ((usr_data_size >= usr_data_offset) && - (sample_size <= usr_data_size - usr_data_offset)) + if ((usr_data_size >= usr_data_offset) && (sample_size <= usr_data_size - usr_data_offset)) usr_data = usr_data_start + (usr_data_offset / sizeof(u64)); if (!usr_data) @@ -2162,8 +2104,8 @@ void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); -void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 *l2_present, u64 *shader_present) +void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, + u64 *shader_present) { if (shader_present) *shader_present = 
performance_counters.shader_present; @@ -2172,12 +2114,12 @@ void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores); -void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 l2_present, u64 shader_present) +void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, + u64 shader_present) { - if (WARN_ON(!l2_present || !shader_present - || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS - || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) + if (WARN_ON(!l2_present || !shader_present || + hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS || + hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) return; performance_counters.l2_present = l2_present; @@ -2186,15 +2128,14 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, /* Update the GPU properties used by vinstr to calculate the counter * dump buffer size. */ - kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present); - kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present; + kbdev->gpu_props.num_l2_slices = hweight64(l2_present); + kbdev->gpu_props.coherency_info.group.core_mask = shader_present; kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present); kbdev->gpu_props.curr_config.shader_present = shader_present; } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); -int gpu_model_control(void *model, - struct kbase_model_control_params *params) +int gpu_model_control(void *model, struct kbase_model_control_params *params) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; int i; @@ -2214,16 +2155,3 @@ int gpu_model_control(void *model, return 0; } - -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. 
- * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 84842291c0f7..3c6561a2f7b9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,54 +43,55 @@ #define model_error_log(module, ...) pr_err(__VA_ARGS__) -#define NUM_SLOTS 4 /*number of job slots */ +#define NUM_SLOTS 4 /*number of job slots */ /*Errors Mask Codes*/ /* each bit of errors_mask is associated to a specific error: * NON FAULT STATUS CODES: only the following are implemented since the others * represent normal working statuses */ -#define KBASE_JOB_INTERRUPTED (1<<0) -#define KBASE_JOB_STOPPED (1<<1) -#define KBASE_JOB_TERMINATED (1<<2) +#define KBASE_JOB_INTERRUPTED (1 << 0) +#define KBASE_JOB_STOPPED (1 << 1) +#define KBASE_JOB_TERMINATED (1 << 2) /* JOB EXCEPTIONS: */ -#define KBASE_JOB_CONFIG_FAULT (1<<3) -#define KBASE_JOB_POWER_FAULT (1<<4) -#define KBASE_JOB_READ_FAULT (1<<5) -#define KBASE_JOB_WRITE_FAULT (1<<6) -#define KBASE_JOB_AFFINITY_FAULT (1<<7) -#define KBASE_JOB_BUS_FAULT (1<<8) -#define KBASE_INSTR_INVALID_PC (1<<9) -#define KBASE_INSTR_INVALID_ENC (1<<10) -#define KBASE_INSTR_TYPE_MISMATCH (1<<11) -#define KBASE_INSTR_OPERAND_FAULT (1<<12) -#define KBASE_INSTR_TLS_FAULT (1<<13) -#define KBASE_INSTR_BARRIER_FAULT (1<<14) -#define KBASE_INSTR_ALIGN_FAULT (1<<15) -#define KBASE_DATA_INVALID_FAULT (1<<16) -#define 
KBASE_TILE_RANGE_FAULT (1<<17) -#define KBASE_ADDR_RANGE_FAULT (1<<18) -#define KBASE_OUT_OF_MEMORY (1<<19) -#define KBASE_UNKNOWN (1<<20) +#define KBASE_JOB_CONFIG_FAULT (1 << 3) +#define KBASE_JOB_POWER_FAULT (1 << 4) +#define KBASE_JOB_READ_FAULT (1 << 5) +#define KBASE_JOB_WRITE_FAULT (1 << 6) +#define KBASE_JOB_AFFINITY_FAULT (1 << 7) +#define KBASE_JOB_BUS_FAULT (1 << 8) +#define KBASE_INSTR_INVALID_PC (1 << 9) +#define KBASE_INSTR_INVALID_ENC (1 << 10) +#define KBASE_INSTR_TYPE_MISMATCH (1 << 11) +#define KBASE_INSTR_OPERAND_FAULT (1 << 12) +#define KBASE_INSTR_TLS_FAULT (1 << 13) +#define KBASE_INSTR_BARRIER_FAULT (1 << 14) +#define KBASE_INSTR_ALIGN_FAULT (1 << 15) +#define KBASE_DATA_INVALID_FAULT (1 << 16) +#define KBASE_TILE_RANGE_FAULT (1 << 17) +#define KBASE_ADDR_RANGE_FAULT (1 << 18) +#define KBASE_OUT_OF_MEMORY (1 << 19) +#define KBASE_UNKNOWN (1 << 20) /* GPU EXCEPTIONS:*/ -#define KBASE_DELAYED_BUS_FAULT (1<<21) -#define KBASE_SHAREABILITY_FAULT (1<<22) +#define KBASE_DELAYED_BUS_FAULT (1 << 21) +#define KBASE_SHAREABILITY_FAULT (1 << 22) /* MMU EXCEPTIONS:*/ -#define KBASE_TRANSLATION_FAULT (1<<23) -#define KBASE_PERMISSION_FAULT (1<<24) -#define KBASE_TRANSTAB_BUS_FAULT (1<<25) -#define KBASE_ACCESS_FLAG (1<<26) +#define KBASE_TRANSLATION_FAULT (1 << 23) +#define KBASE_PERMISSION_FAULT (1 << 24) +#define KBASE_TRANSTAB_BUS_FAULT (1 << 25) +#define KBASE_ACCESS_FLAG (1 << 26) /* generic useful bitmasks */ #define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED) #define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT) -#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT) +#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT | KBASE_SHAREABILITY_FAULT) /* number of possible MMU address spaces */ -#define NUM_MMU_AS 16 /* total number of MMU address spaces as in +#define NUM_MMU_AS \ + 16 /* total number of MMU address spaces as in * MMU_IRQ_RAWSTAT register */ @@ -169,8 +170,7 @@ struct 
gpu_model_prfcnt_en { void midgard_set_error(int job_slot); int job_atom_inject_error(struct kbase_error_params *params); -int gpu_model_control(void *h, - struct kbase_model_control_params *params); +int gpu_model_control(void *h, struct kbase_model_control_params *params); /** * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values @@ -194,10 +194,10 @@ int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); */ void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); -void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 *l2_present, u64 *shader_present); -void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 l2_present, u64 shader_present); +void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, + u64 *shader_present); +void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, + u64 shader_present); /* Clear the counter values array maintained by the dummy model */ void gpu_model_clear_prfcnt_values(void); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c index f310cc74cb24..072ad5bb01a0 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c @@ -56,44 +56,37 @@ static void gpu_generate_error(void) /* pick up a faulty mmu address space */ hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; /* pick up an mmu table level */ - hw_error_status.mmu_table_level = - 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); - hw_error_status.errors_mask = - (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); + hw_error_status.mmu_table_level = 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); + hw_error_status.errors_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); /*is there also one or more errors? 
*/ if ((prandom_u32() % 100) < multiple_error_probability) { - errors_num = 1 + (prandom_u32() % - (MAX_CONCURRENT_FAULTS - 1)); + errors_num = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1)); while (errors_num-- > 0) { u32 temp_mask; - temp_mask = (u32)( - 1 << (prandom_u32() % TOTAL_FAULTS)); + temp_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); /* below we check that no bit of the same error * type is set again in the error mask */ if ((temp_mask & IS_A_JOB_ERROR) && - (hw_error_status.errors_mask & - IS_A_JOB_ERROR)) { + (hw_error_status.errors_mask & IS_A_JOB_ERROR)) { errors_num++; continue; } if ((temp_mask & IS_A_MMU_ERROR) && - (hw_error_status.errors_mask & - IS_A_MMU_ERROR)) { + (hw_error_status.errors_mask & IS_A_MMU_ERROR)) { errors_num++; continue; } if ((temp_mask & IS_A_GPU_ERROR) && - (hw_error_status.errors_mask & - IS_A_GPU_ERROR)) { + (hw_error_status.errors_mask & IS_A_GPU_ERROR)) { errors_num++; continue; } /* this error mask is already set */ if ((hw_error_status.errors_mask | temp_mask) == - hw_error_status.errors_mask) { + hw_error_status.errors_mask) { errors_num++; continue; } @@ -114,8 +107,7 @@ int job_atom_inject_error(struct kbase_error_params *params) if (!new_elem) { model_error_log(KBASE_CORE, - "\njob_atom_inject_error: kzalloc failed for new_elem\n" - ); + "\njob_atom_inject_error: kzalloc failed for new_elem\n"); return -ENOMEM; } new_elem->params.jc = params->jc; @@ -124,7 +116,7 @@ int job_atom_inject_error(struct kbase_error_params *params) new_elem->params.faulty_mmu_as = params->faulty_mmu_as; /*circular list below */ - if (error_track_list == NULL) { /*no elements */ + if (error_track_list == NULL) { /*no elements */ error_track_list = new_elem; new_elem->next = error_track_list; } else { @@ -154,12 +146,9 @@ void midgard_set_error(int job_slot) /* found a faulty atom matching with the * current one */ - hw_error_status.errors_mask = - walker->params.errors_mask; - hw_error_status.mmu_table_level = - 
walker->params.mmu_table_level; - hw_error_status.faulty_mmu_as = - walker->params.faulty_mmu_as; + hw_error_status.errors_mask = walker->params.errors_mask; + hw_error_status.mmu_table_level = walker->params.mmu_table_level; + hw_error_status.faulty_mmu_as = walker->params.faulty_mmu_as; hw_error_status.current_job_slot = job_slot; if (walker->next == walker) { @@ -179,5 +168,5 @@ void midgard_set_error(int job_slot) walker = walker->next; } while (auxiliar->next != error_track_list); } -#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ +#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c index e90e4df2f494..098b60d4f4a2 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ */ #include -#include +#include #include "backend/gpu/mali_kbase_model_linux.h" #include "device/mali_kbase_device.h" @@ -39,16 +39,14 @@ struct model_irq_data { static void serve_job_irq(struct work_struct *work) { - struct model_irq_data *data = container_of(work, struct model_irq_data, - work); + struct model_irq_data *data = container_of(work, struct model_irq_data, work); struct kbase_device *kbdev = data->kbdev; /* Make sure no worker is already serving this IRQ */ while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_STATUS)))) { + while ((val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)))) { unsigned long flags; /* Handle the IRQ */ @@ -67,19 +65,18 @@ static void serve_job_irq(struct work_struct *work) static void serve_gpu_irq(struct work_struct *work) { - struct model_irq_data *data = container_of(work, struct model_irq_data, - work); + struct model_irq_data *data = container_of(work, struct model_irq_data, work); struct kbase_device *kbdev = data->kbdev; /* Make sure no worker is already serving this IRQ */ while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_STATUS)))) { - /* Handle the IRQ */ + while ((val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_STATUS)))) { + /* Handle the GPU_IRQ */ kbase_gpu_interrupt(kbdev, val); } + } kmem_cache_free(kbdev->irq_slab, data); @@ -87,16 +84,14 @@ static void serve_gpu_irq(struct work_struct *work) static void serve_mmu_irq(struct work_struct *work) { - struct model_irq_data *data = container_of(work, struct model_irq_data, - work); + struct model_irq_data *data = container_of(work, struct model_irq_data, work); struct kbase_device *kbdev = data->kbdev; /* Make sure 
no worker is already serving this IRQ */ if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - MMU_REG(MMU_IRQ_STATUS)))) { + while ((val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS)))) { /* Handle the IRQ */ kbase_mmu_interrupt(kbdev, val); } @@ -142,30 +137,6 @@ void gpu_device_raise_irq(void *model, u32 irq) queue_work(kbdev->irq_workq, &data->work); } -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->reg_op_lock, flags); - midgard_model_write_reg(kbdev->model, offset, value); - spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); -} - -KBASE_EXPORT_TEST_API(kbase_reg_write); - -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) -{ - unsigned long flags; - u32 val; - - spin_lock_irqsave(&kbdev->reg_op_lock, flags); - midgard_model_read_reg(kbdev->model, offset, &val); - spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); - - return val; -} -KBASE_EXPORT_TEST_API(kbase_reg_read); - int kbase_install_interrupts(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -178,8 +149,8 @@ int kbase_install_interrupts(struct kbase_device *kbdev) if (kbdev->irq_workq == NULL) return -ENOMEM; - kbdev->irq_slab = kmem_cache_create("dummy_irq_slab", - sizeof(struct model_irq_data), 0, 0, NULL); + kbdev->irq_slab = + kmem_cache_create("dummy_irq_slab", sizeof(struct model_irq_data), 0, 0, NULL); if (kbdev->irq_slab == NULL) { destroy_workqueue(kbdev->irq_workq); return -ENOMEM; @@ -203,9 +174,8 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type) +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + int irq_type) { return 0; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c index bbf629065877..37c35ee9bd32 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,11 +28,13 @@ static bool always_on_shaders_needed(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); return true; } static bool always_on_get_core_active(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); return true; } @@ -58,15 +60,15 @@ static void always_on_term(struct kbase_device *kbdev) * and name. */ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { - "always_on", /* name */ - always_on_init, /* init */ - always_on_term, /* term */ - always_on_shaders_needed, /* shaders_needed */ - always_on_get_core_active, /* get_core_active */ - NULL, /* handle_event */ - KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_shaders_needed, /* shaders_needed */ + always_on_get_core_active, /* get_core_active */ + NULL, /* handle_event */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ #if MALI_USE_CSF - ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ + ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ #endif }; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h index 98d35dabec88..d0c209b05c80 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -75,4 +75,3 @@ struct kbasep_pm_policy_always_on { extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; #endif /* MALI_KBASE_PM_ALWAYS_ON_H */ - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c index 5c71fdf154b9..61b756855060 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include @@ -52,30 +52,22 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev) callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; if (callbacks) { - kbdev->pm.backend.callback_power_on = - callbacks->power_on_callback; - kbdev->pm.backend.callback_power_off = - callbacks->power_off_callback; - kbdev->pm.backend.callback_power_suspend = - callbacks->power_suspend_callback; - kbdev->pm.backend.callback_power_resume = - callbacks->power_resume_callback; - kbdev->pm.callback_power_runtime_init = - callbacks->power_runtime_init_callback; - kbdev->pm.callback_power_runtime_term = - callbacks->power_runtime_term_callback; - kbdev->pm.backend.callback_power_runtime_on = - callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_on = 
callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = callbacks->power_off_callback; + kbdev->pm.backend.callback_power_suspend = callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = callbacks->power_runtime_on_callback; kbdev->pm.backend.callback_power_runtime_off = - callbacks->power_runtime_off_callback; + callbacks->power_runtime_off_callback; kbdev->pm.backend.callback_power_runtime_idle = - callbacks->power_runtime_idle_callback; - kbdev->pm.backend.callback_soft_reset = - callbacks->soft_reset_callback; + callbacks->power_runtime_idle_callback; + kbdev->pm.backend.callback_soft_reset = callbacks->soft_reset_callback; kbdev->pm.backend.callback_power_runtime_gpu_idle = - callbacks->power_runtime_gpu_idle_callback; + callbacks->power_runtime_gpu_idle_callback; kbdev->pm.backend.callback_power_runtime_gpu_active = - callbacks->power_runtime_gpu_active_callback; + callbacks->power_runtime_gpu_active_callback; if (callbacks->power_runtime_init_callback) return callbacks->power_runtime_init_callback(kbdev); @@ -83,19 +75,6 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev) return 0; } - kbdev->pm.backend.callback_power_on = NULL; - kbdev->pm.backend.callback_power_off = NULL; - kbdev->pm.backend.callback_power_suspend = NULL; - kbdev->pm.backend.callback_power_resume = NULL; - kbdev->pm.callback_power_runtime_init = NULL; - kbdev->pm.callback_power_runtime_term = NULL; - kbdev->pm.backend.callback_power_runtime_on = NULL; - kbdev->pm.backend.callback_power_runtime_off = NULL; - kbdev->pm.backend.callback_power_runtime_idle = NULL; - kbdev->pm.backend.callback_soft_reset = NULL; - kbdev->pm.backend.callback_power_runtime_gpu_idle = NULL; - 
kbdev->pm.backend.callback_power_runtime_gpu_active = NULL; - return 0; } @@ -142,24 +121,17 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) mutex_init(&kbdev->pm.lock); - kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", - WQ_HIGHPRI | WQ_UNBOUND, 1); + kbdev->pm.backend.gpu_poweroff_wait_wq = + alloc_workqueue("kbase_pm_poweroff_wait", WQ_HIGHPRI | WQ_UNBOUND, 1); if (!kbdev->pm.backend.gpu_poweroff_wait_wq) return -ENOMEM; - INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, - kbase_pm_gpu_poweroff_wait_wq); + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq); kbdev->pm.backend.ca_cores_enabled = ~0ull; - kbdev->pm.backend.gpu_powered = false; - kbdev->pm.backend.gpu_ready = false; - kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); #endif -#ifdef CONFIG_MALI_BIFROST_DEBUG - kbdev->pm.backend.driver_ready_for_irqs = false; -#endif /* CONFIG_MALI_BIFROST_DEBUG */ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); #if !MALI_USE_CSF @@ -187,6 +159,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + if (kbase_pm_ca_init(kbdev) != 0) goto workq_fail; @@ -195,10 +168,8 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) if (kbase_pm_state_machine_init(kbdev) != 0) goto pm_state_machine_fail; - kbdev->pm.backend.hwcnt_desired = false; kbdev->pm.backend.hwcnt_disabled = true; - INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, - kbase_pm_hwcnt_disable_worker); + INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, kbase_pm_hwcnt_disable_worker); kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) @@ -209,35 +180,24 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.callback_power_runtime_gpu_idle; #endif - if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) { - 
kbdev->pm.backend.l2_always_on = false; - kbdev->pm.backend.gpu_clock_slow_down_wa = false; - + if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) return 0; - } /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { - kbdev->pm.backend.gpu_clock_slow_down_wa = false; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) kbdev->pm.backend.l2_always_on = true; - else - kbdev->pm.backend.l2_always_on = false; return 0; } /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ - kbdev->pm.backend.l2_always_on = false; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { kbdev->pm.backend.gpu_clock_slow_down_wa = true; - kbdev->pm.backend.gpu_clock_suspend_freq = 0; kbdev->pm.backend.gpu_clock_slow_down_desired = true; - kbdev->pm.backend.gpu_clock_slowed_down = false; INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, - kbase_pm_gpu_clock_control_worker); - } else - kbdev->pm.backend.gpu_clock_slow_down_wa = false; + kbase_pm_gpu_clock_control_worker); + } return 0; @@ -299,8 +259,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev) return; } #endif - WARN_ON(backend->shaders_state != - KBASE_SHADERS_OFF_CORESTACK_OFF || + WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF || backend->l2_state != KBASE_L2_OFF); #if MALI_USE_CSF mcu_state = backend->mcu_state; @@ -351,8 +310,8 @@ static void pm_handle_power_off(struct kbase_device *kbdev) static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_poweroff_wait_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, pm.backend.gpu_poweroff_wait_work); struct kbase_pm_device_data *pm = &kbdev->pm; struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; @@ -413,8 +372,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) 
kbdev->previous_frequency = kbdev->current_nominal_freq; /* Slow down GPU clock to the suspend clock*/ - kbase_devfreq_force_freq(kbdev, - kbdev->pm.backend.gpu_clock_suspend_freq); + kbase_devfreq_force_freq(kbdev, kbdev->pm.backend.gpu_clock_suspend_freq); #elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */ @@ -428,8 +386,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) kbdev->previous_frequency = clk_get_rate(clk); /* Slow down GPU clock to the suspend clock*/ - if (WARN_ON_ONCE(clk_set_rate(clk, - kbdev->pm.backend.gpu_clock_suspend_freq))) + if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->pm.backend.gpu_clock_suspend_freq))) dev_err(kbdev->dev, "Failed to set suspend freq\n"); #endif /* CONFIG_MALI_BIFROST_DVFS */ @@ -459,8 +416,7 @@ static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) /* Restore GPU clock */ if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency))) - dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", - kbdev->previous_frequency); + dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", kbdev->previous_frequency); /* Restart the metrics gathering framework */ kbase_pm_metrics_start(kbdev); @@ -470,8 +426,8 @@ static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_clock_control_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, pm.backend.gpu_clock_control_work); struct kbase_pm_device_data *pm = &kbdev->pm; struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; @@ -479,12 +435,10 @@ static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) /* Determine if GPU clock control is required */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!backend->gpu_clock_slowed_down && - backend->gpu_clock_slow_down_desired) { + if 
(!backend->gpu_clock_slowed_down && backend->gpu_clock_slow_down_desired) { slow_down = true; backend->gpu_clock_slowed_down = true; - } else if (backend->gpu_clock_slowed_down && - !backend->gpu_clock_slow_down_desired) { + } else if (backend->gpu_clock_slowed_down && !backend->gpu_clock_slow_down_desired) { normalize = true; backend->gpu_clock_slowed_down = false; } @@ -507,8 +461,8 @@ static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.hwcnt_disable_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, pm.backend.hwcnt_disable_work); struct kbase_pm_device_data *pm = &kbdev->pm; struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; @@ -575,18 +529,19 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; - int ret = 0; + int ret; WARN_ON(kbdev->pm.active_count); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) + return ret; kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(backend->poweroff_wait_in_progress); WARN_ON(backend->gpu_sleep_mode_active); if (backend->gpu_powered) { - backend->mcu_desired = false; backend->l2_desired = false; kbase_pm_update_state(kbdev); @@ -594,9 +549,8 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) ret = kbase_pm_wait_for_desired_state(kbdev); if (ret) { - dev_warn( - kbdev->dev, - "Wait for pm state change failed on synchronous power off"); + dev_warn(kbdev->dev, + "Wait for pm state change failed on synchronous power off"); ret = -EBUSY; goto out; } @@ -605,8 +559,7 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) * throughout and so need to invoke the idle callback before * the 
power down. */ - if (backend->callback_power_runtime_gpu_idle && - !backend->gpu_idled) { + if (backend->callback_power_runtime_gpu_idle && !backend->gpu_idled) { backend->callback_power_runtime_gpu_idle(kbdev); backend->gpu_idled = true; } @@ -664,25 +617,6 @@ unlock_hwaccess: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) -{ - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); -} -KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); - /** * is_gpu_powered_down - Check whether GPU is powered down * @@ -704,13 +638,11 @@ static bool is_gpu_powered_down(struct kbase_device *kbdev) void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev) { - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_gpu_powered_down(kbdev)); + wait_event_killable(kbdev->pm.backend.poweroff_wait, is_gpu_powered_down(kbdev)); } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_gpu_power_down); -int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, - unsigned int flags) +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { unsigned long irq_flags; int ret; @@ -731,8 +663,7 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, return ret; } #if MALI_USE_CSF - kbdev->pm.debug_core_mask = - kbdev->gpu_props.props.raw_props.shader_present; + kbdev->pm.debug_core_mask = kbdev->gpu_props.shader_present; spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); /* Set the initial value for 'shaders_avail'. 
It would be later * modified only from the MCU state machine, when the shader core @@ -744,9 +675,8 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); #else kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = - kbdev->pm.debug_core_mask[1] = - kbdev->pm.debug_core_mask[2] = - kbdev->gpu_props.props.raw_props.shader_present; + kbdev->pm.debug_core_mask[1] = kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.shader_present; #endif /* Pretend the GPU is active to prevent a power policy turning the GPU @@ -763,13 +693,10 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, } #endif - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); /* Ensure cycle counter is off */ kbdev->pm.backend.gpu_cycle_counter_requests = 0; - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); /* We are ready to receive IRQ's now as power policy is set up, so * enable them now. 
@@ -881,23 +808,23 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) } KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); #else -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask_js0, u64 new_core_mask_js1, - u64 new_core_mask_js2) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, + u64 new_core_mask_js1, u64 new_core_mask_js2) { lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->pm.lock); if (kbase_dummy_job_wa_enabled(kbdev)) { - dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); + dev_warn_once( + kbdev->dev, + "Change of core mask not supported for slot 0 as dummy job WA is enabled"); new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; } kbdev->pm.debug_core_mask[0] = new_core_mask_js0; kbdev->pm.debug_core_mask[1] = new_core_mask_js1; kbdev->pm.debug_core_mask[2] = new_core_mask_js2; - kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | - new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2; kbase_pm_update_dynamic_cores_onoff(kbdev); } @@ -936,7 +863,15 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) { +#if !MALI_USE_CSF + mutex_lock(&kbdev->js_data.runpool_mutex); + kbase_backend_timer_resume(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); +#endif /* !MALI_USE_CSF */ + return ret; + } #endif WARN_ON(kbdev->pm.backend.gpu_powered); @@ -945,13 +880,15 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) if (kbdev->pm.backend.callback_power_suspend) kbdev->pm.backend.callback_power_suspend(kbdev); - return ret; + return 0; } void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) { kbase_pm_lock(kbdev); + /* System resume callback has begun */ 
+ kbdev->pm.resuming = true; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { @@ -966,7 +903,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbase_backend_timer_resume(kbdev); #endif /* !MALI_USE_CSF */ - wake_up_all(&kbdev->pm.resume_wait); kbase_pm_unlock(kbdev); } @@ -982,22 +918,19 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); mutex_lock(&arb_vm_state->vm_state_lock); - if (kbdev->pm.backend.gpu_powered && - !kbase_pm_is_gpu_lost(kbdev)) { + if (kbdev->pm.backend.gpu_powered && !kbase_pm_is_gpu_lost(kbdev)) { kbase_pm_set_gpu_lost(kbdev, true); /* GPU is no longer mapped to VM. So no interrupts will * be received and Mali registers have been replaced by * dummy RAM */ - WARN(!kbase_is_gpu_removed(kbdev), - "GPU is still available after GPU lost event\n"); + WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n"); /* Full GPU reset will have been done by hypervisor, so * cancel */ - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); kbase_synchronize_irqs(kbdev); @@ -1012,7 +945,7 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) /* Cancel any pending HWC dumps */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || - kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); @@ -1064,9 +997,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); if (ret) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_warn( - kbdev->dev, - "Waiting for 
MCU to wake up failed on runtime suspend"); + dev_warn(kbdev->dev, "Waiting for MCU to wake up failed on runtime suspend"); kbdev->pm.backend.gpu_wakeup_override = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; @@ -1074,9 +1005,9 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) /* Check if a Doorbell mirror interrupt occurred meanwhile */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_sleep_mode_active && - kbdev->pm.backend.exit_gpu_sleep_mode) { - dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend after L2 power up"); + if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) { + dev_dbg(kbdev->dev, + "DB mirror interrupt occurred during runtime suspend after L2 power up"); kbdev->pm.backend.gpu_wakeup_override = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return -EBUSY; @@ -1099,8 +1030,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) /* After re-acquiring the kbdev->pm.lock, check if the device * became active (or active then idle) meanwhile. */ - if (kbdev->pm.active_count || - kbdev->pm.backend.poweroff_wait_in_progress) { + if (kbdev->pm.active_count || kbdev->pm.backend.poweroff_wait_in_progress) { dev_dbg(kbdev->dev, "Device became active on runtime suspend after suspending Scheduler"); ret = -EBUSY; @@ -1175,17 +1105,16 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev) * the fact that pm.lock is released before invoking Scheduler function * to suspend the CSGs. 
*/ - if (kbdev->pm.active_count || - kbdev->pm.backend.poweroff_wait_in_progress) { + if (kbdev->pm.active_count || kbdev->pm.backend.poweroff_wait_in_progress) { dev_dbg(kbdev->dev, "Device became active on runtime suspend"); ret = -EBUSY; goto unlock; } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_sleep_mode_active && - kbdev->pm.backend.exit_gpu_sleep_mode) { - dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend before L2 power up"); + if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) { + dev_dbg(kbdev->dev, + "DB mirror interrupt occurred during runtime suspend before L2 power up"); ret = -EBUSY; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); goto unlock; @@ -1203,7 +1132,8 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev) /* Disable interrupts and turn off the GPU clocks */ if (!kbase_pm_clock_off(kbdev)) { - dev_warn(kbdev->dev, "Failed to turn off GPU clocks on runtime suspend, MMU faults pending"); + dev_warn(kbdev->dev, + "Failed to turn off GPU clocks on runtime suspend, MMU faults pending"); WARN_ON(!kbdev->poweroff_pending); /* Previous call to kbase_pm_clock_off() would have disabled diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index e2b0a919282e..b16d8d99ad7e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,8 +37,7 @@ int kbase_pm_ca_init(struct kbase_device *kbdev) if (kbdev->current_core_mask) pm_backend->ca_cores_enabled = kbdev->current_core_mask; else - pm_backend->ca_cores_enabled = - kbdev->gpu_props.props.raw_props.shader_present; + pm_backend->ca_cores_enabled = kbdev->gpu_props.shader_present; #endif return 0; @@ -46,6 +45,7 @@ int kbase_pm_ca_init(struct kbase_device *kbdev) void kbase_pm_ca_term(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); } #ifdef CONFIG_MALI_BIFROST_DEVFREQ @@ -70,13 +70,15 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) old_core_mask = pm_backend->ca_cores_enabled; #else if (!(core_mask & kbdev->pm.debug_core_mask_all)) { - dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", - core_mask, kbdev->pm.debug_core_mask_all); + dev_err(kbdev->dev, + "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); goto unlock; } if (kbase_dummy_job_wa_enabled(kbdev)) { - dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); + dev_err_once(kbdev->dev, + "Dynamic core scaling not supported as dummy job WA is enabled"); goto unlock; } #endif /* MALI_USE_CSF */ @@ -98,8 +100,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) } #endif - dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", - pm_backend->ca_cores_enabled); + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", pm_backend->ca_cores_enabled); return; unlock: @@ -125,12 +126,10 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) * to limit it to be a subgroup of the curr config, otherwise the * shaders state machine on the PM does not evolve. 
*/ - return kbdev->gpu_props.curr_config.shader_present & - kbdev->pm.backend.ca_cores_enabled & - debug_core_mask; + return kbdev->gpu_props.curr_config.shader_present & kbdev->pm.backend.ca_cores_enabled & + debug_core_mask; #else - return kbdev->gpu_props.curr_config.shader_present & - debug_core_mask; + return kbdev->gpu_props.curr_config.shader_present & debug_core_mask; #endif } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h index 90dcaf5b67db..95ec1dfb0739 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,7 +71,7 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); * Calls into the core availability policy */ void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, - u64 cores_transitioning); + u64 cores_transitioning); /** * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h index d1e4b5327f8f..cc2773995582 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,4 +56,3 @@ extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); #endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c index f40b753850df..cd2d65b1e4bc 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,15 +52,15 @@ static void coarse_demand_term(struct kbase_device *kbdev) * and name. 
*/ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { - "coarse_demand", /* name */ - coarse_demand_init, /* init */ - coarse_demand_term, /* term */ - coarse_demand_shaders_needed, /* shaders_needed */ - coarse_demand_get_core_active, /* get_core_active */ - NULL, /* handle_event */ - KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ + "coarse_demand", /* name */ + coarse_demand_init, /* init */ + coarse_demand_term, /* term */ + coarse_demand_shaders_needed, /* shaders_needed */ + coarse_demand_get_core_active, /* get_core_active */ + NULL, /* handle_event */ + KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ #if MALI_USE_CSF - COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ + COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ #endif }; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h index 75d99a30efc0..e5ae92a23c90 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,17 +49,22 @@ struct kbase_jd_atom; * - kbase_pm_get_present_cores() * - kbase_pm_get_active_cores() * - kbase_pm_get_trans_cores() - * - kbase_pm_get_ready_cores(). + * - kbase_pm_get_ready_cores() + * - kbase_pm_get_state() + * - core_type_to_reg() + * - pwr_cmd_constructor() + * - valid_to_power_up() + * - valid_to_power_down() + * - kbase_pm_invoke() * - * They specify which type of core should be acted on. These values are set in - * a manner that allows core_type_to_reg() function to be simpler and more - * efficient. 
+ * They specify which type of core should be acted on. */ + enum kbase_pm_core_type { - KBASE_PM_CORE_L2 = L2_PRESENT_LO, - KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, - KBASE_PM_CORE_TILER = TILER_PRESENT_LO, - KBASE_PM_CORE_STACK = STACK_PRESENT_LO + KBASE_PM_CORE_L2 = GPU_CONTROL_ENUM(L2_PRESENT), + KBASE_PM_CORE_SHADER = GPU_CONTROL_ENUM(SHADER_PRESENT), + KBASE_PM_CORE_TILER = GPU_CONTROL_ENUM(TILER_PRESENT), + KBASE_PM_CORE_STACK = GPU_CONTROL_ENUM(STACK_PRESENT) }; /* @@ -67,7 +72,7 @@ enum kbase_pm_core_type { * state machine. */ enum kbase_l2_core_state { -#define KBASEP_L2_STATE(n) KBASE_L2_ ## n, +#define KBASEP_L2_STATE(n) KBASE_L2_##n, #include "mali_kbase_pm_l2_states.h" #undef KBASEP_L2_STATE }; @@ -77,7 +82,7 @@ enum kbase_l2_core_state { * enum kbase_mcu_state - The states used for the MCU state machine. */ enum kbase_mcu_state { -#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, +#define KBASEP_MCU_STATE(n) KBASE_MCU_##n, #include "mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE }; @@ -87,7 +92,7 @@ enum kbase_mcu_state { * enum kbase_shader_core_state - The states used for the shaders' state machine. */ enum kbase_shader_core_state { -#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, +#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_##n, #include "mali_kbase_pm_shader_states.h" #undef KBASEP_SHADER_STATE }; @@ -353,6 +358,11 @@ union kbase_pm_policy_data { * mode for the saving the HW state before power down. * @db_mirror_interrupt_enabled: Flag tracking if the Doorbell mirror interrupt * is enabled or not. + * @l2_force_off_after_mcu_halt: Flag to indicate that L2 cache power down is + * must after performing the MCU halt. Flag is set + * immediately after the MCU halt and cleared + * after the L2 cache power down. MCU can't be + * re-enabled whilst the flag is set. 
* @in_reset: True if a GPU is resetting and normal power manager operation is * suspended * @partial_shaderoff: True if we want to partial power off shader cores, @@ -470,6 +480,8 @@ struct kbase_pm_backend_data { bool gpu_wakeup_override; bool db_mirror_interrupt_enabled; #endif + + bool l2_force_off_after_mcu_halt; #endif bool l2_desired; bool l2_always_on; @@ -498,16 +510,16 @@ struct kbase_pm_backend_data { #if MALI_USE_CSF /* CSF PM flag, signaling that the MCU shader Core should be kept on */ -#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) +#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) /* CSF PM flag, signaling no scheduler suspension on idle groups */ #define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) /* CSF PM flag, signaling no scheduler suspension on no runnable groups */ #define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2) /* The following flags corresponds to existing defined PM policies */ -#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \ - CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ - CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) +#define ALWAYS_ON_PM_SCHED_FLAGS \ + (CSF_DYNAMIC_PM_CORE_KEEP_ON | CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ + CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) #define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0) #if !MALI_CUSTOMER_RELEASE #define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE) @@ -547,7 +559,7 @@ enum kbase_pm_policy_event { * @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not * become active before the Shader Tick Timer timeout occurred. 
*/ - KBASE_PM_POLICY_EVENT_TIMER_MISS, + KBASE_PM_POLICY_EVENT_TIMER_MISS }; /** @@ -631,8 +643,7 @@ struct kbase_pm_policy { * valid pointer) * @event: The id of the power event that has occurred */ - void (*handle_event)(struct kbase_device *kbdev, - enum kbase_pm_policy_event event); + void (*handle_event)(struct kbase_device *kbdev, enum kbase_pm_policy_event event); enum kbase_pm_policy_id id; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 5be8acd75d0a..b77e46b35422 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include @@ -50,9 +50,6 @@ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include #endif /* CONFIG_MALI_ARBITER_SUPPORT */ -#if MALI_USE_CSF -#include -#endif #if MALI_USE_CSF #include @@ -67,10 +64,10 @@ bool corestack_driver_control; /* Default value of 0/false */ #endif module_param(corestack_driver_control, bool, 0444); MODULE_PARM_DESC(corestack_driver_control, - "Let the driver power on/off the GPU core stack independently " - "without involving the Power Domain Controller. This should " - "only be enabled on platforms for which integration of the PDC " - "to the Mali GPU is known to be problematic."); + "Let the driver power on/off the GPU core stack independently " + "without involving the Power Domain Controller. 
This should " + "only be enabled on platforms for which integration of the PDC " + "to the Mali GPU is known to be problematic."); KBASE_EXPORT_TEST_API(corestack_driver_control); /** @@ -88,22 +85,21 @@ KBASE_EXPORT_TEST_API(corestack_driver_control); * and more efficient. */ enum kbasep_pm_action { - ACTION_PRESENT = 0, - ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), - ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), - ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), - ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), - ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) + ACTION_PRESENT, + ACTION_READY, + ACTION_PWRON, + ACTION_PWROFF, + ACTION_PWRTRANS, + ACTION_PWRACTIVE }; -static u64 kbase_pm_get_state( - struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - enum kbasep_pm_action action); +static u64 kbase_pm_get_state(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev); #if MALI_USE_CSF + bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -111,8 +107,10 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) if (unlikely(!kbdev->csf.firmware_inited)) return false; - if (kbdev->csf.scheduler.pm_active_count && - kbdev->pm.backend.mcu_desired) + if (kbdev->pm.backend.l2_force_off_after_mcu_halt) + return false; + + if (kbdev->csf.scheduler.pm_active_count && kbdev->pm.backend.mcu_desired) return true; #ifdef KBASE_PM_RUNTIME @@ -125,8 +123,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) * unless policy changing transition needs it off. 
*/ - return (kbdev->pm.backend.mcu_desired && - kbase_pm_no_mcu_core_pwroff(kbdev) && + return (kbdev->pm.backend.mcu_desired && kbase_pm_no_mcu_core_pwroff(kbdev) && !kbdev->pm.backend.policy_change_clamp_state_to_off); } #endif @@ -138,11 +135,10 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) return false; if (kbdev->pm.backend.protected_transition_override && - kbdev->pm.backend.protected_l2_override) + kbdev->pm.backend.protected_l2_override) return true; - if (kbdev->pm.backend.protected_transition_override && - !kbdev->pm.backend.shaders_desired) + if (kbdev->pm.backend.protected_transition_override && !kbdev->pm.backend.shaders_desired) return false; #else if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off)) @@ -176,8 +172,7 @@ int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev) WARN_ON(!kbdev->protected_mode_transition); - if (kbdev->pm.backend.l2_always_on && - (kbdev->system_coherency == COHERENCY_ACE)) { + if (kbdev->pm.backend.l2_always_on && (kbdev->system_coherency == COHERENCY_ACE)) { WARN_ON(kbdev->pm.backend.protected_entry_transition_override); /* @@ -200,8 +195,7 @@ void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev) WARN_ON(!kbdev->protected_mode_transition); - if (kbdev->pm.backend.l2_always_on && - (kbdev->system_coherency == COHERENCY_ACE)) { + if (kbdev->pm.backend.l2_always_on && (kbdev->system_coherency == COHERENCY_ACE)) { WARN_ON(!kbdev->pm.backend.protected_entry_transition_override); kbdev->pm.backend.protected_entry_transition_override = false; @@ -224,9 +218,38 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) } #endif + +#define ACTION_TYPE_TO_REG_GPU_CONTROL(core_name) \ + { \ + switch (action) { \ + case ACTION_PRESENT: \ + reg = GPU_CONTROL_ENUM(core_name##_PRESENT); \ + break; \ + case ACTION_READY: \ + reg = GPU_CONTROL_ENUM(core_name##_READY); \ + break; \ + case ACTION_PWRON: \ + reg = GPU_CONTROL_ENUM(core_name##_PWRON); \ + 
break; \ + case ACTION_PWROFF: \ + reg = GPU_CONTROL_ENUM(core_name##_PWROFF); \ + break; \ + case ACTION_PWRTRANS: \ + reg = GPU_CONTROL_ENUM(core_name##_PWRTRANS); \ + break; \ + case ACTION_PWRACTIVE: \ + reg = GPU_CONTROL_ENUM(core_name##_PWRACTIVE); \ + break; \ + default: \ + dev_err(kbdev->dev, "Invalid action"); \ + break; \ + } \ + } + /** * core_type_to_reg - Decode a core type and action to a register. * + * @kbdev: The kbase device for the core * @core_type: The type of core * @action: The type of action * @@ -238,31 +261,96 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) * Return: The register offset of the _LO register that performs an action of * type @action on a core of type @core_type. */ -static u32 core_type_to_reg(enum kbase_pm_core_type core_type, - enum kbasep_pm_action action) +static u32 core_type_to_reg(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) { - if (corestack_driver_control) { - if (core_type == KBASE_PM_CORE_STACK) { - switch (action) { - case ACTION_PRESENT: - return STACK_PRESENT_LO; - case ACTION_READY: - return STACK_READY_LO; - case ACTION_PWRON: - return STACK_PWRON_LO; - case ACTION_PWROFF: - return STACK_PWROFF_LO; - case ACTION_PWRTRANS: - return STACK_PWRTRANS_LO; - default: - WARN(1, "Invalid action for core type\n"); + u32 reg = 0x0; + + { + switch (core_type) { + case KBASE_PM_CORE_L2: + ACTION_TYPE_TO_REG_GPU_CONTROL(L2); + break; + case KBASE_PM_CORE_SHADER: + ACTION_TYPE_TO_REG_GPU_CONTROL(SHADER); + break; + case KBASE_PM_CORE_TILER: + ACTION_TYPE_TO_REG_GPU_CONTROL(TILER); + break; + case KBASE_PM_CORE_STACK: + if (corestack_driver_control) { + switch (action) { + case ACTION_PRESENT: + reg = GPU_CONTROL_ENUM(STACK_PRESENT); + break; + case ACTION_READY: + reg = GPU_CONTROL_ENUM(STACK_READY); + break; + case ACTION_PWRON: + reg = GPU_CONTROL_ENUM(STACK_PWRON); + break; + case ACTION_PWROFF: + reg = GPU_CONTROL_ENUM(STACK_PWROFF); + 
break; + case ACTION_PWRTRANS: + reg = GPU_CONTROL_ENUM(STACK_PWRTRANS); + break; + default: + dev_err(kbdev->dev, "Invalid action for core stack"); + } } + break; + default: + dev_err(kbdev->dev, "Invalid core type for gpu control"); + break; } } - return (u32)core_type + (u32)action; + WARN_ON(!reg); + return reg; } +#if !MALI_USE_CSF +/** + * map_core_type_to_tl_pm_state - Map core type to TL_PM_STATE. + * + * @kbdev: The kbase device for the core + * @core_type: The type of core + * + * Given a core_type (defined by kbase_pm_core_type) this function will return + * TL_PM_STATE_*, which is a mapping of core_type to respective core type timeline value. + * + * Return: Core type timeline value. + */ +__pure static u32 map_core_type_to_tl_pm_state(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type) +{ + u32 map = 0; + + switch (core_type) { + case KBASE_PM_CORE_L2: + map = TL_PM_STATE_L2; + break; + case KBASE_PM_CORE_SHADER: + map = TL_PM_STATE_SHADER; + break; + case KBASE_PM_CORE_TILER: + map = TL_PM_STATE_TILER; + break; + case KBASE_PM_CORE_STACK: + if (corestack_driver_control) + map = TL_PM_STATE_STACK; + break; + default: + dev_err(kbdev->dev, "Invalid core type"); + } + + /* Core stack might not change default value */ + WARN_ON(!map); + return map; +} +#endif + #if IS_ENABLED(CONFIG_ARM64) static void mali_cci_flush_l2(struct kbase_device *kbdev) { @@ -278,22 +366,20 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) * to be called from. */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CACHE_CLN_INV_L2); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_CACHE_CLN_INV_L2); - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + raw = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)); /* Wait for cache flush to complete before continuing, exit on * gpu resets or loop expiry. 
*/ while (((raw & mask) == 0) && --loops) { - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + raw = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)); } } #endif + /** * kbase_pm_invoke - Invokes an action on a core set * @@ -303,24 +389,18 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) * @action: The action to perform on the cores * * This function performs the action given by @action on a set of cores of a - * type given by @core_type. It is a static function used by - * kbase_pm_transition_core_type() + * type given by @core_type. */ -static void kbase_pm_invoke(struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - u64 cores, - enum kbasep_pm_action action) +static void kbase_pm_invoke(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + u64 cores, enum kbasep_pm_action action) { u32 reg; - u32 lo = cores & 0xFFFFFFFF; - u32 hi = (cores >> 32) & 0xFFFFFFFF; lockdep_assert_held(&kbdev->hwaccess_lock); - reg = core_type_to_reg(core_type, action); - - KBASE_DEBUG_ASSERT(reg); + reg = core_type_to_reg(kbdev, core_type, action); +#if !MALI_USE_CSF if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -328,9 +408,12 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state |= cores; else if (action == ACTION_PWROFF) state &= ~cores; - KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); + + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, map_core_type_to_tl_pm_state(kbdev, core_type), + state); } +#endif /* Tracing */ if (cores) { if (action == ACTION_PWRON) @@ -365,18 +448,13 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, } } - if (kbase_dummy_job_wa_enabled(kbdev) && - action == ACTION_PWRON && + if (kbase_dummy_job_wa_enabled(kbdev) && action == ACTION_PWRON && core_type == KBASE_PM_CORE_SHADER && - !(kbdev->dummy_job_wa.flags & - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { + !(kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { 
kbase_dummy_job_wa_execute(kbdev, cores); - } else { - if (lo != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); - if (hi != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); } + else + kbase_reg_write64(kbdev, reg, cores); } /** @@ -393,49 +471,40 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, * * Return: A bit mask specifying the state of the cores */ -static u64 kbase_pm_get_state(struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - enum kbasep_pm_action action) +static u64 kbase_pm_get_state(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) { - u32 reg; - u32 lo, hi; + u32 reg = core_type_to_reg(kbdev, core_type, action); - reg = core_type_to_reg(core_type, action); - - KBASE_DEBUG_ASSERT(reg); - - lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); - hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); - - return (((u64) hi) << 32) | ((u64) lo); + return kbase_reg_read64(kbdev, reg); } /** * kbase_pm_get_present_cores - Get the cores that are present * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of the cores that are present */ -u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { - KBASE_DEBUG_ASSERT(kbdev != NULL); + if (WARN_ON(!kbdev)) + return 0; - switch (type) { + switch (core_type) { case KBASE_PM_CORE_L2: return kbdev->gpu_props.curr_config.l2_present; case KBASE_PM_CORE_SHADER: return kbdev->gpu_props.curr_config.shader_present; case KBASE_PM_CORE_TILER: - return kbdev->gpu_props.props.raw_props.tiler_present; + return kbdev->gpu_props.tiler_present; case KBASE_PM_CORE_STACK: - return kbdev->gpu_props.props.raw_props.stack_present; + return kbdev->gpu_props.stack_present; default: break; } - KBASE_DEBUG_ASSERT(0); + WARN_ON(1); return 0; } @@ -447,14 
+516,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); * (busy processing work) * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of cores that are active */ -u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { - return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); + return kbase_pm_get_state(kbdev, core_type, ACTION_PWRACTIVE); } KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); @@ -464,14 +532,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); * power states * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of cores that are transitioning */ -u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { - return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); + return kbase_pm_get_state(kbdev, core_type, ACTION_PWRTRANS); } KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); @@ -480,18 +547,17 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); * kbase_pm_get_ready_cores - Get the cores that are powered on * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of cores that are ready (powered on) */ -u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { u64 result; - result = kbase_pm_get_state(kbdev, type, ACTION_READY); + result = kbase_pm_get_state(kbdev, core_type, ACTION_READY); - switch (type) { + switch (core_type) { case KBASE_PM_CORE_SHADER: KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); break; @@ -523,8 +589,7 @@ static void 
kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) backend->hwcnt_disabled = true; } else { - kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, - &backend->hwcnt_disable_work); + kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, &backend->hwcnt_disable_work); } } @@ -540,9 +605,9 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) #if MALI_USE_CSF if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), - L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); + val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG), + L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); } #endif /* MALI_USE_CSF */ @@ -554,7 +619,7 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) (!kbdev->l2_hash_values_override)) return; - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); if (kbdev->l2_size_override) { val &= ~L2_CONFIG_SIZE_MASK; @@ -562,26 +627,29 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) } if (kbdev->l2_hash_override) { - WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); + WARN_ON(kbase_hw_has_l2_slice_hash_feature(kbdev)); val &= ~L2_CONFIG_HASH_MASK; val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); } else if (kbdev->l2_hash_values_override) { +#if MALI_USE_CSF int i; - WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); - val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK; - val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT); + WARN_ON(!kbase_hw_has_l2_slice_hash_feature(kbdev)); - for (i = 0; i < ASN_HASH_COUNT; i++) { + val &= ~L2_CONFIG_L2_SLICE_HASH_ENABLE_MASK; + val |= (0x1 << L2_CONFIG_L2_SLICE_HASH_ENABLE_SHIFT); + for (i = 0; i < GPU_L2_SLICE_HASH_COUNT; i++) { + /* L2_SLICE_HASH and ASN_HASH alias each other */ 
dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n", kbdev->l2_hash_values[i], i); - kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)), - kbdev->l2_hash_values[i]); + kbase_reg_write32(kbdev, GPU_L2_SLICE_HASH_OFFSET(i), + kbdev->l2_hash_values[i]); } +#endif /* MALI_USE_CSF */ } dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG), val); } static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) @@ -607,18 +675,20 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) return strings[state]; } -static -void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) +static void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) { #if KBASE_KTRACE_ENABLE switch (state) { -#define KBASEP_MCU_STATE(n) \ - case KBASE_MCU_ ## n: \ - KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \ +#define KBASEP_MCU_STATE(n) \ + case KBASE_MCU_##n: \ + KBASE_KTRACE_ADD(kbdev, PM_MCU_##n, NULL, state); \ break; #include "mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE } +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(state); #endif } @@ -638,21 +708,19 @@ static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbd if (unlikely(kbdev->csf.firmware_hctl_core_pwr)) return false; - core_mask_update = - backend->shaders_avail != backend->shaders_desired_mask; + core_mask_update = backend->shaders_avail != backend->shaders_desired_mask; timer_update = kbdev->csf.mcu_core_pwroff_dur_count != - kbdev->csf.mcu_core_pwroff_reg_shadow; + kbdev->csf.mcu_core_pwroff_reg_shadow; if (core_mask_update || timer_update) - kbase_csf_firmware_update_core_attr(kbdev, timer_update, - core_mask_update, backend->shaders_desired_mask); + kbase_csf_firmware_update_core_attr(kbdev, timer_update, core_mask_update, + backend->shaders_desired_mask); return (core_mask_update || 
timer_update); } -bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, - enum kbase_mcu_state state) +bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -671,12 +739,12 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, */ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) { - u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_CONTROL)); + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL)); lockdep_assert_held(&kbdev->hwaccess_lock); val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), val); } /** @@ -697,9 +765,8 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) return; /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ - while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) + while (--max_loops && kbase_reg_read32(kbdev, MMU_AS_OFFSET(MCU_AS_NR, STATUS)) & + AS_STATUS_AS_ACTIVE_INT_MASK) ; if (!WARN_ON_ONCE(max_loops == 0)) @@ -730,18 +797,88 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en lockdep_assert_held(&kbdev->hwaccess_lock); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); if (enable) { irq_mask |= POWER_CHANGED_ALL; - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); } else { irq_mask &= ~POWER_CHANGED_ALL; } - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), irq_mask); } + +#if MALI_USE_CSF +/** + * hctl_shader_cores_active - Check the shader cores are active + * + * @kbdev: Pointer to the device. 
+ * @shaders_ready: Bitmask of shader cores that are ready. + * + * This function is called to check if the required shader cores are active. + * + * Return: true if required shader cores are active, otherwise false. + */ +static bool hctl_shader_cores_active(struct kbase_device *kbdev, u64 shaders_ready) +{ + const u64 shaders_active = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); + const u64 shaders_to_disable = shaders_ready & ~kbdev->pm.backend.shaders_desired_mask; + bool cores_are_active = !!(shaders_to_disable & shaders_active); + + + return cores_are_active; +} + +/** + * hctl_shader_cores_power_up_done - Check the shader cores are powered up. + * + * @kbdev: Pointer to the device + * @shaders_ready: Bitmask of shader cores that are ready. + * @shaders_trans: Bitmask of shader cores that are transitioning. + * @shaders_avail: Bitmask of shader cores that are allowed to be used. + * + * This function is called to check if the required number of shader cores have been + * powered up. + * + * Return: true if power up is complete for required shader cores, otherwise false. + */ +static bool hctl_shader_cores_power_up_done(struct kbase_device *kbdev, u64 shaders_ready, + u64 shaders_trans, u64 shaders_avail) +{ + if (shaders_trans || shaders_ready != shaders_avail) + return false; + + + return true; +} + +/* + * hctl_shader_cores_power_down_done - Check the shader cores are powered down + * + * @kbdev: Pointer to the device + * @shaders_ready: Bitmask of shader cores that are ready. + * @shaders_trans: Bitmask of shader cores that are transitioning. + * @shaders_avail: Bitmask of shader cores that are allowed to be used. + * + * This function is called to check if the required number of shader cores have been + * powered down. + * + * Return: true if power down is complete for required shader cores, otherwise false. 
+ */ +static bool hctl_shader_cores_power_down_done(struct kbase_device *kbdev, u64 shaders_ready, + u64 shaders_trans, u64 shaders_avail) +{ + if (shaders_trans || shaders_ready != shaders_avail) + return false; + + + return true; +} +#endif /* MALI_USE_CSF */ + static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -781,27 +918,23 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_PEND_ON_RELOAD: if (kbdev->csf.firmware_reloaded) { - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); kbase_csf_firmware_global_reinit(kbdev, - backend->shaders_desired_mask); + backend->shaders_desired_mask); if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, false); - backend->mcu_state = - KBASE_MCU_ON_GLB_REINIT_PEND; + backend->mcu_state = KBASE_MCU_ON_GLB_REINIT_PEND; } break; case KBASE_MCU_ON_GLB_REINIT_PEND: if (kbase_csf_firmware_global_reinit_complete(kbdev)) { - backend->shaders_avail = - backend->shaders_desired_mask; + backend->shaders_avail = backend->shaders_desired_mask; backend->pm_shaders_core_mask = 0; if (kbdev->csf.firmware_hctl_core_pwr) { kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - backend->shaders_avail, ACTION_PWRON); - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_ON; + backend->shaders_avail, ACTION_PWRON); + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; #if IS_ENABLED(CONFIG_MALI_CORESIGHT) @@ -819,16 +952,15 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; case KBASE_MCU_HCTL_SHADERS_PEND_ON: - if (!shaders_trans && - shaders_ready == backend->shaders_avail) { - /* Cores now stable, notify MCU the stable mask */ - kbase_csf_firmware_update_core_attr(kbdev, - false, true, shaders_ready); + if (!hctl_shader_cores_power_up_done(kbdev, 
shaders_ready, shaders_trans, + backend->shaders_avail)) + break; - backend->pm_shaders_core_mask = shaders_ready; - backend->mcu_state = - KBASE_MCU_HCTL_CORES_NOTIFY_PEND; - } + /* Cores now stable, notify MCU the stable mask */ + kbase_csf_firmware_update_core_attr(kbdev, false, true, shaders_ready); + + backend->pm_shaders_core_mask = shaders_ready; + backend->mcu_state = KBASE_MCU_HCTL_CORES_NOTIFY_PEND; break; case KBASE_MCU_HCTL_CORES_NOTIFY_PEND: @@ -843,6 +975,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_backend_csf_set_hw_availability( + &kbdev->hwcnt_gpu_iface, + kbdev->gpu_props.curr_config.l2_slices, + kbdev->gpu_props.curr_config.shader_present & + kbdev->pm.debug_core_mask); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; @@ -861,8 +998,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); - backend->mcu_state = - KBASE_MCU_HCTL_MCU_ON_RECHECK; + backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; @@ -898,16 +1034,14 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_ON; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } else if (~backend->shaders_desired_mask & shaders_ready) { kbase_csf_firmware_update_core_attr(kbdev, false, true, backend->shaders_desired_mask); backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND; } else { - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_ON; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } break; @@ 
-919,30 +1053,25 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) } break; - case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: - { - u64 active_cores = kbase_pm_get_active_cores( - kbdev, - KBASE_PM_CORE_SHADER); - u64 cores_to_disable = shaders_ready & - ~backend->shaders_desired_mask; + case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: { + if (hctl_shader_cores_active(kbdev, shaders_ready)) + break; - if (!(cores_to_disable & active_cores)) { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - cores_to_disable, - ACTION_PWROFF); - backend->shaders_avail = backend->shaders_desired_mask; - backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; - } - } - break; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready & ~backend->shaders_desired_mask, + ACTION_PWROFF); + backend->shaders_avail = backend->shaders_desired_mask; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; + } break; case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND: - if (!shaders_trans && shaders_ready == backend->shaders_avail) { - /* Cores now stable */ - backend->pm_shaders_core_mask = shaders_ready; - backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; - } + if (!hctl_shader_cores_power_down_done(kbdev, shaders_ready, shaders_trans, + backend->shaders_avail)) + break; + + /* Cores now stable */ + backend->pm_shaders_core_mask = shaders_ready; + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; break; case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: @@ -962,7 +1091,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); - if (backend->hwcnt_disabled) { #ifdef KBASE_PM_RUNTIME if (backend->gpu_sleep_mode_active) @@ -1016,34 +1144,40 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; case KBASE_MCU_ON_PEND_HALT: - if (kbase_csf_firmware_mcu_halted(kbdev)) { + if (kbase_csf_firmware_mcu_halt_req_complete(kbdev)) { KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, - 
kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); if (kbdev->csf.firmware_hctl_core_pwr) - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_READY_OFF; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_READY_OFF; else backend->mcu_state = KBASE_MCU_POWER_DOWN; } break; case KBASE_MCU_HCTL_SHADERS_READY_OFF: - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready, ACTION_PWROFF); - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_OFF; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready, ACTION_PWROFF); + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_OFF; break; case KBASE_MCU_HCTL_SHADERS_PEND_OFF: - if (!shaders_trans && !shaders_ready) { - backend->pm_shaders_core_mask = 0; - backend->mcu_state = KBASE_MCU_POWER_DOWN; - } + if (!hctl_shader_cores_power_down_done(kbdev, shaders_ready, shaders_trans, + 0)) + break; + + backend->pm_shaders_core_mask = 0; + backend->mcu_state = KBASE_MCU_POWER_DOWN; break; case KBASE_MCU_POWER_DOWN: - kbase_csf_firmware_disable_mcu(kbdev); - backend->mcu_state = KBASE_MCU_PEND_OFF; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2922)) { + if (!kbdev->csf.firmware_hctl_core_pwr) + kbasep_pm_toggle_power_interrupt(kbdev, true); + backend->mcu_state = KBASE_MCU_OFF; + backend->l2_force_off_after_mcu_halt = true; + } else { + kbase_csf_firmware_disable_mcu(kbdev); + backend->mcu_state = KBASE_MCU_PEND_OFF; + } break; case KBASE_MCU_PEND_OFF: @@ -1065,7 +1199,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_SLEEP: if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, - kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); backend->mcu_state = KBASE_MCU_IN_SLEEP; kbase_pm_enable_db_mirror_interrupt(kbdev); kbase_csf_scheduler_reval_idleness_post_sleep(kbdev); @@ -1074,12 +1208,21 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) */ if 
(!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, true); + /* If Scheduler's PM refcount is not zero then the early wakeup + * on reaching the sleep state can be skipped as the waiting thread + * (like Scheduler kthread) would be interested in MCU being + * turned ON. + * In the more regular flow, the refcount is very likely to be zero + * and there would be no waiters. The wake_up() call won't have an + * effect if there are no waiters. + */ + if (likely(!kbdev->csf.scheduler.pm_active_count)) + wake_up(&backend->gpu_in_desired_state_wait); } break; case KBASE_MCU_IN_SLEEP: - if (kbase_pm_is_mcu_desired(kbdev) && - backend->l2_state == KBASE_L2_ON) { + if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { wait_mcu_as_inactive(kbdev); KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( kbdev, kbase_backend_get_cycle_cnt(kbdev)); @@ -1107,8 +1250,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; default: - WARN(1, "Invalid state in mcu_state: %d", - backend->mcu_state); + WARN(1, "Invalid state in mcu_state: %d", backend->mcu_state); } if (backend->mcu_state != prev_state) { @@ -1133,11 +1275,9 @@ static void core_idle_worker(struct work_struct *work) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) { const unsigned int core_inactive_wait_ms = 1; - u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); - u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask; - if (!(cores_to_disable & active_cores)) { + if (!hctl_shader_cores_active(kbdev, shaders_ready)) { kbase_pm_update_state(kbdev); break; } @@ -1164,18 +1304,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) return strings[state]; } -static -void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum 
kbase_l2_core_state state) +static void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, + enum kbase_l2_core_state state) { #if KBASE_KTRACE_ENABLE switch (state) { -#define KBASEP_L2_STATE(n) \ - case KBASE_L2_ ## n: \ - KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \ +#define KBASEP_L2_STATE(n) \ + case KBASE_L2_##n: \ + KBASE_KTRACE_ADD(kbdev, PM_L2_##n, NULL, state); \ break; #include "mali_kbase_pm_l2_states.h" #undef KBASEP_L2_STATE } +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(state); #endif } @@ -1225,15 +1368,61 @@ static bool need_tiler_control(struct kbase_device *kbdev) else return false; #else + CSTD_UNUSED(kbdev); return true; #endif } +/** + * hctl_l2_power_down - Initiate power down of L2 cache + * + * @kbdev: The kbase device structure for the device. + * + * This function initiates the power down of L2 cache when Host controls the power + * for Tiler block. The function expects that power down of Tiler to already have + * been initiated and it triggers the L2 power down only after the power down for + * Tiler is complete. + * The function shall be called only if L2 is in ready state. + */ +static void hctl_l2_power_down(struct kbase_device *kbdev) +{ +} + +/** + * hctl_tiler_power_up_done - Check and/or initiate power up of Tiler + * + * @kbdev: The kbase device structure for the device. + * + * This function initiates the power up of Tiler, when Host controls the power + * for Tiler block, but only if the caller hasn't already triggered the power up + * of Tiler. + * The function shall be called only if L2 is in ready state. + * + * Return: true if power up is complete for Tiler, otherwise false. 
+ */ +static bool hctl_tiler_power_up_done(struct kbase_device *kbdev) +{ + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_TILER); + const u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); + + tiler_trans &= ~tiler_ready; + if (tiler_trans) + return false; + + if (!tiler_ready) { + return false; + } + + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); + return true; +} + + static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; u64 l2_present = kbdev->gpu_props.curr_config.l2_present; - u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; + u64 tiler_present = kbdev->gpu_props.tiler_present; bool l2_power_up_done; enum kbase_l2_core_state prev_state; @@ -1241,10 +1430,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) do { /* Get current state */ - u64 l2_trans = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2); - u64 l2_ready = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2); + u64 l2_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2); + u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); #ifdef CONFIG_MALI_ARBITER_SUPPORT /* @@ -1252,8 +1439,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * are vulnerable to corruption if gpu is lost */ if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { - backend->shaders_state = - KBASE_SHADERS_OFF_CORESTACK_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) { /* Don't progress until hw counters are disabled @@ -1263,10 +1449,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * the hardware. This step is needed to keep the HW * counters in a consistent state after a GPU lost. 
*/ - backend->l2_state = - KBASE_L2_ON_HWCNT_DISABLE; + backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL, - backend->l2_state); + backend->l2_state); kbase_pm_trigger_hwcnt_disable(kbdev); } @@ -1293,7 +1478,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) /* Enable HW timer of IPA control before * L2 cache is powered-up. */ - kbase_ipa_control_handle_gpu_sleep_exit(kbdev); + { + kbase_ipa_control_handle_gpu_sleep_exit(kbdev); + } #endif /* * Set the desired config for L2 before @@ -1309,9 +1496,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * L2 cache. */ if (need_tiler_control(kbdev)) { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, - ACTION_PWRON); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); } else { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, ACTION_PWRON); } @@ -1320,8 +1508,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * must power them on explicitly. 
*/ if (l2_present != 1) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present & ~1, + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present & ~1, ACTION_PWRON); /* Clear backend slot submission kctx */ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); @@ -1334,18 +1521,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) l2_power_up_done = false; if (!l2_trans && l2_ready == l2_present) { if (need_tiler_control(kbdev)) { - u64 tiler_trans = kbase_pm_get_trans_cores( - kbdev, KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_TILER); - tiler_trans &= ~tiler_ready; - - if (!tiler_trans && tiler_ready == tiler_present) { - KBASE_KTRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, tiler_ready); - l2_power_up_done = true; - } + l2_power_up_done = hctl_tiler_power_up_done(kbdev); } else { KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, l2_ready); @@ -1364,11 +1540,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * hardware counters. */ if (kbdev->pm.backend.gpu_clock_slow_down_wa) - backend->l2_state = - KBASE_L2_RESTORE_CLOCKS; + backend->l2_state = KBASE_L2_RESTORE_CLOCKS; else - backend->l2_state = - KBASE_L2_ON_HWCNT_ENABLE; + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; /* Now that the L2 is on, the shaders can start * powering on if they're required. 
The obvious @@ -1413,8 +1587,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) #if !MALI_USE_CSF backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); backend->hwcnt_disabled = false; } #endif @@ -1484,8 +1657,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) if (backend->hwcnt_disabled) { if (kbdev->pm.backend.gpu_clock_slow_down_wa) - backend->l2_state = - KBASE_L2_SLOW_DOWN_CLOCKS; + backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS; else backend->l2_state = KBASE_L2_POWER_DOWN; } @@ -1519,14 +1691,11 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) if (kbase_pm_is_l2_desired(kbdev)) backend->l2_state = KBASE_L2_PEND_ON; else if (can_power_down_l2(kbdev)) { - if (!backend->l2_always_on) - /* Powering off the L2 will also power off the - * tiler. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present, - ACTION_PWROFF); - else + if (!backend->l2_always_on) { + /* Powering off the L2 will also power off the tiler. */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, + ACTION_PWROFF); + } else /* If L2 cache is powered then we must flush it * before we power off the GPU. Normally this * would have been handled when the L2 was @@ -1544,59 +1713,69 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) break; case KBASE_L2_PEND_OFF: - if (!backend->l2_always_on) { - /* We only need to check the L2 here - if the L2 - * is off then the tiler is definitely also off. - */ - if (!l2_trans && !l2_ready) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Allow clock gating within the GPU and prevent it - * from being seen as active during sleep. 
- */ - kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -#endif - /* L2 is now powered off */ - backend->l2_state = KBASE_L2_OFF; - } - } else { - if (!kbdev->cache_clean_in_progress) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Allow clock gating within the GPU and prevent it - * from being seen as active during sleep. - */ - kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -#endif - backend->l2_state = KBASE_L2_OFF; + if (likely(!backend->l2_always_on)) { + if (need_tiler_control(kbdev) && l2_ready) { + hctl_l2_power_down(kbdev); + break; } + + if (l2_trans || l2_ready) + break; + } else if (kbdev->cache_clean_in_progress) + break; +#if MALI_USE_CSF +#if defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + { + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); } +#endif + /* Disabling MCU after L2 cache power down is to address + * BASE_HW_ISSUE_TITANHW_2922 hardware issue. + */ + if (backend->l2_force_off_after_mcu_halt) { + kbase_csf_firmware_disable_mcu(kbdev); + kbase_csf_firmware_disable_mcu_wait(kbdev); + WARN_ON_ONCE(backend->mcu_state != KBASE_MCU_OFF); + backend->l2_force_off_after_mcu_halt = false; + } +#endif + /* L2 is now powered off */ + backend->l2_state = KBASE_L2_OFF; + break; case KBASE_L2_RESET_WAIT: /* Reset complete */ - if (!backend->in_reset) + if (!backend->in_reset) { +#if MALI_USE_CSF + backend->l2_force_off_after_mcu_halt = false; +#endif backend->l2_state = KBASE_L2_OFF; + } + break; default: - WARN(1, "Invalid state in l2_state: %d", - backend->l2_state); + WARN(1, "Invalid state in l2_state: %d", backend->l2_state); } if (backend->l2_state != prev_state) { dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", kbase_l2_core_state_to_string(prev_state), - kbase_l2_core_state_to_string( - backend->l2_state)); + kbase_l2_core_state_to_string(backend->l2_state)); kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state); } } while (backend->l2_state != 
prev_state); if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && - backend->l2_state == KBASE_L2_OFF) { + backend->l2_state == KBASE_L2_OFF) { kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, - &kbdev->pm.backend.gpu_poweroff_wait_work); + &kbdev->pm.backend.gpu_poweroff_wait_work); } return 0; @@ -1605,10 +1784,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) static void shader_poweroff_timer_stop_callback(struct work_struct *data) { unsigned long flags; - struct kbasep_pm_tick_timer_state *stt = container_of(data, - struct kbasep_pm_tick_timer_state, work); - struct kbase_device *kbdev = container_of(stt, struct kbase_device, - pm.backend.shader_tick_timer); + struct kbasep_pm_tick_timer_state *stt = + container_of(data, struct kbasep_pm_tick_timer_state, work); + struct kbase_device *kbdev = + container_of(stt, struct kbase_device, pm.backend.shader_tick_timer); hrtimer_cancel(&stt->timer); @@ -1648,8 +1827,7 @@ static void shader_poweroff_timer_stop_callback(struct work_struct *data) */ static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) { - struct kbasep_pm_tick_timer_state *stt = - &kbdev->pm.backend.shader_tick_timer; + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1662,8 +1840,7 @@ static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) } #if !MALI_USE_CSF -static const char *kbase_shader_core_state_to_string( - enum kbase_shader_core_state state) +static const char *kbase_shader_core_state_to_string(enum kbase_shader_core_state state) { const char *const strings[] = { #define KBASEP_SHADER_STATE(n) #n, @@ -1679,8 +1856,7 @@ static const char *kbase_shader_core_state_to_string( static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - struct kbasep_pm_tick_timer_state 
*stt = - &kbdev->pm.backend.shader_tick_timer; + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; enum kbase_shader_core_state prev_state; u64 stacks_avail = 0; @@ -1710,12 +1886,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) */ if (kbase_is_gpu_removed(kbdev) #ifdef CONFIG_MALI_ARBITER_SUPPORT - || kbase_pm_is_gpu_lost(kbdev)) { + || kbase_pm_is_gpu_lost(kbdev)) { #else - ) { + ) { #endif - backend->shaders_state = - KBASE_SHADERS_OFF_CORESTACK_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); break; } @@ -1734,14 +1909,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) * except at certain points where we can handle it, * i.e. off and SHADERS_ON_CORESTACK_ON. */ - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); backend->pm_shaders_core_mask = 0; - if (backend->shaders_desired && - backend->l2_state == KBASE_L2_ON) { - if (backend->hwcnt_desired && - !backend->hwcnt_disabled) { + if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) { + if (backend->hwcnt_desired && !backend->hwcnt_disabled) { /* Trigger a hwcounter dump */ backend->hwcnt_desired = false; kbase_pm_trigger_hwcnt_disable(kbdev); @@ -1749,10 +1921,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->hwcnt_disabled) { if (corestack_driver_control) { - kbase_pm_invoke(kbdev, - KBASE_PM_CORE_STACK, - stacks_avail, - ACTION_PWRON); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWRON); } backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON; @@ -1762,16 +1932,14 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: if (!stacks_trans && stacks_ready == stacks_avail) { - backend->shaders_avail = - backend->shaders_desired_mask; - 
kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - backend->shaders_avail, ACTION_PWRON); + backend->shaders_avail = backend->shaders_desired_mask; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail, + ACTION_PWRON); if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_POWER_ON); + kbdev, KBASE_PM_POLICY_EVENT_POWER_ON); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } @@ -1779,21 +1947,19 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_PEND_ON_CORESTACK_ON: if (!shaders_trans && shaders_ready == backend->shaders_avail) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + shaders_ready); backend->pm_shaders_core_mask = shaders_ready; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { #if MALI_USE_CSF unsigned long flags; - kbase_csf_scheduler_spin_lock(kbdev, - &flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags); #endif - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF - kbase_csf_scheduler_spin_unlock(kbdev, - flags); + kbase_csf_scheduler_spin_unlock(kbdev, flags); #endif backend->hwcnt_disabled = false; } @@ -1803,23 +1969,20 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_ON_CORESTACK_ON: - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); /* If shaders to change state, trigger a counter dump */ if (!backend->shaders_desired || - (backend->shaders_desired_mask != shaders_ready)) { + (backend->shaders_desired_mask != shaders_ready)) { backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); - backend->shaders_state = - 
KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; } break; case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); if (!backend->hwcnt_disabled) { /* Wait for being disabled */ @@ -1828,17 +1991,15 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_IDLE); + kbdev, KBASE_PM_POLICY_EVENT_IDLE); if (kbdev->pm.backend.protected_transition_override || #ifdef CONFIG_MALI_ARBITER_SUPPORT - kbase_pm_is_suspending(kbdev) || - kbase_pm_is_gpu_lost(kbdev) || + kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev) || #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - !stt->configured_ticks || - WARN_ON(stt->cancel_queued)) { - backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + !stt->configured_ticks || WARN_ON(stt->cancel_queued)) { + backend->shaders_state = + KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; } else { stt->remaining_ticks = stt->configured_ticks; stt->needed = true; @@ -1856,11 +2017,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) * before the timeout expires. 
*/ if (!hrtimer_active(&stt->timer)) - hrtimer_start(&stt->timer, - stt->configured_interval, - HRTIMER_MODE_REL); + hrtimer_start(&stt->timer, stt->configured_interval, + HRTIMER_MODE_REL); - backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; + backend->shaders_state = + KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; } } else if (backend->shaders_desired_mask & ~shaders_ready) { /* set cores ready but not available to @@ -1873,14 +2034,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); - backend->shaders_state = - KBASE_SHADERS_PEND_ON_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } else if (shaders_ready & ~backend->shaders_desired_mask) { - backend->shaders_state = - KBASE_SHADERS_WAIT_GPU_IDLE; + backend->shaders_state = KBASE_SHADERS_WAIT_GPU_IDLE; } else { - backend->shaders_state = - KBASE_SHADERS_PEND_ON_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } break; @@ -1894,8 +2052,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_TIMER_HIT); + kbdev, KBASE_PM_POLICY_EVENT_TIMER_HIT); stt->remaining_ticks = 0; backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; @@ -1903,13 +2060,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_TIMER_MISS); + kbdev, KBASE_PM_POLICY_EVENT_TIMER_MISS); backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; #ifdef CONFIG_MALI_ARBITER_SUPPORT - } else if (kbase_pm_is_suspending(kbdev) || - kbase_pm_is_gpu_lost(kbdev)) { + } else if (kbase_pm_is_suspending(kbdev) || 
kbase_pm_is_gpu_lost(kbdev)) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; #endif /* CONFIG_MALI_ARBITER_SUPPORT */ } @@ -1931,20 +2086,17 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) shader_poweroff_timer_queue_cancel(kbdev); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { - kbase_gpu_start_cache_clean_nolock( - kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); - backend->shaders_state = - KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; + kbase_gpu_start_cache_clean_nolock(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + backend->shaders_state = KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; } else { - backend->shaders_state = - KBASE_SHADERS_READY_OFF_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_READY_OFF_CORESTACK_ON; } break; case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: if (!kbdev->cache_clean_in_progress) - backend->shaders_state = - KBASE_SHADERS_READY_OFF_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_READY_OFF_CORESTACK_ON; break; @@ -1962,15 +2114,16 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) WARN_ON(backend->shaders_desired_mask & ~shaders_ready); WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); - backend->shaders_avail = - backend->shaders_desired_mask; + backend->shaders_avail = backend->shaders_desired_mask; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); + shaders_ready & ~backend->shaders_avail, + ACTION_PWROFF); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & ~backend->shaders_avail)); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + (shaders_ready & ~backend->shaders_avail)); } else { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready, ACTION_PWROFF); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready, + ACTION_PWROFF); KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); @@ -1981,8 
+2134,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: if (!shaders_trans && !shaders_ready) { if (corestack_driver_control) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, - stacks_avail, ACTION_PWROFF); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, stacks_avail, + ACTION_PWROFF); backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; } @@ -1997,18 +2150,16 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) #if MALI_USE_CSF unsigned long flags; - kbase_csf_scheduler_spin_lock(kbdev, - &flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags); #endif - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF - kbase_csf_scheduler_spin_unlock(kbdev, - flags); + kbase_csf_scheduler_spin_unlock(kbdev, flags); #endif backend->hwcnt_disabled = false; } - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; } break; @@ -2020,15 +2171,15 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_RESET_WAIT: /* Reset complete */ if (!backend->in_reset) - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; break; } if (backend->shaders_state != prev_state) dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n", kbase_shader_core_state_to_string(prev_state), - kbase_shader_core_state_to_string( - backend->shaders_state)); + kbase_shader_core_state_to_string(backend->shaders_state)); } while (backend->shaders_state != prev_state); @@ -2046,10 +2197,10 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) #if !MALI_USE_CSF if (kbdev->pm.backend.shaders_desired && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + kbdev->pm.backend.shaders_state != 
KBASE_SHADERS_ON_CORESTACK_ON) in_desired_state = false; else if (!kbdev->pm.backend.shaders_desired && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) in_desired_state = false; #else in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev); @@ -2070,56 +2221,42 @@ static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) return in_desired_state; } -static bool kbase_pm_is_in_desired_state_with_l2_powered( - struct kbase_device *kbdev) +static bool kbase_pm_is_in_desired_state_with_l2_powered(struct kbase_device *kbdev) { bool in_desired_state = false; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbase_pm_is_in_desired_state_nolock(kbdev) && - (kbdev->pm.backend.l2_state == KBASE_L2_ON)) + (kbdev->pm.backend.l2_state == KBASE_L2_ON)) in_desired_state = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return in_desired_state; } +#if !MALI_USE_CSF static void kbase_pm_trace_power_state(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_L2)); - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_SHADER)); - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_TILER)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_L2, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_SHADER, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_TILER, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); if (corestack_driver_control) - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_STACK, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_STACK)); + 
KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_STACK, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK)); } +#endif void kbase_pm_update_state(struct kbase_device *kbdev) { #if !MALI_USE_CSF - enum kbase_shader_core_state prev_shaders_state = - kbdev->pm.backend.shaders_state; + enum kbase_shader_core_state prev_shaders_state = kbdev->pm.backend.shaders_state; #else enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state; #endif @@ -2141,11 +2278,10 @@ void kbase_pm_update_state(struct kbase_device *kbdev) * the L2. */ if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && - kbdev->pm.backend.shaders_state == - KBASE_SHADERS_OFF_CORESTACK_OFF) { + kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) { if (kbase_pm_l2_update_state(kbdev)) return; - } + } #else if (kbase_pm_mcu_update_state(kbdev)) return; @@ -2158,31 +2294,30 @@ void kbase_pm_update_state(struct kbase_device *kbdev) #endif if (kbase_pm_is_in_desired_state_nolock(kbdev)) { - KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, - kbdev->pm.backend.shaders_avail); + KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); +#if !MALI_USE_CSF kbase_pm_trace_power_state(kbdev); +#endif KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } } -static enum hrtimer_restart -shader_tick_timer_callback(struct hrtimer *timer) +static enum hrtimer_restart shader_tick_timer_callback(struct hrtimer *timer) { - struct kbasep_pm_tick_timer_state *stt = container_of(timer, - struct kbasep_pm_tick_timer_state, timer); - struct kbase_device *kbdev = container_of(stt, struct kbase_device, - pm.backend.shader_tick_timer); + struct kbasep_pm_tick_timer_state *stt = + container_of(timer, struct kbasep_pm_tick_timer_state, timer); + struct kbase_device *kbdev = + container_of(stt, struct kbase_device, pm.backend.shader_tick_timer); struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; 
enum hrtimer_restart restart = HRTIMER_NORESTART; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (stt->remaining_ticks && - backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { + if (stt->remaining_ticks && backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { stt->remaining_ticks--; /* If the remaining ticks just changed from 1 to 0, invoke the @@ -2212,7 +2347,6 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); - stt->needed = false; hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stt->timer.function = shader_tick_timer_callback; stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); @@ -2220,6 +2354,7 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) stt->configured_ticks = stt->default_ticks; #if MALI_USE_CSF + kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); if (!kbdev->pm.backend.core_idle_wq) { destroy_workqueue(stt->wq); @@ -2276,8 +2411,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) */ backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) { - WARN_ON(!kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)); + WARN_ON(!kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)); backend->hwcnt_disabled = true; } @@ -2315,64 +2449,43 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -static void kbase_pm_timed_out(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev, const char *timeout_msg) { unsigned long flags; - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "%s", timeout_msg); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); dev_err(kbdev->dev, "\tShader=%016llx\n", - kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); + kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); #else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_err(kbdev->dev, "\tMCU desired = %d\n", - kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, "\tMCU sw state = %d\n", - kbdev->pm.backend.mcu_state); + dev_err(kbdev->dev, "\tMCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); + dev_err(kbdev->dev, "\tMCU sw state = %d\n", kbdev->pm.backend.mcu_state); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_LO))); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_READY))); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_READY))); + dev_err(kbdev->dev, "\tL2 =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_READY))); #if MALI_USE_CSF dev_err(kbdev->dev, "\tMCU status = %d\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_STATUS))); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tL2 
=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS))); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_PWRTRANS))); + dev_err(kbdev->dev, "\tL2 =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PWRTRANS))); dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev, - RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -2399,25 +2512,23 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev), + timeout); #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info( - kbdev->dev, - "Wait for desired PM state with L2 powered got interrupted"); + dev_info(kbdev->dev, "Wait for desired PM state with L2 powered got interrupted"); err = (int)remaining; } return err; } -int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +static int pm_wait_for_desired_state(struct kbase_device *kbdev, bool killable_wait) { unsigned long flags; long remaining; @@ -2435,26 +2546,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - 
kbase_pm_is_in_desired_state(kbdev), timeout); + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), + timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + killable_wait = false; #endif - + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), timeout); if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for power transition timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info(kbdev->dev, - "Wait for desired PM state got interrupted"); + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for power transition got interrupted"); err = (int)remaining; } return err; } + +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, true); +} + +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, false); +} KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); #if MALI_USE_CSF @@ -2494,22 +2616,18 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) /* Wait for core mask update to complete */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - core_mask_update_done(kbdev), timeout); + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - core_mask_update_done(kbdev), timeout); + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + 
kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info( - kbdev->dev, - "Wait for cores down scaling got interrupted"); + dev_info(kbdev->dev, "Wait for cores down scaling got interrupted"); err = (int)remaining; } @@ -2517,6 +2635,73 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) } #endif +static bool is_poweroff_wait_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbdev->pm.backend.poweroff_wait_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool killable_wait) +{ + long remaining; +#if MALI_USE_CSF + /* gpu_poweroff_wait_work would be subjected to the kernel scheduling + * and so the wait time can't only be the function of GPU frequency. + */ + const unsigned int extra_wait_time_ms = 2000; + const long timeout = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms); +#else +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Handling of timeout error isn't supported for arbiter builds */ + const long timeout = MAX_SCHEDULE_TIMEOUT; +#else + const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif +#endif + int err = 0; + +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), + timeout); +#else + killable_wait = false; +#endif + + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), timeout); + if (!remaining) { + kbase_pm_timed_out(kbdev, "Wait for poweroff work timed out"); + err = -ETIMEDOUT; + } else if (remaining < 0) { + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for poweroff work got interrupted"); + err = 
(int)remaining; + } + + return err; +} + +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, true); +} + +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, false); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2527,20 +2712,21 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) * and unmask them all. */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), 0xFFFFFFFF); #if MALI_USE_CSF /* Enable only the Page fault bits part */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0xFFFF); #else - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0xFFFFFFFF); #endif + } KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); @@ -2554,13 +2740,14 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) */ lockdep_assert_held(&kbdev->hwaccess_lock); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); - 
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), 0); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), 0xFFFFFFFF); + + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -2626,8 +2813,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #ifdef CONFIG_MALI_ARBITER_SUPPORT if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { - dev_err(kbdev->dev, - "%s: Cannot power up while GPU lost", __func__); + dev_err(kbdev->dev, "%s: Cannot power up while GPU lost", __func__); return; } #endif @@ -2677,16 +2863,14 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #ifdef CONFIG_MALI_ARBITER_SUPPORT else { if (kbdev->arb.arb_if) { - struct kbase_arbiter_vm_state *arb_vm_state = - kbdev->pm.arb_vm_state; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; /* In the case that the GPU has just been granted by * the Arbiter, a reset will have already been done. * However, it is still necessary to initialize the GPU. */ if (arb_vm_state->vm_arb_starting) - kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | - PM_NO_RESET); + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | PM_NO_RESET); } } /* @@ -2694,8 +2878,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) * that a repartitioning occurred. In this case the current config * should be read again. 
*/ - kbase_gpuprops_get_curr_config_props(kbdev, - &kbdev->gpu_props.curr_config); + kbase_gpuprops_get_curr_config_props(kbdev, &kbdev->gpu_props.curr_config); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -2704,12 +2887,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); - if (kbdev->dummy_job_wa.flags & - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_dummy_job_wa_execute(kbdev, - kbase_pm_get_present_cores(kbdev, - KBASE_PM_CORE_SHADER)); + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -2721,14 +2902,16 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->gpu_ready = true; backend->l2_desired = true; #if MALI_USE_CSF - if (reset_required) { - /* GPU reset was done after the power on, so send the post - * reset event instead. This is okay as GPU power off event - * is same as pre GPU reset event. - */ - kbase_ipa_control_handle_gpu_reset_post(kbdev); - } else { - kbase_ipa_control_handle_gpu_power_on(kbdev); + { + if (reset_required) { + /* GPU reset was done after the power on, so send the post + * reset event instead. This is okay as GPU power off event + * is same as pre GPU reset event. 
+ */ + kbase_ipa_control_handle_gpu_reset_post(kbdev); + } else { + kbase_ipa_control_handle_gpu_power_on(kbdev); + } } #endif kbase_pm_update_state(kbdev); @@ -2783,14 +2966,16 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) kbase_pm_cache_snoop_disable(kbdev); #if MALI_USE_CSF - kbase_ipa_control_handle_gpu_power_off(kbdev); + { + kbase_ipa_control_handle_gpu_power_off(kbdev); + } #endif if (kbase_is_gpu_removed(kbdev) #ifdef CONFIG_MALI_ARBITER_SUPPORT - || kbase_pm_is_gpu_lost(kbdev)) { + || kbase_pm_is_gpu_lost(kbdev)) { #else - ) { + ) { #endif /* Ensure we unblock any threads that are stuck waiting * for the GPU @@ -2845,8 +3030,7 @@ static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->pm.lock); - wait_event(kbdev->pm.backend.reset_done_wait, - (kbdev->pm.backend.reset_done)); + wait_event(kbdev->pm.backend.reset_done_wait, (kbdev->pm.backend.reset_done)); kbdev->pm.backend.reset_done = false; } @@ -2867,26 +3051,24 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_gpu_quirks(struct kbase_device *kbdev) { #if MALI_USE_CSF - kbdev->hw_quirks_gpu = - kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG))) + kbdev->hw_quirks_gpu = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG)); #else - u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); + u32 hw_quirks_gpu = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG)); - if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { + if (kbdev->gpu_props.gpu_id.product_model == GPU_ID_PRODUCT_TMIX) { /* Only for tMIx */ u32 coherency_features; - coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES)); + coherency_features = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES)); /* 
(COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly * documented for tMIx so force correct value here. */ - if (coherency_features == - COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { + if (coherency_features == COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << JM_FORCE_COHERENCY_FEATURES_SHIFT; } @@ -2902,8 +3084,7 @@ static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) int default_idvs_group_size = 0xF; u32 group_size = 0; - if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", - &group_size)) + if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", &group_size)) group_size = default_idvs_group_size; if (group_size > IDVS_GROUP_MAX_SIZE) { @@ -2923,19 +3104,16 @@ static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) return 0; } -static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_sc_quirks(struct kbase_device *kbdev) { - u32 hw_quirks_sc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_CONFIG)); + u32 hw_quirks_sc = 0; + + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG))) + hw_quirks_sc = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG)); if (kbase_is_gpu_removed(kbdev)) return -EIO; - if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ - hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; - else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ - hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) hw_quirks_sc |= SC_VAR_ALGORITHM; @@ -2949,8 +3127,10 @@ static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) static int kbase_set_tiler_quirks(struct kbase_device *kbdev) { - u32 hw_quirks_tiler = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_CONFIG)); + u32 hw_quirks_tiler = 0; + + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))) + hw_quirks_tiler = kbase_reg_read32(kbdev, 
GPU_CONTROL_ENUM(TILER_CONFIG)); if (kbase_is_gpu_removed(kbdev)) return -EIO; @@ -2967,9 +3147,6 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { struct device_node *np = kbdev->dev->of_node; - const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - const u32 prod_id = - (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; int error = 0; kbdev->hw_quirks_gpu = 0; @@ -2977,43 +3154,44 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { - dev_info(kbdev->dev, - "Found quirks_gpu = [0x%x] in Devicetree\n", + /* Read the "-" versions of the properties and fall back to + * the "_" versions if these are not found + */ + + if (!of_property_read_u32(np, "quirks-gpu", &kbdev->hw_quirks_gpu) || + !of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { + dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n", kbdev->hw_quirks_gpu); } else { - error = kbase_set_gpu_quirks(kbdev, prod_id); + error = kbase_set_gpu_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_sc", - &kbdev->hw_quirks_sc)) { - dev_info(kbdev->dev, - "Found quirks_sc = [0x%x] in Devicetree\n", - kbdev->hw_quirks_sc); + if (!of_property_read_u32(np, "quirks-sc", &kbdev->hw_quirks_sc) || + !of_property_read_u32(np, "quirks_sc", &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); } else { - error = kbase_set_sc_quirks(kbdev, prod_id); + error = kbase_set_sc_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_tiler", - &kbdev->hw_quirks_tiler)) { - dev_info(kbdev->dev, - "Found quirks_tiler = [0x%x] in Devicetree\n", - kbdev->hw_quirks_tiler); + if (!of_property_read_u32(np, "quirks-tiler", &kbdev->hw_quirks_tiler) || 
+ !of_property_read_u32(np, "quirks_tiler", &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); } else { error = kbase_set_tiler_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_mmu", - &kbdev->hw_quirks_mmu)) { - dev_info(kbdev->dev, - "Found quirks_mmu = [0x%x] in Devicetree\n", - kbdev->hw_quirks_mmu); + if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) || + !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, "Found MMU quirks = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); } else { error = kbase_set_mmu_quirks(kbdev); } @@ -3023,27 +3201,25 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), - kbdev->hw_quirks_sc); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG), kbdev->hw_quirks_sc); - kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), - kbdev->hw_quirks_tiler); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG), kbdev->hw_quirks_tiler); - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), - kbdev->hw_quirks_mmu); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG), kbdev->hw_quirks_mmu); #if MALI_USE_CSF - kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), - kbdev->hw_quirks_gpu); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG), kbdev->hw_quirks_gpu); #else - kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_gpu); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu); #endif } void kbase_pm_cache_snoop_enable(struct 
kbase_device *kbdev) { - if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && - !kbdev->cci_snoop_enabled) { + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && !kbdev->cci_snoop_enabled) { #if IS_ENABLED(CONFIG_ARM64) if (kbdev->snoop_enable_smc != 0) kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); @@ -3085,6 +3261,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) static int kbase_pm_do_reset(struct kbase_device *kbdev) { struct kbasep_reset_timeout_data rtdata; + u32 reg_offset, reg_val; int ret; KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); @@ -3098,12 +3275,17 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) else if (ret > 0) return 0; } else { - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SOFT_RESET); + { + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET); + } } + reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK); + reg_val = RESET_COMPLETED; + /* Unmask the reset complete interrupt only */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); + kbase_reg_write32(kbdev, reg_offset, reg_val); /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; @@ -3113,8 +3295,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rtdata.timer.function = kbasep_reset_timeout; - hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); @@ -3126,15 +3307,19 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) return 0; } + reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT); + reg_val = RESET_COMPLETED; + + /* No interrupt has been received - check if the RAWSTAT register says - * the reset has completed + * the reset has 
completed. */ - if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - RESET_COMPLETED)) { + if (kbase_reg_read32(kbdev, reg_offset) & reg_val) { /* The interrupt is set in the RAWSTAT; this suggests that the * interrupts are not getting to the CPU */ - dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, + "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); /* If interrupts aren't working we can't continue. */ destroy_hrtimer_on_stack(&rtdata.timer); return -EINVAL; @@ -3152,17 +3337,20 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) #ifdef CONFIG_MALI_ARBITER_SUPPORT if (!kbdev->arb.arb_if) { #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", - RESET_TIMEOUT); + dev_err(kbdev->dev, + "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_HARD_RESET); + + { + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_HARD_RESET); + } /* Restart the timer to wait for the hard reset to complete */ rtdata.timed_out = false; - hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); @@ -3177,7 +3365,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) destroy_hrtimer_on_stack(&rtdata.timer); dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", - RESET_TIMEOUT); + RESET_TIMEOUT); #ifdef CONFIG_MALI_ARBITER_SUPPORT } #endif /* CONFIG_MALI_ARBITER_SUPPORT */ @@ -3187,8 +3375,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) int 
kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SET_PROTECTED_MODE); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_SET_PROTECTED_MODE); return 0; } @@ -3215,6 +3402,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->pm.backend.gpu_powered = true; } + /* Ensure interrupts are off to begin with, this also clears any * outstanding interrupts */ @@ -3234,8 +3422,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) #ifdef CONFIG_MALI_ARBITER_SUPPORT if (!(flags & PM_NO_RESET)) #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); + err = kbdev->protected_ops->protected_mode_disable(kbdev->protected_dev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); #if MALI_USE_CSF @@ -3255,6 +3442,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) if (err) goto exit; + if (flags & PM_HW_ISSUES_DETECT) { err = kbase_pm_hw_issues_detect(kbdev); if (err) @@ -3263,28 +3451,25 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_apply(kbdev); kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + kbase_amba_set_shareable_cache_support(kbdev); /* Sanity check protected mode was left after reset */ - WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_PROTECTED_MODE_ACTIVE); + WARN_ON(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & + GPU_STATUS_PROTECTED_MODE_ACTIVE); /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ - if (kbdev->pm.backend.gpu_cycle_counter_requests && - (flags & PM_ENABLE_IRQS)) { + if (kbdev->pm.backend.gpu_cycle_counter_requests && (flags & PM_ENABLE_IRQS)) { kbase_pm_enable_interrupts(kbdev); /* Re-enable the counters if we need to */ - spin_lock_irqsave( - 
&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); if (kbdev->pm.backend.gpu_cycle_counter_requests) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); kbase_pm_disable_interrupts(kbdev); } @@ -3319,34 +3504,30 @@ exit: * When this function is called the l2 cache must be on - i.e., the GPU must be * on. */ -static void -kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) +static void kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); ++kbdev->pm.backend.gpu_cycle_counter_requests; if (kbdev->pm.backend.gpu_cycle_counter_requests == 1) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); else { /* This might happen after GPU reset. * Then counter needs to be kicked. 
*/ -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & +#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) && !MALI_USE_CSF + if (!(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & GPU_STATUS_CYCLE_COUNT_ACTIVE)) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); } #endif } - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); } void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) @@ -3355,8 +3536,7 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < - INT_MAX); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); kbase_pm_wait_for_l2_powered(kbdev); @@ -3371,8 +3551,7 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < - INT_MAX); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -3387,20 +3566,17 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); --kbdev->pm.backend.gpu_cycle_counter_requests; if (kbdev->pm.backend.gpu_cycle_counter_requests == 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_STOP); + 
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_STOP); - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); } void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index e66ce57d3120..851d56141e53 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,6 @@ #include "backend/gpu/mali_kbase_pm_ca.h" #include "mali_kbase_pm_policy.h" - /** * kbase_pm_dev_idle - The GPU is idle. 
* @@ -56,7 +55,7 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev); * * @kbdev: The kbase device structure for the device (must be a valid * pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) present in the GPU device and also a count of @@ -64,15 +63,14 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev); * * Return: The bit mask of cores present */ -u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_get_active_cores - Get details of the cores that are currently * active in the device. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are actively processing work (i.e. @@ -80,15 +78,14 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, * * Return: The bit mask of active cores */ -u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_get_trans_cores - Get details of the cores that are currently * transitioning between power states. 
* * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are currently transitioning between @@ -96,15 +93,14 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, * * Return: The bit mask of transitioning cores */ -u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_get_ready_cores - Get details of the cores that are currently * powered and ready for jobs. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are powered and ready for jobs (they may @@ -112,8 +108,7 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, * * Return: The bit mask of ready cores */ -u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_clock_on - Turn the clock for the device on, and enable device @@ -224,7 +219,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * power off in progress and kbase_pm_context_active() was called instead of * kbase_csf_scheduler_pm_active(). * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT code on timeout error. 
*/ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #else @@ -247,11 +242,26 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * must ensure that this is not the case by, for example, calling * kbase_pm_wait_for_poweroff_work_complete() * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT error code on timeout error. */ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #endif +/** + * kbase_pm_killable_wait_for_desired_state - Wait for the desired power state to be + * reached in a killable state. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is the same as kbase_pm_wait_for_desired_state(), except that it would + * allow the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from the code that is executed in ioctl or + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, or -ETIMEDOUT code on timeout error or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev); + /** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * @@ -333,6 +343,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev); * shader poweroff timer * @kbdev: Device pointer * + * This function must be called only when a kbase device is initialized. + * * Return: 0 on success, error code on error */ int kbase_pm_state_machine_init(struct kbase_device *kbdev); @@ -360,8 +372,8 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev); * kbasep_pm_metrics_init - Initialize the metrics gathering framework. * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * This must be called before other metric gathering APIs are called. - * + * This function must be called only when a kbase device is initialized and + * also must be called before other metric gathering APIs are called.
* * Return: 0 on success, error code on error */ @@ -467,8 +479,26 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); * This function effectively just waits for the @gpu_poweroff_wait_work work * item to complete, if it was enqueued. GPU may not have been powered down * before this function returns. + * + * Return: 0 on success, error code on error */ -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_killable_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to + * complete in killable state. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is the same as kbase_pm_wait_for_poweroff_work_complete(), except that + * it would allow the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from the code that is executed in ioctl or + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, or -ETIMEDOUT code on timeout error or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev); /** * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete @@ -484,8 +514,9 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev); * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Setup the power management callbacks and initialize/enable the runtime-pm - * for the Mali GPU platform device, using the callback function. This must be - * called before the kbase_pm_register_access_enable() function. + * for the Mali GPU platform device, using the callback function. + * This function must be called only when a kbase device is initialized and + * also must be called before the kbase_pm_register_access_enable() function.
* * Return: 0 on success, error code on error */ @@ -569,8 +600,7 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); void kbase_pm_do_poweroff(struct kbase_device *kbdev); #if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) -void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, - struct kbasep_pm_metrics *last, +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last, struct kbasep_pm_metrics *diff); #endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */ @@ -605,8 +635,8 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); * * Return: Returns 0 on failure and non zero on success. */ -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, - u32 util_gl_share, u32 util_cl_share[2]); +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]); #endif #endif /* CONFIG_MALI_BIFROST_DVFS */ @@ -621,8 +651,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); * * Caller must hold hwaccess_lock */ -void kbase_pm_metrics_update(struct kbase_device *kbdev, - ktime_t *now); +void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); /** * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU @@ -790,8 +819,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); * * Return: true if MCU is inactive */ -bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, - enum kbase_mcu_state state); +bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state); /** * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be @@ -802,13 +830,11 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, * * Return: true if allowed to enter the suspended state. 
*/ -static inline -bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) +static inline bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return !(kbdev->pm.backend.csf_pm_sched_flags & - CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); + return !(kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); } /** @@ -820,13 +846,11 @@ bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) * * Return: true if allowed to enter the suspended state. */ -static inline -bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) +static inline bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return !(kbdev->pm.backend.csf_pm_sched_flags & - CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); + return !(kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); } /** @@ -842,8 +866,7 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return kbdev->pm.backend.csf_pm_sched_flags & - CSF_DYNAMIC_PM_CORE_KEEP_ON; + return kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_CORE_KEEP_ON; } /** @@ -857,6 +880,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) { bool in_desired_state = true; + lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) in_desired_state = false; else if (!kbase_pm_is_mcu_desired(kbdev) && @@ -961,13 +986,12 @@ static inline void kbase_pm_enable_db_mirror_interrupt(struct kbase_device *kbde lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbdev->pm.backend.db_mirror_interrupt_enabled) { - u32 irq_mask = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_MASK)); + u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); WARN_ON(irq_mask & DOORBELL_MIRROR); - 
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask | DOORBELL_MIRROR); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask | DOORBELL_MIRROR); kbdev->pm.backend.db_mirror_interrupt_enabled = true; } } @@ -985,11 +1009,10 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->pm.backend.db_mirror_interrupt_enabled) { - u32 irq_mask = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_MASK)); + u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~DOORBELL_MIRROR); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask & ~DOORBELL_MIRROR); kbdev->pm.backend.db_mirror_interrupt_enabled = false; } } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c index 865f526f61f2..5e6e9f058da9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,7 +43,7 @@ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly * under 11s. 
Exceeding this will cause overflow */ -#define KBASE_PM_TIME_SHIFT 8 +#define KBASE_PM_TIME_SHIFT 8 #endif #if MALI_USE_CSF @@ -111,9 +111,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); - kbdev->pm.backend.metrics.values.time_busy = 0; - kbdev->pm.backend.metrics.values.time_idle = 0; - kbdev->pm.backend.metrics.values.time_in_protm = 0; perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; @@ -126,39 +123,21 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) /* We need the GPU_ACTIVE counter */ perf_counter.idx = GPU_ACTIVE_CNT_IDX; - err = kbase_ipa_control_register( - kbdev, &perf_counter, NUM_PERF_COUNTERS, - &kbdev->pm.backend.metrics.ipa_control_client); + err = kbase_ipa_control_register(kbdev, &perf_counter, NUM_PERF_COUNTERS, + &kbdev->pm.backend.metrics.ipa_control_client); if (err) { - dev_err(kbdev->dev, - "Failed to register IPA with kbase_ipa_control: err=%d", - err); + dev_err(kbdev->dev, "Failed to register IPA with kbase_ipa_control: err=%d", err); return -1; } #else KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); - - kbdev->pm.backend.metrics.gpu_active = false; - kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; - - kbdev->pm.backend.metrics.values.time_busy = 0; - kbdev->pm.backend.metrics.values.time_idle = 0; - kbdev->pm.backend.metrics.values.busy_cl[0] = 0; - kbdev->pm.backend.metrics.values.busy_cl[1] = 0; - kbdev->pm.backend.metrics.values.busy_gl = 0; - #endif spin_lock_init(&kbdev->pm.backend.metrics.lock); #ifdef CONFIG_MALI_BIFROST_DVFS - hrtimer_init(&kbdev->pm.backend.metrics.timer, 
CLOCK_MONOTONIC, - HRTIMER_MODE_REL); + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); kbdev->pm.backend.metrics.timer.function = dvfs_callback; kbdev->pm.backend.metrics.initialized = true; atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); @@ -189,8 +168,9 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev) #endif /* CONFIG_MALI_BIFROST_DVFS */ #if MALI_USE_CSF - kbase_ipa_control_unregister( - kbdev, kbdev->pm.backend.metrics.ipa_control_client); + kbase_ipa_control_unregister(kbdev, kbdev->pm.backend.metrics.ipa_control_client); +#else + CSTD_UNUSED(kbdev); #endif } @@ -213,9 +193,8 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) /* Query IPA_CONTROL for the latest GPU-active and protected-time * info. */ - err = kbase_ipa_control_query( - kbdev, kbdev->pm.backend.metrics.ipa_control_client, - &gpu_active_counter, 1, &protected_time); + err = kbase_ipa_control_query(kbdev, kbdev->pm.backend.metrics.ipa_control_client, + &gpu_active_counter, 1, &protected_time); /* Read the timestamp after reading the GPU_ACTIVE counter value. 
* This ensures the time gap between the 2 reads is consistent for @@ -226,15 +205,13 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) now = ktime_get_raw(); if (err) { - dev_err(kbdev->dev, - "Failed to query the increment of GPU_ACTIVE counter: err=%d", + dev_err(kbdev->dev, "Failed to query the increment of GPU_ACTIVE counter: err=%d", err); } else { u64 diff_ns; s64 diff_ns_signed; u32 ns_time; - ktime_t diff = ktime_sub( - now, kbdev->pm.backend.metrics.time_period_start); + ktime_t diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); diff_ns_signed = ktime_to_ns(diff); @@ -294,25 +271,21 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) */ gpu_active_counter = MIN(gpu_active_counter, ns_time); - kbdev->pm.backend.metrics.values.time_busy += - gpu_active_counter; + kbdev->pm.backend.metrics.values.time_busy += gpu_active_counter; - kbdev->pm.backend.metrics.values.time_idle += - ns_time - gpu_active_counter; + kbdev->pm.backend.metrics.values.time_idle += ns_time - gpu_active_counter; /* Also make time in protected mode available explicitly, * so users of this data have this info, too. 
*/ - kbdev->pm.backend.metrics.values.time_in_protm += - protected_time; + kbdev->pm.backend.metrics.values.time_in_protm += protected_time; } kbdev->pm.backend.metrics.time_period_start = now; } #endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */ #else -static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, - ktime_t now) +static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ktime_t now) { ktime_t diff; @@ -323,7 +296,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, return; if (kbdev->pm.backend.metrics.gpu_active) { - u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + u32 ns_time = (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); kbdev->pm.backend.metrics.values.time_busy += ns_time; if (kbdev->pm.backend.metrics.active_cl_ctx[0]) @@ -343,11 +316,10 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, kbdev->pm.backend.metrics.time_period_start = now; } -#endif /* MALI_USE_CSF */ +#endif /* MALI_USE_CSF */ #if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) -void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, - struct kbasep_pm_metrics *last, +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last, struct kbasep_pm_metrics *diff) { struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; @@ -394,11 +366,9 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) diff = &kbdev->pm.backend.metrics.dvfs_diff; - kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, - diff); + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); - utilisation = (100 * diff->time_busy) / - max(diff->time_busy + diff->time_idle, 1u); + utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u); #if !MALI_USE_CSF busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); @@ -407,8 +377,7 
@@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; - kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, - util_cl_share); + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); #else /* Note that, at present, we don't pass protected-mode time to the * platform here. It's unlikely to be useful, however, as the platform @@ -451,7 +420,6 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev) atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); } - #endif /* CONFIG_MALI_BIFROST_DVFS */ #if !MALI_USE_CSF @@ -484,12 +452,12 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) katom = kbase_gpu_inspect(kbdev, js, 1); - if (katom && katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - int device_nr = (katom->core_req & - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) - ? katom->device_nr : 0; + int device_nr = + (katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? + katom->device_nr : + 0; if (!WARN_ON(device_nr >= 2)) kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1; } else { diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c index 4788f04132c1..59d38cad0031 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include #include @@ -51,9 +51,11 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) struct device_node *np = kbdev->dev->of_node; const char *power_policy_name; unsigned long flags; - int i; + unsigned int i; - if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { + /* Read "power-policy" property and fallback to "power_policy" if not found */ + if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) || + (of_property_read_string(np, "power_policy", &power_policy_name) == 0)) { for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { default_policy = all_policy_list[i]; @@ -103,13 +105,13 @@ void kbase_pm_update_active(struct kbase_device *kbdev) active = backend->pm_current_policy->get_core_active(kbdev); WARN((kbase_pm_is_active(kbdev) && !active), - "GPU is active but policy '%s' is indicating that it can be powered off", - kbdev->pm.backend.pm_current_policy->name); + "GPU is active but policy '%s' is indicating that it can be powered off", + kbdev->pm.backend.pm_current_policy->name); if (active) { /* Power on the GPU and any cores requested by the policy */ if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && - pm->backend.poweroff_wait_in_progress) { + pm->backend.poweroff_wait_in_progress) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); pm->backend.poweron_required = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -207,7 +209,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) #endif if (kbdev->pm.backend.shaders_desired != shaders_desired) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, + 
kbdev->pm.backend.shaders_desired); kbdev->pm.backend.shaders_desired = shaders_desired; kbase_pm_update_state(kbdev); @@ -225,9 +228,9 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -int kbase_pm_list_policies(struct kbase_device *kbdev, - const struct kbase_pm_policy * const **list) +int kbase_pm_list_policies(struct kbase_device *kbdev, const struct kbase_pm_policy *const **list) { + CSTD_UNUSED(kbdev); if (list) *list = all_policy_list; @@ -259,32 +262,29 @@ static int policy_change_wait_for_L2_off(struct kbase_device *kbdev) * for host control of shader cores. */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.l2_state == KBASE_L2_OFF, + timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); #endif if (!remaining) { err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info(kbdev->dev, - "Wait for L2_off got interrupted"); + dev_info(kbdev->dev, "Wait for L2_off got interrupted"); err = (int)remaining; } - dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, - err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); + dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, err, + kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); return err; } #endif -void kbase_pm_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_policy *new_policy) +void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *new_policy) { const struct 
kbase_pm_policy *old_policy; unsigned long flags; @@ -294,6 +294,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, bool reset_gpu = false; bool reset_op_prevented = true; struct kbase_csf_scheduler *scheduler = NULL; + u32 pwroff; + bool switching_to_always_on; #endif KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -302,6 +304,18 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); #if MALI_USE_CSF + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops; + if (pwroff == 0 && !switching_to_always_on) { + dev_warn( + kbdev->dev, + "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n"); + dev_warn( + kbdev->dev, + "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n"); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); + } + scheduler = &kbdev->csf.scheduler; KBASE_DEBUG_ASSERT(scheduler != NULL); @@ -372,8 +386,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, if (old_policy->term) old_policy->term(kbdev); - memset(&kbdev->pm.backend.pm_policy_data, 0, - sizeof(union kbase_pm_policy_data)); + memset(&kbdev->pm.backend.pm_policy_data, 0, sizeof(union kbase_pm_policy_data)); KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); if (new_policy->init) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h index e8113659b92e..aa9ed9c58913 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,8 +80,7 @@ void kbase_pm_update_cores(struct kbase_device *kbdev); * Return: true if the request to the HW was successfully made else false if the * request is still pending. */ -static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, - bool shader_required) +static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, bool shader_required) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -89,14 +88,14 @@ static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, * available, and shaders are definitely not powered. */ if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && - kbdev->pm.backend.l2_state != KBASE_L2_ON && - kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) + kbdev->pm.backend.l2_state != KBASE_L2_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) return false; if (shader_required && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) + kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) return false; return true; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c index 1b33461796e2..f4ff61ff5eb6 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c @@ -29,28 +29,48 @@ #include #include #include +#include -void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, - u64 *cycle_counter, - u64 *system_time, - struct timespec64 *ts) +struct kbase_timeout_info 
{ + char *selector_str; + u64 timeout_cycles; +}; + +#if MALI_USE_CSF +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, + CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, + [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, + [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, + [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", + CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", + CSF_FIRMWARE_PING_TIMEOUT_CYCLES }, + [CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT", + DEFAULT_PROGRESS_TIMEOUT_CYCLES }, + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT", + KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES }, +}; +#else +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT", + JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES }, +}; +#endif + +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts) { - u32 hi1, hi2; - if (cycle_counter) *cycle_counter = kbase_backend_get_cycle_cnt(kbdev); if (system_time) { - /* Read hi, lo, hi to ensure a coherent u64 */ - do { - hi1 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_HI)); - *system_time = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_LO)); - hi2 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_HI)); - } while (hi1 != hi2); - *system_time |= (((u64) hi1) << 32); + *system_time = kbase_reg_read64_coherent(kbdev, 
GPU_CONTROL_ENUM(TIMESTAMP)); } /* Record the CPU's idea of current time */ @@ -80,7 +100,7 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); while (time_is_after_jiffies(remaining)) { - if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + if ((kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & GPU_STATUS_CYCLE_COUNT_ACTIVE)) { success = true; break; @@ -91,129 +111,150 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) } #endif -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec64 *ts) +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, + struct timespec64 *ts) { #if !MALI_USE_CSF kbase_pm_request_gpu_cycle_counter(kbdev); - WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, - "L2 not powered up"); - WARN_ONCE((!timedwait_cycle_count_active(kbdev)), - "Timed out on CYCLE_COUNT_ACTIVE"); + WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, "L2 not powered up"); + WARN_ONCE((!timedwait_cycle_count_active(kbdev)), "Timed out on CYCLE_COUNT_ACTIVE"); #endif - kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, - ts); + kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, ts); #if !MALI_USE_CSF kbase_pm_release_gpu_cycle_counter(kbdev); #endif } -unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, - enum kbase_timeout_selector selector) +static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev) { + u64 freq_khz = kbdev->lowest_gpu_freq_khz; + + if (!freq_khz) { + dev_dbg(kbdev->dev, + "Lowest frequency uninitialized! 
Using reference frequency for scaling"); + return DEFAULT_REF_TIMEOUT_FREQ_KHZ; + } + + return freq_khz; +} + +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms) +{ + char *selector_str; + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + selector_str = timeout_info[selector].selector_str; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + selector_str = timeout_info[selector].selector_str; + + kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms; + dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms); +} + +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier) +{ + u64 final_cycles; + u64 timeout; + u64 freq_khz = kbase_device_get_scaling_frequency(kbdev); + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + + /* If the multiplication overflows, we will have unsigned wrap-around, and so might + * end up with a shorter timeout. In those cases, we then want to have the largest + * timeout possible that will not run into these issues. Note that this will not + * wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX + * milliseconds by subsequent steps. + */ + if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles)) + final_cycles = U64_MAX; + /* Timeout calculation: * dividing number of cycles by freq in KHz automatically gives value * in milliseconds. nr_cycles will have to be multiplied by 1e3 to * get result in microseconds, and 1e6 to get result in nanoseconds. 
*/ + timeout = div_u64(final_cycles, freq_khz); - u64 timeout, nr_cycles = 0; - u64 freq_khz; - - /* Only for debug messages, safe default in case it's mis-maintained */ - const char *selector_str = "(unknown)"; - - if (!kbdev->lowest_gpu_freq_khz) { + if (unlikely(timeout > UINT_MAX)) { dev_dbg(kbdev->dev, - "Lowest frequency uninitialized! Using reference frequency for scaling"); - freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - } else { - freq_khz = kbdev->lowest_gpu_freq_khz; - } - - switch (selector) { - case MMU_AS_INACTIVE_WAIT_TIMEOUT: - selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; - nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; - break; - case KBASE_TIMEOUT_SELECTOR_COUNT: - default: -#if !MALI_USE_CSF - WARN(1, "Invalid timeout selector used! Using default value"); - nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; - break; - case JM_DEFAULT_JS_FREE_TIMEOUT: - selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; - nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; - break; -#else - /* Use Firmware timeout if invalid selection */ - WARN(1, - "Invalid timeout selector used! Using CSF Firmware timeout"); - fallthrough; - case CSF_FIRMWARE_TIMEOUT: - selector_str = "CSF_FIRMWARE_TIMEOUT"; - /* Any FW timeout cannot be longer than the FW ping interval, after which - * the firmware_aliveness_monitor will be triggered and may restart - * the GPU if the FW is unresponsive. 
- */ - nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); - - if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) - dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", - selector_str); - break; - case CSF_PM_TIMEOUT: - selector_str = "CSF_PM_TIMEOUT"; - nr_cycles = CSF_PM_TIMEOUT_CYCLES; - break; - case CSF_GPU_RESET_TIMEOUT: - selector_str = "CSF_GPU_RESET_TIMEOUT"; - nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; - break; - case CSF_CSG_SUSPEND_TIMEOUT: - selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; - nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_BOOT_TIMEOUT: - selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_PING_TIMEOUT: - selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; - break; - case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: - selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; - nr_cycles = kbase_csf_timeout_get(kbdev); - break; -#endif - } - - timeout = div_u64(nr_cycles, freq_khz); - if (WARN(timeout > UINT_MAX, - "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", - (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) + "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", + timeout, timeout_info[selector].selector_str, + kbase_device_get_scaling_frequency(kbdev)); timeout = UINT_MAX; - return (unsigned int)timeout; + } + + kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout); +} + +/** + * kbase_timeout_scaling_init - Initialize the table of scaled timeout + * values associated with a @kbase_device. + * + * @kbdev: KBase device pointer. + * + * Return: 0 on success, negative error code otherwise. 
+ */ +static int kbase_timeout_scaling_init(struct kbase_device *kbdev) +{ + int err; + enum kbase_timeout_selector selector; + + /* First, we initialize the minimum and maximum device frequencies, which + * are used to compute the timeouts. + */ + err = kbase_pm_gpu_freq_init(kbdev); + if (unlikely(err < 0)) { + dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n"); + return err; + } + + dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n"); + for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) { + u32 cycle_multiplier = 1; + u64 nr_cycles = timeout_info[selector].timeout_cycles; +#if MALI_USE_CSF + /* Special case: the scheduler progress timeout can be set manually, + * and does not have a canonical length defined in the headers. Hence, + * we query it once upon startup to get a baseline, and change it upon + * every invocation of the appropriate functions + */ + if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT) + nr_cycles = kbase_csf_timeout_get(kbdev); +#endif + + /* Since we are in control of the iteration bounds for the selector, + * we don't have to worry about bounds checking when setting the timeout. 
+ */ + kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier); + } + return 0; +} + +unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector) +{ + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n"); + selector = KBASE_DEFAULT_TIMEOUT; + } + + return kbdev->backend_time.device_scaled_timeouts[selector]; } KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) { - u32 hi1, hi2, lo; - - /* Read hi, lo, hi to ensure a coherent u64 */ - do { - hi1 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_HI)); - lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_LO)); - hi2 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_HI)); - } while (hi1 != hi2); - - return lo | (((u64) hi1) << 32); + return kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(CYCLE_COUNT)); } #if MALI_USE_CSF @@ -247,18 +288,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t int kbase_backend_time_init(struct kbase_device *kbdev) { + int err = 0; #if MALI_USE_CSF u64 cpu_ts = 0; u64 gpu_ts = 0; u64 freq; u64 common_factor; + kbase_pm_register_access_enable(kbdev); get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); freq = arch_timer_get_cntfrq(); if (!freq) { dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } common_factor = gcd(NSEC_PER_SEC, freq); @@ -268,12 +312,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev) if (!kbdev->backend_time.divisor) { dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor); #endif - return 0; + if (kbase_timeout_scaling_init(kbdev)) { + dev_warn(kbdev->dev, "Could not initialize timeout 
scaling"); + err = -EINVAL; + } + +#if MALI_USE_CSF +disable_registers: + kbase_pm_register_access_disable(kbdev); +#endif + + return err; } diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index 0a61a12d902a..72dd15f2d8cc 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -35,9 +35,6 @@ bob_defaults { "CONFIG_GPU_HWVER={{.hwver}}", ], }, - mali_platform_dt_pin_rst: { - kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"], - }, gpu_has_csf: { kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"], }, @@ -62,11 +59,11 @@ bob_defaults { mali_dma_buf_legacy_compat: { kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], }, - large_page_alloc_override: { - kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"], + page_migration_support: { + kbuild_options: ["CONFIG_PAGE_MIGRATION_SUPPORT=y"], }, - large_page_alloc: { - kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"], + large_page_support: { + kbuild_options: ["CONFIG_LARGE_PAGE_SUPPORT=y"], }, mali_memory_fully_backed: { kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], @@ -101,9 +98,6 @@ bob_defaults { mali_system_trace: { kbuild_options: ["CONFIG_MALI_BIFROST_SYSTEM_TRACE=y"], }, - buslog: { - kbuild_options: ["CONFIG_MALI_BUSLOG=y"], - }, cinstr_vector_dump: { kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], }, @@ -140,6 +134,15 @@ bob_defaults { mali_coresight: { kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], }, + mali_fw_trace_mode_manual: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_MANUAL=y"], + }, + mali_fw_trace_mode_auto_print: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT=y"], + }, + mali_fw_trace_mode_auto_discard: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", @@ -201,6 +204,7 @@ bob_kernel_module { "platform/*/*/*/*.h", "platform/*/*/*/Kbuild", "thirdparty/*.c", + "thirdparty/*.h", 
"thirdparty/Kbuild", "debug/*.c", "debug/*.h", @@ -211,6 +215,11 @@ bob_kernel_module { "gpu/*.c", "gpu/*.h", "gpu/Kbuild", + "hw_access/*.c", + "hw_access/*.h", + "hw_access/*/*.c", + "hw_access/*/*.h", + "hw_access/Kbuild", "tl/*.c", "tl/*.h", "tl/Kbuild", @@ -272,9 +281,4 @@ bob_kernel_module { "CONFIG_MALI_BIFROST=m", "CONFIG_MALI_KUTF=n", ], - buslog: { - extra_symbols: [ - "bus_logger", - ], - }, } diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 07d277b947d2..8b1410886b05 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,12 +24,13 @@ */ #include -#include +#include #include #include #include #include #include +#include #if IS_ENABLED(CONFIG_DEBUG_FS) #include @@ -92,24 +93,20 @@ static const struct kbase_context_init context_init[] = { "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed" }, - { kbase_context_mmu_init, kbase_context_mmu_term, - "MMU initialization failed" }, - { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, - "Memory alloc page failed" }, + { kbase_ctx_sched_init_ctx, NULL, NULL }, + { kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" }, + { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, "Memory alloc page failed" }, { kbase_region_tracker_init, kbase_region_tracker_term, "Region tracker initialization failed" }, { kbase_sticky_resource_init, 
kbase_context_sticky_resource_term, "Sticky resource initialization failed" }, { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, - { kbase_csf_ctx_init, kbase_csf_ctx_term, - "CSF context initialization failed" }, + { kbase_csf_ctx_init, kbase_csf_ctx_term, "CSF context initialization failed" }, { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, "Adding kctx to device failed" }, }; -static void kbase_context_term_partial( - struct kbase_context *kctx, - unsigned int i) +static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int i) { while (i-- > 0) { if (context_init[i].term) @@ -117,11 +114,10 @@ static void kbase_context_term_partial( } } -struct kbase_context *kbase_create_context(struct kbase_device *kbdev, - bool is_compat, - base_context_create_flags const flags, - unsigned long const api_version, - struct file *const filp) +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, + base_context_create_flags const flags, + unsigned long const api_version, + struct kbase_file *const kfile) { struct kbase_context *kctx; unsigned int i = 0; @@ -140,9 +136,11 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->filp = filp; + kctx->kfile = kfile; kctx->create_flags = flags; + memcpy(kctx->comm, current->comm, sizeof(current->comm)); + if (is_compat) kbase_ctx_flag_set(kctx, KCTX_COMPAT); #if defined(CONFIG_64BIT) @@ -157,8 +155,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, err = context_init[i].init(kctx); if (err) { - dev_err(kbdev->dev, "%s error = %d\n", - context_init[i].err_mes, err); + dev_err(kbdev->dev, "%s error = %d\n", context_init[i].err_mes, err); /* kctx should be freed by kbase_context_free(). * Otherwise it will result in memory leak. 
@@ -190,14 +187,22 @@ void kbase_destroy_context(struct kbase_context *kctx) * Customer side that a hang could occur if context termination is * not blocked until the resume of GPU device. */ - while (kbase_pm_context_active_handle_suspend( - kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_info(kbdev->dev, - "Suspend in progress when destroying context"); - wait_event(kbdev->pm.resume_wait, - !kbase_pm_is_suspending(kbdev)); + while (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_info(kbdev->dev, "Suspend in progress when destroying context"); + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } + /* Have synchronized against the System suspend and incremented the + * pm.active_count. So any subsequent invocation of System suspend + * callback would get blocked. + * If System suspend callback was already in progress then the above loop + * would have waited till the System resume callback has begun. + * So wait for the System resume callback to also complete as we want to + * avoid context termination during System resume also. + */ + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c index 995a08e36f43..f2eefe9ddcd0 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include #include @@ -81,8 +81,7 @@ static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx) static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) { - kbase_timer_setup(&kctx->soft_job_timeout, - kbasep_soft_job_timeout_worker); + kbase_timer_setup(&kctx->soft_job_timeout, kbasep_soft_job_timeout_worker); return 0; } @@ -133,41 +132,33 @@ static const struct kbase_context_init context_init[] = { "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed" }, - { kbase_context_mmu_init, kbase_context_mmu_term, - "MMU initialization failed" }, - { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, - "Memory alloc page failed" }, + { kbase_ctx_sched_init_ctx, NULL, NULL }, + { kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" }, + { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, "Memory alloc page failed" }, { kbase_region_tracker_init, kbase_region_tracker_term, "Region tracker initialization failed" }, { kbase_sticky_resource_init, kbase_context_sticky_resource_term, "Sticky resource initialization failed" }, { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, - { kbase_context_kbase_kinstr_jm_init, - kbase_context_kbase_kinstr_jm_term, + { kbase_context_kbase_kinstr_jm_init, kbase_context_kbase_kinstr_jm_term, "JM instrumentation initialization failed" }, - { kbase_context_kbase_timer_setup, NULL, - "Timers initialization failed" }, - { kbase_event_init, kbase_event_cleanup, - "Event initialization failed" }, - { kbasep_js_kctx_init, kbasep_js_kctx_term, - "JS kctx initialization failed" }, + { kbase_context_kbase_timer_setup, NULL, "Timers initialization failed" }, + { 
kbase_event_init, kbase_event_cleanup, "Event initialization failed" }, + { kbasep_js_kctx_init, kbasep_js_kctx_term, "JS kctx initialization failed" }, { kbase_jd_init, kbase_jd_exit, "JD initialization failed" }, { kbase_context_submit_check, NULL, "Enabling job submission failed" }, #if IS_ENABLED(CONFIG_DEBUG_FS) - { kbase_debug_job_fault_context_init, - kbase_debug_job_fault_context_term, + { kbase_debug_job_fault_context_init, kbase_debug_job_fault_context_term, "Job fault context initialization failed" }, #endif + { kbasep_platform_context_init, kbasep_platform_context_term, + "Platform callback for kctx initialization failed" }, { NULL, kbase_context_flush_jobs, NULL }, { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, "Adding kctx to device failed" }, - { kbasep_platform_context_init, kbasep_platform_context_term, - "Platform callback for kctx initialization failed" }, }; -static void kbase_context_term_partial( - struct kbase_context *kctx, - unsigned int i) +static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int i) { while (i-- > 0) { if (context_init[i].term) @@ -175,11 +166,10 @@ static void kbase_context_term_partial( } } -struct kbase_context *kbase_create_context(struct kbase_device *kbdev, - bool is_compat, - base_context_create_flags const flags, - unsigned long const api_version, - struct file *const filp) +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, + base_context_create_flags const flags, + unsigned long const api_version, + struct kbase_file *const kfile) { struct kbase_context *kctx; unsigned int i = 0; @@ -198,7 +188,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->filp = filp; + kctx->kfile = kfile; kctx->create_flags = flags; if (is_compat) @@ -215,8 +205,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, err = context_init[i].init(kctx); if 
(err) { - dev_err(kbdev->dev, "%s error = %d\n", - context_init[i].err_mes, err); + dev_err(kbdev->dev, "%s error = %d\n", context_init[i].err_mes, err); /* kctx should be freed by kbase_context_free(). * Otherwise it will result in memory leak. @@ -243,7 +232,7 @@ void kbase_destroy_context(struct kbase_context *kctx) if (WARN_ON(!kbdev)) return; - /* Context termination could happen whilst the system suspend of + /* Context termination could happen whilst the system suspend of * the GPU device is ongoing or has completed. It has been seen on * Customer side that a hang could occur if context termination is * not blocked until the resume of GPU device. @@ -251,13 +240,22 @@ void kbase_destroy_context(struct kbase_context *kctx) #ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_inc(&kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - while (kbase_pm_context_active_handle_suspend( - kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_dbg(kbdev->dev, - "Suspend in progress when destroying context"); - wait_event(kbdev->pm.resume_wait, - !kbase_pm_is_suspending(kbdev)); + while (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_dbg(kbdev->dev, "Suspend in progress when destroying context"); + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } + + /* Have synchronized against the System suspend and incremented the + * pm.active_count. So any subsequent invocation of System suspend + * callback would get blocked. + * If System suspend callback was already in progress then the above loop + * would have waited till the System resume callback has begun. + * So wait for the System resume callback to also complete as we want to + * avoid context termination during System resume also. 
+ */ + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); + #ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_dec(&kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index 88be6c2e7587..41f129624245 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -25,12 +25,16 @@ #include #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE #include +#endif + +#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE +#include #else #include #endif #include -#include +#include #include #include #include @@ -53,8 +57,7 @@ static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid) /* Check if the kctx creation request is from a existing process.*/ while (node) { - struct kbase_process *prcs_node = - rb_entry(node, struct kbase_process, kprcs_node); + struct kbase_process *prcs_node = rb_entry(node, struct kbase_process, kprcs_node); if (prcs_node->tgid == tgid) { kprcs = prcs_node; break; @@ -110,8 +113,7 @@ static int kbase_insert_kctx_to_process(struct kbase_context *kctx) struct kbase_process *prcs_node; parent = *new; - prcs_node = rb_entry(parent, struct kbase_process, - kprcs_node); + prcs_node = rb_entry(parent, struct kbase_process, kprcs_node); if (tgid < prcs_node->tgid) new = &(*new)->rb_left; else @@ -135,19 +137,15 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->process_mm = NULL; - kctx->task = NULL; - atomic_set(&kctx->nonmapped_pages, 0); - atomic_set(&kctx->permanent_mapped_pages, 0); kctx->tgid = task_tgid_vnr(current); kctx->pid = task_pid_vnr(current); /* Check if this is a Userspace created context */ - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { struct pid *pid_struct; rcu_read_lock(); - pid_struct = 
find_get_pid(kctx->tgid); + pid_struct = get_pid(task_tgid(current)); if (likely(pid_struct)) { struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); @@ -158,16 +156,14 @@ int kbase_context_common_init(struct kbase_context *kctx) get_task_struct(task); kctx->task = task; } else { - dev_err(kctx->kbdev->dev, - "Failed to get task pointer for %s/%d", + dev_err(kctx->kbdev->dev, "Failed to get task pointer for %s/%d", current->comm, kctx->pid); err = -ESRCH; } put_pid(pid_struct); } else { - dev_err(kctx->kbdev->dev, - "Failed to get pid pointer for %s/%d", + dev_err(kctx->kbdev->dev, "Failed to get pid pointer for %s/%d", current->comm, kctx->pid); err = -ESRCH; } @@ -180,8 +176,6 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->process_mm = current->mm; } - atomic_set(&kctx->used_pages, 0); - mutex_init(&kctx->reg_lock); spin_lock_init(&kctx->mem_partials_lock); @@ -190,21 +184,6 @@ int kbase_context_common_init(struct kbase_context *kctx) spin_lock_init(&kctx->waiting_soft_jobs_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); - init_waitqueue_head(&kctx->event_queue); - atomic_set(&kctx->event_count, 0); - -#if !MALI_USE_CSF - atomic_set(&kctx->event_closed, false); -#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) - atomic_set(&kctx->jctx.work_id, 0); -#endif -#endif - -#if MALI_USE_CSF - atomic64_set(&kctx->num_fixable_allocs, 0); - atomic64_set(&kctx->num_fixed_allocs, 0); -#endif - kbase_gpu_vm_lock(kctx); bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); kbase_gpu_vm_unlock(kctx); @@ -215,9 +194,8 @@ int kbase_context_common_init(struct kbase_context *kctx) err = kbase_insert_kctx_to_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); if (err) { - dev_err(kctx->kbdev->dev, - "(err:%d) failed to insert kctx to kbase_process", err); - if (likely(kctx->filp)) { + dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err); + if (likely(kctx->kfile)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } @@ 
-298,8 +276,7 @@ void kbase_context_common_term(struct kbase_context *kctx) pages = atomic_read(&kctx->used_pages); if (pages != 0) - dev_warn(kctx->kbdev->dev, - "%s: %d pages in use!\n", __func__, pages); + dev_warn(kctx->kbdev->dev, "%s: %d pages in use!\n", __func__, pages); WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); @@ -307,7 +284,7 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } @@ -328,9 +305,8 @@ void kbase_context_mem_pool_group_term(struct kbase_context *kctx) int kbase_context_mmu_init(struct kbase_context *kctx) { - return kbase_mmu_init( - kctx->kbdev, &kctx->mmu, kctx, - kbase_context_mmu_group_id_get(kctx->create_flags)); + return kbase_mmu_init(kctx->kbdev, &kctx->mmu, kctx, + kbase_context_mmu_group_id_get(kctx->create_flags)); } void kbase_context_mmu_term(struct kbase_context *kctx) @@ -342,7 +318,7 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx) { struct page *p; - p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); + p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], false); if (!p) return -ENOMEM; @@ -354,10 +330,8 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx) void kbase_context_mem_pool_free(struct kbase_context *kctx) { /* drop the aliasing sink page now that it can't be mapped anymore */ - kbase_mem_pool_free( - &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], - as_page(kctx->aliasing_sink_page), - false); + kbase_mem_pool_free(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], + as_page(kctx->aliasing_sink_page), false); } void kbase_context_sticky_resource_term(struct kbase_context *kctx) @@ -369,18 +343,15 @@ void kbase_context_sticky_resource_term(struct kbase_context *kctx) /* free pending region setups */ pending_regions_to_clean = KBASE_COOKIE_MASK; - 
bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, - kctx->cookies, BITS_PER_LONG); + bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, kctx->cookies, + BITS_PER_LONG); while (pending_regions_to_clean) { - unsigned int cookie = find_first_bit(&pending_regions_to_clean, - BITS_PER_LONG); + unsigned int cookie = find_first_bit(&pending_regions_to_clean, BITS_PER_LONG); if (!WARN_ON(!kctx->pending_regions[cookie])) { dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n"); - kbase_mem_phy_alloc_put( - kctx->pending_regions[cookie]->cpu_alloc); - kbase_mem_phy_alloc_put( - kctx->pending_regions[cookie]->gpu_alloc); + kbase_mem_phy_alloc_put(kctx->pending_regions[cookie]->cpu_alloc); + kbase_mem_phy_alloc_put(kctx->pending_regions[cookie]->gpu_alloc); kfree(kctx->pending_regions[cookie]); kctx->pending_regions[cookie] = NULL; @@ -390,3 +361,10 @@ void kbase_context_sticky_resource_term(struct kbase_context *kctx) } kbase_gpu_vm_unlock(kctx); } + +bool kbase_ctx_compat_mode(struct kbase_context *kctx) +{ + return !IS_ENABLED(CONFIG_64BIT) || + (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT)); +} +KBASE_EXPORT_TEST_API(kbase_ctx_compat_mode); diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h index 7c90e2708fa3..939eb9bbd65e 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h @@ -56,8 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. * @api_version: Application program interface version, as encoded in * a single integer by the KBASE_API_VERSION macro. - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. + * @kfile: Pointer to the object representing the /dev/malixx device + * file instance. 
Shall be passed as NULL for internally created + * contexts. * * Up to one context can be created for each client that opens the device file * /dev/malixx. Context creation is deferred until a special ioctl() system call @@ -65,11 +66,10 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); * * Return: new kbase context or NULL on failure */ -struct kbase_context * -kbase_create_context(struct kbase_device *kbdev, bool is_compat, - base_context_create_flags const flags, - unsigned long api_version, - struct file *filp); +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, + base_context_create_flags const flags, + unsigned long api_version, + struct kbase_file *const kfile); /** * kbase_destroy_context - Destroy a kernel base context. @@ -86,8 +86,7 @@ void kbase_destroy_context(struct kbase_context *kctx); * * Return: true if @flag is set on @kctx, false if not. */ -static inline bool kbase_ctx_flag(struct kbase_context *kctx, - enum kbase_context_flags flag) +static inline bool kbase_ctx_flag(struct kbase_context *kctx, enum kbase_context_flags flag) { return atomic_read(&kctx->flags) & flag; } @@ -99,11 +98,7 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx, * * Return: True if needs to maintain compatibility, False otherwise. */ -static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx) -{ - return !IS_ENABLED(CONFIG_64BIT) || - (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT)); -} +bool kbase_ctx_compat_mode(struct kbase_context *kctx); /** * kbase_ctx_flag_clear - Clear @flag on @kctx @@ -116,8 +111,7 @@ static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx) * Some flags have locking requirements, check the documentation for the * respective flags. 
*/ -static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, - enum kbase_context_flags flag) +static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, enum kbase_context_flags flag) { atomic_andnot(flag, &kctx->flags); } @@ -133,8 +127,7 @@ static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, * Some flags have locking requirements, check the documentation for the * respective flags. */ -static inline void kbase_ctx_flag_set(struct kbase_context *kctx, - enum kbase_context_flags flag) +static inline void kbase_ctx_flag_set(struct kbase_context *kctx, enum kbase_context_flags flag) { atomic_or(flag, &kctx->flags); } diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h index 1cde7394c705..8d9b39419586 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,14 @@ struct kbase_context_init { char *err_mes; }; +/** + * kbase_context_common_init() - Initialize kbase context + * @kctx: Pointer to the kbase context to be initialized. + * + * This function must be called only when a kbase context is instantiated. + * + * Return: 0 on success. 
+ */ int kbase_context_common_init(struct kbase_context *kctx); void kbase_context_common_term(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild index 44217dba10c0..5df35864efc7 100644 --- a/drivers/gpu/arm/bifrost/csf/Kbuild +++ b/drivers/gpu/arm/bifrost/csf/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -19,6 +19,7 @@ # bifrost_kbase-y += \ + csf/mali_kbase_csf_util.o \ csf/mali_kbase_csf_firmware_cfg.o \ csf/mali_kbase_csf_trace_buffer.o \ csf/mali_kbase_csf.o \ @@ -29,11 +30,15 @@ bifrost_kbase-y += \ csf/mali_kbase_csf_tl_reader.o \ csf/mali_kbase_csf_heap_context_alloc.o \ csf/mali_kbase_csf_reset_gpu.o \ + csf/mali_kbase_csf_csg.o \ csf/mali_kbase_csf_csg_debugfs.o \ csf/mali_kbase_csf_kcpu_debugfs.o \ + csf/mali_kbase_csf_sync.o \ csf/mali_kbase_csf_sync_debugfs.o \ + csf/mali_kbase_csf_kcpu_fence_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ + csf/mali_kbase_csf_cpu_queue.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ csf/mali_kbase_csf_event.o \ csf/mali_kbase_csf_firmware_log.o \ diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c index c81d0a5a7236..1489d8c1971b 100644 --- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -64,12 +64,19 @@ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency * listener * - * @listener: GPU clock frequency listener. - * @kbdev: Pointer to kbase device. + * @listener: GPU clock frequency listener. + * @kbdev: Pointer to kbase device. + * @clk_chg_wq: Dedicated workqueue to process the work item corresponding to + * a clock rate notification. + * @clk_chg_work: Work item to process the clock rate change + * @rate: The latest notified rate change, in unit of Hz */ struct kbase_ipa_control_listener_data { struct kbase_clk_rate_listener listener; struct kbase_device *kbdev; + struct workqueue_struct *clk_chg_wq; + struct work_struct clk_chg_work; + atomic_t rate; }; static u32 timer_value(u32 gpu_rate) @@ -80,14 +87,14 @@ static u32 timer_value(u32 gpu_rate) static int wait_status(struct kbase_device *kbdev, u32 flags) { unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS; - u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); + u32 status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS)); /* * Wait for the STATUS register to indicate that flags have been * cleared, in case a transition is pending. 
*/ while (--max_loops && (status & flags)) - status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); + status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS)); if (max_loops == 0) { dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck"); return -EBUSY; @@ -100,41 +107,17 @@ static int apply_select_config(struct kbase_device *kbdev, u64 *select) { int ret; - u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX); - u32 select_cshw_hi = - (u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX); - u32 select_memsys_lo = - (u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX); - u32 select_memsys_hi = - (u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX); - u32 select_tiler_lo = - (u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX); - u32 select_tiler_hi = - (u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX); - u32 select_shader_lo = - (u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX); - u32 select_shader_hi = - (u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX); - - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO), - select_memsys_lo); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI), - select_memsys_hi); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO), - select_tiler_lo); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI), - select_tiler_hi); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO), - select_shader_lo); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI), - select_shader_hi); + kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_CSHW), select[KBASE_IPA_CORE_TYPE_CSHW]); + kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_MEMSYS), + select[KBASE_IPA_CORE_TYPE_MEMSYS]); + kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_TILER), select[KBASE_IPA_CORE_TYPE_TILER]); + kbase_reg_write64(kbdev, 
IPA_CONTROL_ENUM(SELECT_SHADER), + select[KBASE_IPA_CORE_TYPE_SHADER]); ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); if (!ret) { - kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_APPLY); ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); } else { dev_err(kbdev->dev, "Wait for the pending command failed"); @@ -145,48 +128,25 @@ static int apply_select_config(struct kbase_device *kbdev, u64 *select) static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx) { - u32 value_lo, value_hi; - switch (type) { case KBASE_IPA_CORE_TYPE_CSHW: - value_lo = kbase_reg_read( - kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx))); - value_hi = kbase_reg_read( - kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx))); - break; + return kbase_reg_read64(kbdev, IPA_VALUE_CSHW_OFFSET(select_idx)); + case KBASE_IPA_CORE_TYPE_MEMSYS: - value_lo = kbase_reg_read( - kbdev, - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx))); - value_hi = kbase_reg_read( - kbdev, - IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx))); - break; + return kbase_reg_read64(kbdev, IPA_VALUE_MEMSYS_OFFSET(select_idx)); + case KBASE_IPA_CORE_TYPE_TILER: - value_lo = kbase_reg_read( - kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx))); - value_hi = kbase_reg_read( - kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx))); - break; + return kbase_reg_read64(kbdev, IPA_VALUE_TILER_OFFSET(select_idx)); + case KBASE_IPA_CORE_TYPE_SHADER: - value_lo = kbase_reg_read( - kbdev, - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx))); - value_hi = kbase_reg_read( - kbdev, - IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx))); - break; + return kbase_reg_read64(kbdev, IPA_VALUE_SHADER_OFFSET(select_idx)); default: WARN(1, "Unknown core type: %u\n", type); - value_lo = value_hi = 0; - break; + return 0; } - - return (((u64)value_hi << 32) | value_lo); } -static void build_select_config(struct kbase_ipa_control *ipa_ctrl, - u64 
*select_config) +static void build_select_config(struct kbase_ipa_control *ipa_ctrl, u64 *select_config) { size_t i; @@ -200,8 +160,7 @@ static void build_select_config(struct kbase_ipa_control *ipa_ctrl, &ipa_ctrl->blocks[i].select[j]; select_config[i] |= - ((u64)prfcnt_config->idx - << (IPA_CONTROL_SELECT_BITS_PER_CNT * j)); + ((u64)prfcnt_config->idx << (IPA_CONTROL_SELECT_BITS_PER_CNT * j)); } } } @@ -218,20 +177,17 @@ static int update_select_registers(struct kbase_device *kbdev) } static inline void calc_prfcnt_delta(struct kbase_device *kbdev, - struct kbase_ipa_control_prfcnt *prfcnt, - bool gpu_ready) + struct kbase_ipa_control_prfcnt *prfcnt, bool gpu_ready) { u64 delta_value, raw_value; if (gpu_ready) - raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, - prfcnt->select_idx); + raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, prfcnt->select_idx); else raw_value = prfcnt->latest_raw_value; if (raw_value < prfcnt->latest_raw_value) { - delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) + - raw_value; + delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) + raw_value; } else { delta_value = raw_value - prfcnt->latest_raw_value; } @@ -266,63 +222,65 @@ static inline void calc_prfcnt_delta(struct kbase_device *kbdev, * affect all performance counters which require GPU normalization * in every session. 
*/ -static void -kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener, - u32 clk_index, u32 clk_rate_hz) +static void kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener, + u32 clk_index, u32 clk_rate_hz) { if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) { - size_t i; - unsigned long flags; struct kbase_ipa_control_listener_data *listener_data = - container_of(listener, - struct kbase_ipa_control_listener_data, - listener); - struct kbase_device *kbdev = listener_data->kbdev; - struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; + container_of(listener, struct kbase_ipa_control_listener_data, listener); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Save the rate and delegate the job to a work item */ + atomic_set(&listener_data->rate, clk_rate_hz); + queue_work(listener_data->clk_chg_wq, &listener_data->clk_chg_work); + } +} - if (!kbdev->pm.backend.gpu_ready) { - dev_err(kbdev->dev, - "%s: GPU frequency cannot change while GPU is off", - __func__); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return; - } +static void kbase_ipa_ctrl_rate_change_worker(struct work_struct *data) +{ + struct kbase_ipa_control_listener_data *listener_data = + container_of(data, struct kbase_ipa_control_listener_data, clk_chg_work); + struct kbase_device *kbdev = listener_data->kbdev; + struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; + unsigned long flags; + u32 rate; + size_t i; - /* Interrupts are already disabled and interrupt state is also saved */ - spin_lock(&ipa_ctrl->lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { - struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; + if (!kbdev->pm.backend.gpu_ready) { + dev_err(kbdev->dev, "%s: GPU frequency cannot change while GPU is off", __func__); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } - if (session->active) { - 
size_t j; + spin_lock(&ipa_ctrl->lock); + /* Picking up the latest notified rate */ + rate = (u32)atomic_read(&listener_data->rate); - for (j = 0; j < session->num_prfcnts; j++) { - struct kbase_ipa_control_prfcnt *prfcnt = - &session->prfcnts[j]; + for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { + struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; - if (prfcnt->gpu_norm) - calc_prfcnt_delta(kbdev, prfcnt, true); - } + if (session->active) { + size_t j; + + for (j = 0; j < session->num_prfcnts; j++) { + struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[j]; + + if (prfcnt->gpu_norm) + calc_prfcnt_delta(kbdev, prfcnt, true); } } - - ipa_ctrl->cur_gpu_rate = clk_rate_hz; - - /* Update the timer for automatic sampling if active sessions - * are present. Counters have already been manually sampled. - */ - if (ipa_ctrl->num_active_sessions > 0) { - kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), - timer_value(ipa_ctrl->cur_gpu_rate)); - } - - spin_unlock(&ipa_ctrl->lock); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + + ipa_ctrl->cur_gpu_rate = rate; + /* Update the timer for automatic sampling if active sessions + * are present. Counters have already been manually sampled. 
+ */ + if (ipa_ctrl->num_active_sessions > 0) + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), timer_value(rate)); + + spin_unlock(&ipa_ctrl->lock); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_ipa_control_init(struct kbase_device *kbdev) @@ -330,39 +288,44 @@ void kbase_ipa_control_init(struct kbase_device *kbdev) struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; struct kbase_ipa_control_listener_data *listener_data; - size_t i, j; + size_t i; + unsigned long flags; for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { - for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { - ipa_ctrl->blocks[i].select[j].idx = 0; - ipa_ctrl->blocks[i].select[j].refcount = 0; - } - ipa_ctrl->blocks[i].num_available_counters = - KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; + ipa_ctrl->blocks[i].num_available_counters = KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; } spin_lock_init(&ipa_ctrl->lock); - ipa_ctrl->num_active_sessions = 0; - for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) - ipa_ctrl->sessions[i].active = false; - listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), - GFP_KERNEL); + listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), GFP_KERNEL); if (listener_data) { - listener_data->listener.notify = - kbase_ipa_control_rate_change_notify; - listener_data->kbdev = kbdev; - ipa_ctrl->rtm_listener_data = listener_data; - } + listener_data->clk_chg_wq = + alloc_workqueue("ipa_ctrl_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (listener_data->clk_chg_wq) { + INIT_WORK(&listener_data->clk_chg_work, kbase_ipa_ctrl_rate_change_worker); + listener_data->listener.notify = kbase_ipa_control_rate_change_notify; + listener_data->kbdev = kbdev; + ipa_ctrl->rtm_listener_data = listener_data; + /* Initialise to 0, which is out of normal notified rates */ + atomic_set(&listener_data->rate, 0); + } else { + dev_warn(kbdev->dev, + "%s: failed to allocate 
workqueue, clock rate update disabled", + __func__); + kfree(listener_data); + listener_data = NULL; + } + } else + dev_warn(kbdev->dev, + "%s: failed to allocate memory, IPA control clock rate update disabled", + __func__); - spin_lock(&clk_rtm->lock); + spin_lock_irqsave(&clk_rtm->lock, flags); if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]) - ipa_ctrl->cur_gpu_rate = - clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val; + ipa_ctrl->cur_gpu_rate = clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val; if (listener_data) - kbase_clk_rate_trace_manager_subscribe_no_lock( - clk_rtm, &listener_data->listener); - spin_unlock(&clk_rtm->lock); + kbase_clk_rate_trace_manager_subscribe_no_lock(clk_rtm, &listener_data->listener); + spin_unlock_irqrestore(&clk_rtm->lock, flags); } KBASE_EXPORT_TEST_API(kbase_ipa_control_init); @@ -371,18 +334,19 @@ void kbase_ipa_control_term(struct kbase_device *kbdev) unsigned long flags; struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; - struct kbase_ipa_control_listener_data *listener_data = - ipa_ctrl->rtm_listener_data; + struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data; WARN_ON(ipa_ctrl->num_active_sessions); - if (listener_data) + if (listener_data) { kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener); + destroy_workqueue(listener_data->clk_chg_wq); + } kfree(ipa_ctrl->rtm_listener_data); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.gpu_powered) - kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), 0); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } KBASE_EXPORT_TEST_API(kbase_ipa_control_term); @@ -403,8 +367,7 @@ static void session_read_raw_values(struct kbase_device *kbdev, for (i = 0; i < session->num_prfcnts; i++) { struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i]; - u64 raw_value = read_value_cnt(kbdev, 
(u8)prfcnt->type, - prfcnt->select_idx); + u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, prfcnt->select_idx); prfcnt->latest_raw_value = raw_value; } @@ -429,12 +392,10 @@ static void session_read_raw_values(struct kbase_device *kbdev, * * Return: 0 on success, or error code on failure. */ -static int session_gpu_start(struct kbase_device *kbdev, - struct kbase_ipa_control *ipa_ctrl, +static int session_gpu_start(struct kbase_device *kbdev, struct kbase_ipa_control *ipa_ctrl, struct kbase_ipa_control_session *session) { - bool first_start = - (session != NULL) && (ipa_ctrl->num_active_sessions == 0); + bool first_start = (session != NULL) && (ipa_ctrl->num_active_sessions == 0); int ret = 0; lockdep_assert_held(&kbdev->csf.ipa_control.lock); @@ -455,14 +416,12 @@ static int session_gpu_start(struct kbase_device *kbdev, * sampling. */ if (!session || first_start) { - kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), - COMMAND_SAMPLE); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_SAMPLE); ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); if (ret) - dev_err(kbdev->dev, "%s: failed to sample new counters", - __func__); - kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), - timer_value(ipa_ctrl->cur_gpu_rate)); + dev_err(kbdev->dev, "%s: failed to sample new counters", __func__); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), + timer_value(ipa_ctrl->cur_gpu_rate)); } /* @@ -482,10 +441,10 @@ static int session_gpu_start(struct kbase_device *kbdev, } else { size_t session_idx; - for (session_idx = 0; - session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; + for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; session_idx++) { - struct kbase_ipa_control_session *session_to_check = &ipa_ctrl->sessions[session_idx]; + struct kbase_ipa_control_session *session_to_check = + &ipa_ctrl->sessions[session_idx]; if (session_to_check->active) session_read_raw_values(kbdev, session_to_check); @@ -496,10 +455,9 @@ static int session_gpu_start(struct 
kbase_device *kbdev, return ret; } -int kbase_ipa_control_register( - struct kbase_device *kbdev, - const struct kbase_ipa_control_perf_counter *perf_counters, - size_t num_counters, void **client) +int kbase_ipa_control_register(struct kbase_device *kbdev, + const struct kbase_ipa_control_perf_counter *perf_counters, + size_t num_counters, void **client) { int ret = 0; size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM]; @@ -542,10 +500,8 @@ int kbase_ipa_control_register( enum kbase_ipa_core_type type = perf_counters[i].type; u8 idx = perf_counters[i].idx; - if ((type >= KBASE_IPA_CORE_TYPE_NUM) || - (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) { - dev_err(kbdev->dev, - "%s: invalid requested type %u and/or index %u", + if ((type >= KBASE_IPA_CORE_TYPE_NUM) || (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) { + dev_err(kbdev->dev, "%s: invalid requested type %u and/or index %u", __func__, type, idx); ret = -EINVAL; goto exit; @@ -571,8 +527,7 @@ int kbase_ipa_control_register( } for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) - if (req_counters[i] > - ipa_ctrl->blocks[i].num_available_counters) { + if (req_counters[i] > ipa_ctrl->blocks[i].num_available_counters) { dev_err(kbdev->dev, "%s: more counters (%zu) than available (%zu) have been requested for type %zu", __func__, req_counters[i], @@ -587,8 +542,7 @@ int kbase_ipa_control_register( * of the session and update the configuration of performance counters * in the internal state of kbase_ipa_control. 
*/ - for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; - session_idx++) { + for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; session_idx++) { if (!ipa_ctrl->sessions[session_idx].active) { session = &ipa_ctrl->sessions[session_idx]; break; @@ -596,8 +550,7 @@ int kbase_ipa_control_register( } if (!session) { - dev_err(kbdev->dev, "%s: wrong or corrupt session state", - __func__); + dev_err(kbdev->dev, "%s: wrong or corrupt session state", __func__); ret = -EBUSY; goto exit; } @@ -612,8 +565,7 @@ int kbase_ipa_control_register( prfcnt_config = &ipa_ctrl->blocks[type].select[j]; if (already_configured[i]) { - if ((prfcnt_config->refcount > 0) && - (prfcnt_config->idx == idx)) { + if ((prfcnt_config->refcount > 0) && (prfcnt_config->idx == idx)) { break; } } else { @@ -622,8 +574,7 @@ int kbase_ipa_control_register( } } - if (WARN_ON((prfcnt_config->refcount > 0 && - prfcnt_config->idx != idx) || + if (WARN_ON((prfcnt_config->refcount > 0 && prfcnt_config->idx != idx) || (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) { dev_err(kbdev->dev, "%s: invalid internal state: counter already configured or no counter available to configure", @@ -640,8 +591,7 @@ int kbase_ipa_control_register( session->prfcnts[i].accumulated_diff = 0; session->prfcnts[i].type = type; session->prfcnts[i].select_idx = j; - session->prfcnts[i].scaling_factor = - perf_counters[i].scaling_factor; + session->prfcnts[i].scaling_factor = perf_counters[i].scaling_factor; session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm; /* Reports to this client for GPU time spent in protected mode @@ -663,8 +613,7 @@ int kbase_ipa_control_register( if (new_config) { ret = update_select_registers(kbdev); if (ret) - dev_err(kbdev->dev, - "%s: failed to apply new SELECT configuration", + dev_err(kbdev->dev, "%s: failed to apply new SELECT configuration", __func__); } @@ -730,8 +679,7 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client) } if 
(!session->active) { - dev_err(kbdev->dev, "%s: session is already inactive", - __func__); + dev_err(kbdev->dev, "%s: session is already inactive", __func__); ret = -EINVAL; goto exit; } @@ -755,9 +703,7 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client) if (new_config) { ret = update_select_registers(kbdev); if (ret) - dev_err(kbdev->dev, - "%s: failed to apply SELECT configuration", - __func__); + dev_err(kbdev->dev, "%s: failed to apply SELECT configuration", __func__); } session->num_prfcnts = 0; @@ -771,8 +717,8 @@ exit: } KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister); -int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, - u64 *values, size_t num_values, u64 *protected_time) +int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, u64 *values, + size_t num_values, u64 *protected_time) { struct kbase_ipa_control *ipa_ctrl; struct kbase_ipa_control_session *session; @@ -792,14 +738,12 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, session = (struct kbase_ipa_control_session *)client; if (!session->active) { - dev_err(kbdev->dev, - "%s: attempt to query inactive session", __func__); + dev_err(kbdev->dev, "%s: attempt to query inactive session", __func__); return -EINVAL; } if (WARN_ON(num_values < session->num_prfcnts)) { - dev_err(kbdev->dev, - "%s: not enough space (%zu) to return all counter values (%zu)", + dev_err(kbdev->dev, "%s: not enough space (%zu) to return all counter values (%zu)", __func__, num_values, session->num_prfcnts); return -EINVAL; } @@ -826,8 +770,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, if (kbdev->protected_mode) { *protected_time += - time_now - MAX(session->last_query_time, - ipa_ctrl->protm_start); + time_now - MAX(session->last_query_time, ipa_ctrl->protm_start); } session->last_query_time = time_now; session->protm_time = 0; @@ -857,35 +800,27 @@ void 
kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev) spin_lock(&ipa_ctrl->lock); /* First disable the automatic sampling through TIMER */ - kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(TIMER), 0); ret = wait_status(kbdev, STATUS_TIMER_ENABLED); if (ret) { - dev_err(kbdev->dev, - "Wait for disabling of IPA control timer failed: %d", - ret); + dev_err(kbdev->dev, "Wait for disabling of IPA control timer failed: %d", ret); } /* Now issue the manual SAMPLE command */ - kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_SAMPLE); ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); if (ret) { - dev_err(kbdev->dev, - "Wait for the completion of manual sample failed: %d", - ret); + dev_err(kbdev->dev, "Wait for the completion of manual sample failed: %d", ret); } - for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; - session_idx++) { - - struct kbase_ipa_control_session *session = - &ipa_ctrl->sessions[session_idx]; + for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; session_idx++) { + struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[session_idx]; if (session->active) { size_t i; for (i = 0; i < session->num_prfcnts; i++) { - struct kbase_ipa_control_prfcnt *prfcnt = - &session->prfcnts[i]; + struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i]; calc_prfcnt_delta(kbdev, prfcnt, true); } @@ -909,8 +844,7 @@ void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev) ret = update_select_registers(kbdev); if (ret) { - dev_err(kbdev->dev, - "Failed to reconfigure the select registers: %d", ret); + dev_err(kbdev->dev, "Failed to reconfigure the select registers: %d", ret); } /* Accumulator registers would not contain any sample after GPU power @@ -943,15 +877,13 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev) spin_lock(&ipa_ctrl->lock); /* Check 
the status reset bit is set before acknowledging it */ - status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); + status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS)); if (status & STATUS_RESET) { /* Acknowledge the reset command */ - kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_RESET_ACK); ret = wait_status(kbdev, STATUS_RESET); if (ret) { - dev_err(kbdev->dev, - "Wait for the reset ack command failed: %d", - ret); + dev_err(kbdev->dev, "Wait for the reset ack command failed: %d", ret); } } @@ -973,8 +905,7 @@ void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev) /* SELECT_CSHW register needs to be cleared to prevent any * IPA control message to be sent to the top level GPU HWCNT. */ - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0); - kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0); + kbase_reg_write64(kbdev, IPA_CONTROL_ENUM(SELECT_CSHW), 0); /* No need to issue the APPLY command here */ } @@ -999,15 +930,15 @@ KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit); #endif #if MALI_UNIT_TEST -void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, - u32 clk_index, u32 clk_rate_hz) +void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index, + u32 clk_rate_hz) { struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; - struct kbase_ipa_control_listener_data *listener_data = - ipa_ctrl->rtm_listener_data; + struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data; - kbase_ipa_control_rate_change_notify(&listener_data->listener, - clk_index, clk_rate_hz); + kbase_ipa_control_rate_change_notify(&listener_data->listener, clk_index, clk_rate_hz); + /* Ensure the callback has taken effect before returning back to the test caller */ + flush_work(&listener_data->clk_chg_work); } KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test); 
#endif @@ -1030,13 +961,11 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { - - struct kbase_ipa_control_session *session = - &ipa_ctrl->sessions[i]; + struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; if (session->active) { - u64 protm_time = time_now - MAX(session->last_query_time, - ipa_ctrl->protm_start); + u64 protm_time = + time_now - MAX(session->last_query_time, ipa_ctrl->protm_start); session->protm_time += protm_time; } @@ -1045,19 +974,15 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) /* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS * register */ - status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); + status = kbase_reg_read32(kbdev, IPA_CONTROL_ENUM(STATUS)); if (status & STATUS_PROTECTED_MODE) { int ret; /* Acknowledge the protm command */ - kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), - COMMAND_PROTECTED_ACK); + kbase_reg_write32(kbdev, IPA_CONTROL_ENUM(COMMAND), COMMAND_PROTECTED_ACK); ret = wait_status(kbdev, STATUS_PROTECTED_MODE); if (ret) { - dev_err(kbdev->dev, - "Wait for the protm ack command failed: %d", - ret); + dev_err(kbdev->dev, "Wait for the protm ack command failed: %d", ret); } } } - diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h index 69ff8973bac4..80aabc1022c5 100644 --- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h +++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,6 +68,8 @@ struct kbase_ipa_control_perf_counter { * kbase_ipa_control_init - Initialize the IPA Control component * * @kbdev: Pointer to Kbase device. + * + * This function must be called only when a kbase device is initialized. */ void kbase_ipa_control_init(struct kbase_device *kbdev); @@ -103,10 +105,9 @@ void kbase_ipa_control_term(struct kbase_device *kbdev); * * Return: 0 on success, negative -errno on error */ -int kbase_ipa_control_register( - struct kbase_device *kbdev, - const struct kbase_ipa_control_perf_counter *perf_counters, - size_t num_counters, void **client); +int kbase_ipa_control_register(struct kbase_device *kbdev, + const struct kbase_ipa_control_perf_counter *perf_counters, + size_t num_counters, void **client); /** * kbase_ipa_control_unregister - Unregister a client from IPA Control @@ -117,8 +118,7 @@ int kbase_ipa_control_register( * * Return: 0 on success, negative -errno on error */ -int kbase_ipa_control_unregister(struct kbase_device *kbdev, - const void *client); +int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client); /** * kbase_ipa_control_query - Query performance counters @@ -152,9 +152,8 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, * * Return: 0 on success, negative -errno on error */ -int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, - u64 *values, size_t num_values, - u64 *protected_time); +int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, u64 *values, + size_t num_values, u64 *protected_time); /** * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event @@ -236,8 +235,8 @@ void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev); * * Notify the IPA Control component about a GPU rate change. 
*/ -void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, - u32 clk_index, u32 clk_rate_hz); +void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index, + u32 clk_rate_hz); #endif /* MALI_UNIT_TEST */ /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index 7a939fc3382d..2cd4b201173d 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "mali_kbase_csf_tiler_heap.h" #include @@ -36,6 +37,7 @@ #include "mali_kbase_csf_event.h" #include #include "mali_kbase_csf_mcu_shared_reg.h" +#include #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) @@ -46,16 +48,12 @@ #define PROTM_ALLOC_MAX_RETRIES ((u8)5) const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { - KBASE_QUEUE_GROUP_PRIORITY_HIGH, - KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, - KBASE_QUEUE_GROUP_PRIORITY_LOW, - KBASE_QUEUE_GROUP_PRIORITY_REALTIME + KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, + KBASE_QUEUE_GROUP_PRIORITY_LOW, KBASE_QUEUE_GROUP_PRIORITY_REALTIME }; const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = { - BASE_QUEUE_GROUP_PRIORITY_REALTIME, - BASE_QUEUE_GROUP_PRIORITY_HIGH, - BASE_QUEUE_GROUP_PRIORITY_MEDIUM, - BASE_QUEUE_GROUP_PRIORITY_LOW + BASE_QUEUE_GROUP_PRIORITY_REALTIME, BASE_QUEUE_GROUP_PRIORITY_HIGH, + BASE_QUEUE_GROUP_PRIORITY_MEDIUM, BASE_QUEUE_GROUP_PRIORITY_LOW }; /* @@ -93,22 +91,20 @@ static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx) /** * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page. - * * @kctx: Pointer to the kbase context * + * This function must be called only when a kbase context is instantiated. 
+ * * @return: 0 on success. */ static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx) { INIT_LIST_HEAD(&kctx->csf.user_reg.link); - kctx->csf.user_reg.vma = NULL; - kctx->csf.user_reg.file_offset = 0; return 0; } -static void put_user_pages_mmap_handle(struct kbase_context *kctx, - struct kbase_queue *queue) +static void put_user_pages_mmap_handle(struct kbase_context *kctx, struct kbase_queue *queue) { unsigned long cookie_nr; @@ -117,8 +113,7 @@ static void put_user_pages_mmap_handle(struct kbase_context *kctx, if (queue->handle == BASEP_MEM_INVALID_HANDLE) return; - cookie_nr = - PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); + cookie_nr = PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) { /* free up cookie */ @@ -133,26 +128,21 @@ static void put_user_pages_mmap_handle(struct kbase_context *kctx, * the CPU mapping of the pair of input/output pages and Hw doorbell page. * Will return 0 in case of success otherwise negative on failure. 
*/ -static int get_user_pages_mmap_handle(struct kbase_context *kctx, - struct kbase_queue *queue) +static int get_user_pages_mmap_handle(struct kbase_context *kctx, struct kbase_queue *queue) { unsigned long cookie, cookie_nr; lockdep_assert_held(&kctx->csf.lock); - if (bitmap_empty(kctx->csf.cookies, - KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) { - dev_err(kctx->kbdev->dev, - "No csf cookies available for allocation!"); + if (bitmap_empty(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) { + dev_err(kctx->kbdev->dev, "No csf cookies available for allocation!"); return -ENOMEM; } /* allocate a cookie */ - cookie_nr = find_first_bit(kctx->csf.cookies, - KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); + cookie_nr = find_first_bit(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); if (kctx->csf.user_pages_info[cookie_nr]) { - dev_err(kctx->kbdev->dev, - "Inconsistent state of csf cookies!"); + dev_err(kctx->kbdev->dev, "Inconsistent state of csf cookies!"); return -EINVAL; } kctx->csf.user_pages_info[cookie_nr] = queue; @@ -169,23 +159,22 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, static void init_user_io_pages(struct kbase_queue *queue) { - u32 *input_addr = (u32 *)(queue->user_io_addr); - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr64 = queue->user_io_addr + PAGE_SIZE / sizeof(u64); + u32 *output_addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); - input_addr[CS_INSERT_LO/4] = 0; - input_addr[CS_INSERT_HI/4] = 0; - - input_addr[CS_EXTRACT_INIT_LO/4] = 0; - input_addr[CS_EXTRACT_INIT_HI/4] = 0; - - output_addr[CS_EXTRACT_LO/4] = 0; - output_addr[CS_EXTRACT_HI/4] = 0; - - output_addr[CS_ACTIVE/4] = 0; + /* + * CS_INSERT and CS_EXTRACT registers contain 64-bit memory addresses which + * should be accessed atomically. Here we update them 32-bits at a time, but + * as this is initialisation code, non-atomic accesses are safe. 
+ */ + input_addr[CS_INSERT_LO / sizeof(*input_addr)] = 0; + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = 0; + output_addr64[CS_EXTRACT_LO / sizeof(*output_addr64)] = 0; + output_addr32[CS_ACTIVE / sizeof(*output_addr32)] = 0; } -static void kernel_unmap_user_io_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +static void kernel_unmap_user_io_pages(struct kbase_context *kctx, struct kbase_queue *queue) { kbase_gpu_vm_lock(kctx); @@ -197,20 +186,19 @@ static void kernel_unmap_user_io_pages(struct kbase_context *kctx, kbase_gpu_vm_unlock(kctx); } -static int kernel_map_user_io_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +static int kernel_map_user_io_pages(struct kbase_context *kctx, struct kbase_queue *queue) { struct page *page_list[2]; pgprot_t cpu_map_prot; unsigned long flags; - char *user_io_addr; + uint64_t *user_io_addr; int ret = 0; size_t i; kbase_gpu_vm_lock(kctx); if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - - atomic_read(&kctx->permanent_mapped_pages))) { + atomic_read(&kctx->permanent_mapped_pages))) { ret = -ENOMEM; goto unlock; } @@ -249,7 +237,7 @@ unlock: static void term_queue_group(struct kbase_queue_group *group); static void get_queue(struct kbase_queue *queue); -static void release_queue(struct kbase_queue *queue); +static bool release_queue(struct kbase_queue *queue); /** * kbase_csf_free_command_stream_user_pages() - Free the resources allocated @@ -277,9 +265,8 @@ void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct { kernel_unmap_user_io_pages(kctx, queue); - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); /* The user_io_gpu_va should have been unmapped inside 
the scheduler */ @@ -300,8 +287,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct lockdep_assert_held(&kctx->csf.lock); ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - KBASEP_NUM_CS_USER_IO_PAGES, - queue->phys, false, kctx->task); + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, + kctx->task); if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { /* Marking both the phys to zero for indicating there is no phys allocated */ queue->phys[0].tagged_addr = 0; @@ -350,8 +337,7 @@ kernel_map_failed: } KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); -static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, - u8 group_handle) +static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, u8 group_handle) { uint index = group_handle; @@ -372,8 +358,7 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, } KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); -int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, - u8 group_handle) +int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle) { struct kbase_queue_group *group; @@ -403,7 +388,16 @@ static void get_queue(struct kbase_queue *queue) WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); } -static void release_queue(struct kbase_queue *queue) +/** + * release_queue() - Release a reference to a GPU queue + * + * @queue: The queue to release. + * + * Return: true if the queue has been released. + * + * The queue will be released when its reference count reaches zero. 
+ */ +static bool release_queue(struct kbase_queue *queue) { lockdep_assert_held(&queue->kctx->csf.lock); if (kbase_refcount_dec_and_test(&queue->refcount)) { @@ -411,9 +405,8 @@ static void release_queue(struct kbase_queue *queue) WARN_ON(!list_empty(&queue->link)); WARN_ON(queue->group); dev_dbg(queue->kctx->kbdev->dev, - "Remove any pending command queue fatal from ctx %d_%d", - queue->kctx->tgid, queue->kctx->id); - kbase_csf_event_remove_error(queue->kctx, &queue->error); + "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, + queue->kctx->id); /* After this the Userspace would be able to free the * memory for GPU queue. In case the Userspace missed @@ -426,7 +419,11 @@ static void release_queue(struct kbase_queue *queue) kbase_gpu_vm_unlock(queue->kctx); kfree(queue); + + return true; } + + return false; } static void oom_event_worker(struct work_struct *data); @@ -434,8 +431,8 @@ static void cs_error_worker(struct work_struct *data); /* Between reg and reg_ex, one and only one must be null */ static int csf_queue_register_internal(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_register *reg, - struct kbase_ioctl_cs_queue_register_ex *reg_ex) + struct kbase_ioctl_cs_queue_register *reg, + struct kbase_ioctl_cs_queue_register_ex *reg_ex) { struct kbase_queue *queue; int ret = 0; @@ -445,8 +442,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, /* Only one pointer expected, otherwise coding error */ if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) { - dev_dbg(kctx->kbdev->dev, - "Error, one and only one param-ptr expected!"); + dev_dbg(kctx->kbdev->dev, "Error, one and only one param-ptr expected!"); return -EINVAL; } @@ -475,8 +471,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, /* Check if the queue address is valid */ kbase_gpu_vm_lock(kctx); - region = kbase_region_tracker_find_region_enclosing_address(kctx, - queue_addr); + region = 
kbase_region_tracker_find_region_enclosing_address(kctx, queue_addr); if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { @@ -484,8 +479,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, goto out_unlock_vm; } - if (queue_size > (region->nr_pages - - ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) { + if (queue_size > (region->nr_pages - ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) { ret = -EINVAL; goto out_unlock_vm; } @@ -494,8 +488,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, * if not enabled (i.e. when size is 0). */ if (reg_ex && reg_ex->ex_buffer_size) { - int buf_pages = (reg_ex->ex_buffer_size + - (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; + size_t buf_pages = (reg_ex->ex_buffer_size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT; struct kbase_va_region *region_ex = kbase_region_tracker_find_region_enclosing_address(kctx, reg_ex->ex_buffer_base); @@ -534,37 +527,25 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; - queue->enabled = false; queue->priority = reg->priority; + /* Default to a safe value, this would be updated on binding */ + queue->group_priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; kbase_refcount_set(&queue->refcount, 1); - queue->group = NULL; queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; queue->handle = BASEP_MEM_INVALID_HANDLE; queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; - queue->status_wait = 0; - queue->sync_ptr = 0; - queue->sync_value = 0; - -#if IS_ENABLED(CONFIG_DEBUG_FS) - queue->saved_cmd_ptr = 0; -#endif - - queue->sb_status = 0; queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; - atomic_set(&queue->pending, 0); - INIT_LIST_HEAD(&queue->link); - INIT_LIST_HEAD(&queue->error.link); + atomic_set(&queue->pending_kick, 0); + INIT_LIST_HEAD(&queue->pending_kick_link); 
INIT_WORK(&queue->oom_event_work, oom_event_worker); INIT_WORK(&queue->cs_error_work, cs_error_worker); list_add(&queue->link, &kctx->csf.queue_list); - queue->extract_ofs = 0; - region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -572,10 +553,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, * enabled, otherwise leave them as default zeros. */ if (reg_ex && reg_ex->ex_buffer_size) { - u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET( - 0, reg_ex->ex_event_size); - cfg = CS_INSTR_CONFIG_EVENT_STATE_SET( - cfg, reg_ex->ex_event_state); + u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET(0, reg_ex->ex_event_size); + cfg = CS_INSTR_CONFIG_EVENT_STATE_SET(cfg, reg_ex->ex_event_state); queue->trace_cfg = cfg; queue->trace_buffer_size = reg_ex->ex_buffer_size; @@ -591,8 +570,7 @@ out: return ret; } -int kbase_csf_queue_register(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_register *reg) +int kbase_csf_queue_register(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_register *reg) { /* Validate the ring buffer configuration parameters */ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || @@ -607,13 +585,12 @@ int kbase_csf_queue_register(struct kbase_context *kctx, int kbase_csf_queue_register_ex(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_register_ex *reg) { - struct kbase_csf_global_iface const *const iface = - &kctx->kbdev->csf.global_iface; + struct kbase_csf_global_iface const *const iface = &kctx->kbdev->csf.global_iface; u32 const glb_version = iface->version; u32 instr = iface->instr_features; u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr); - u32 min_buf_size = (1u << reg->ex_event_size) * - GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + u32 min_buf_size = + (1u << reg->ex_event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); /* If cs_trace_command not supported, the call fails */ if (glb_version < kbase_csf_interface_version(1, 1, 0)) @@ -628,19 +605,33 
@@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, /* Validate the cs_trace configuration parameters */ if (reg->ex_buffer_size && - ((reg->ex_event_size > max_size) || - (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || - (reg->ex_buffer_size < min_buf_size))) + ((reg->ex_event_size > max_size) || (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || + (reg->ex_buffer_size < min_buf_size))) return -EINVAL; return csf_queue_register_internal(kctx, NULL, reg); } -static void unbind_queue(struct kbase_context *kctx, - struct kbase_queue *queue); +static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue); + +static void wait_pending_queue_kick(struct kbase_queue *queue) +{ + struct kbase_context *const kctx = queue->kctx; + + /* Drain a pending queue kick if any. It should no longer be + * possible to issue further queue kicks at this point: either the + * queue has been unbound, or the context is being terminated. + * + * Signal kbase_csf_scheduler_kthread() to allow for the + * eventual completion of the current iteration. Once it's done the + * event_wait wait queue shall be signalled. + */ + complete(&kctx->kbdev->csf.scheduler.kthread_signal); + wait_event(kctx->kbdev->csf.event_wait, atomic_read(&queue->pending_kick) == 0); +} void kbase_csf_queue_terminate(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_terminate *term) + struct kbase_ioctl_cs_queue_terminate *term) { struct kbase_device *kbdev = kctx->kbdev; struct kbase_queue *queue; @@ -676,6 +667,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, queue->queue_reg->user_data = NULL; kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->csf.lock); + /* The GPU reset can be allowed now as the queue has been unbound. 
*/ + if (reset_prevented) { + kbase_reset_gpu_allow(kbdev); + reset_prevented = false; + } + wait_pending_queue_kick(queue); + /* The work items can be cancelled as Userspace is terminating the queue */ + cancel_work_sync(&queue->oom_event_work); + cancel_work_sync(&queue->cs_error_work); + mutex_lock(&kctx->csf.lock); + release_queue(queue); } @@ -720,6 +723,7 @@ int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_ bind->out.mmap_handle = queue->handle; group->bound_queues[bind->in.csi_index] = queue; queue->group = group; + queue->group_priority = group->priority; queue->csi_index = bind->in.csi_index; queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; @@ -729,12 +733,20 @@ out: return ret; } -static struct kbase_queue_group *get_bound_queue_group( - struct kbase_queue *queue) +/** + * get_bound_queue_group - Get the group to which a queue was bound + * + * @queue: Pointer to the queue for this group + * + * Return: The group to which this queue was bound, or NULL on error. + */ +static struct kbase_queue_group *get_bound_queue_group(struct kbase_queue *queue) { struct kbase_context *kctx = queue->kctx; struct kbase_queue_group *group; + lockdep_assert_held(&kctx->csf.lock); + if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) return NULL; @@ -756,63 +768,6 @@ static struct kbase_queue_group *get_bound_queue_group( return group; } -static void enqueue_gpu_submission_work(struct kbase_context *const kctx) -{ - queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); -} - -/** - * pending_submission_worker() - Work item to process pending kicked GPU command queues. - * - * @work: Pointer to pending_submission_work. - * - * This function starts all pending queues, for which the work - * was previously submitted via ioctl call from application thread. - * If the queue is already scheduled and resident, it will be started - * right away, otherwise once the group is made resident. 
- */ -static void pending_submission_worker(struct work_struct *work) -{ - struct kbase_context *kctx = - container_of(work, struct kbase_context, csf.pending_submission_work); - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_queue *queue; - int err = kbase_reset_gpu_prevent_and_wait(kbdev); - - if (err) { - dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); - return; - } - - mutex_lock(&kctx->csf.lock); - - /* Iterate through the queue list and schedule the pending ones for submission. */ - list_for_each_entry(queue, &kctx->csf.queue_list, link) { - if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { - struct kbase_queue_group *group = get_bound_queue_group(queue); - int ret; - - if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { - dev_dbg(kbdev->dev, "queue is not bound to a group"); - continue; - } - - ret = kbase_csf_scheduler_queue_start(queue); - if (unlikely(ret)) { - dev_dbg(kbdev->dev, "Failed to start queue"); - if (ret == -EBUSY) { - atomic_cmpxchg(&queue->pending, 0, 1); - enqueue_gpu_submission_work(kctx); - } - } - } - } - - mutex_unlock(&kctx->csf.lock); - - kbase_reset_gpu_allow(kbdev); -} - void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) { if (WARN_ON(slot < 0)) @@ -820,16 +775,13 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); + kbase_csf_ring_csg_slots_doorbell(kbdev, (u32)(1 << slot)); } -void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, - u32 slot_bitmap) +void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, u32 slot_bitmap) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; - const u32 allowed_bitmap = - (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; + const u32 allowed_bitmap = 
(u32)((1U << kbdev->csf.global_iface.group_num) - 1); u32 value; kbase_csf_scheduler_spin_lock_assert_held(kbdev); @@ -848,14 +800,12 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); value ^= slot_bitmap; - kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, - slot_bitmap); + kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, slot_bitmap); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, - struct kbase_queue *queue) +void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, struct kbase_queue *queue) { mutex_lock(&kbdev->csf.reg_lock); @@ -865,8 +815,7 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, mutex_unlock(&kbdev->csf.reg_lock); } -void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, - int csi_index, int csg_nr, +void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index, int csg_nr, bool ring_csg_doorbell) { struct kbase_csf_cmd_stream_group_info *ginfo; @@ -874,14 +823,12 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, kbase_csf_scheduler_spin_lock_assert_held(kbdev); - if (WARN_ON(csg_nr < 0) || - WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) + if (WARN_ON(csg_nr < 0) || WARN_ON((u32)csg_nr >= kbdev->csf.global_iface.group_num)) return; ginfo = &kbdev->csf.global_iface.groups[csg_nr]; - if (WARN_ON(csi_index < 0) || - WARN_ON(csi_index >= ginfo->stream_num)) + if (WARN_ON(csi_index < 0) || WARN_ON((u32)csi_index >= ginfo->stream_num)) return; /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to @@ -894,18 +841,15 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); value ^= (1 << csi_index); - kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, - 1 << csi_index); + 
kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, 1 << csi_index); if (likely(ring_csg_doorbell)) kbase_csf_ring_csg_doorbell(kbdev, csg_nr); } -int kbase_csf_queue_kick(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_kick *kick) +int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; - bool trigger_submission = false; struct kbase_va_region *region; int err = 0; @@ -923,9 +867,19 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(region)) { struct kbase_queue *queue = region->user_data; - if (queue) { - atomic_cmpxchg(&queue->pending, 0, 1); - trigger_submission = true; + if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) { + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + if (list_empty(&queue->pending_kick_link)) { + /* Queue termination shall block until this + * kick has been handled. + */ + atomic_inc(&queue->pending_kick); + list_add_tail( + &queue->pending_kick_link, + &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); + complete(&kbdev->csf.scheduler.kthread_signal); + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); } } else { dev_dbg(kbdev->dev, @@ -934,14 +888,10 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, } kbase_gpu_vm_unlock(kctx); - if (likely(trigger_submission)) - enqueue_gpu_submission_work(kctx); - return err; } -static void unbind_stopped_queue(struct kbase_context *kctx, - struct kbase_queue *queue) +static void unbind_stopped_queue(struct kbase_context *kctx, struct kbase_queue *queue) { lockdep_assert_held(&kctx->csf.lock); @@ -952,10 +902,9 @@ static void unbind_stopped_queue(struct kbase_context *kctx, unsigned long flags; kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); - bitmap_clear(queue->group->protm_pending_bitmap, - queue->csi_index, 1); - KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR, - queue->group, queue, 
queue->group->protm_pending_bitmap[0]); + bitmap_clear(queue->group->protm_pending_bitmap, queue->csi_index, 1); + KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR, queue->group, queue, + queue->group->protm_pending_bitmap[0]); queue->group->bound_queues[queue->csi_index] = NULL; queue->group = NULL; kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); @@ -1071,9 +1020,7 @@ static int find_free_group_handle(struct kbase_context *const kctx) lockdep_assert_held(&kctx->csf.lock); - for (idx = 0; - (idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0); - idx++) { + for (idx = 0; (idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0); idx++) { if (!kctx->csf.queue_groups[idx]) group_handle = idx; } @@ -1091,12 +1038,10 @@ static int find_free_group_handle(struct kbase_context *const kctx) * Return: true if at least one CSG supports the given number * of CSs (or more); otherwise false. */ -static bool iface_has_enough_streams(struct kbase_device *const kbdev, - u32 const cs_min) +static bool iface_has_enough_streams(struct kbase_device *const kbdev, u32 const cs_min) { bool has_enough = false; - struct kbase_csf_cmd_stream_group_info *const groups = - kbdev->csf.global_iface.groups; + struct kbase_csf_cmd_stream_group_info *const groups = kbdev->csf.global_iface.groups; const u32 group_num = kbdev->csf.global_iface.group_num; u32 i; @@ -1119,10 +1064,9 @@ static bool iface_has_enough_streams(struct kbase_device *const kbdev, * Otherwise -ENOMEM or error code. 
*/ static int create_normal_suspend_buffer(struct kbase_context *const kctx, - struct kbase_normal_suspend_buffer *s_buf) + struct kbase_normal_suspend_buffer *s_buf) { - const size_t nr_pages = - PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); + const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); int err; lockdep_assert_held(&kctx->csf.lock); @@ -1153,7 +1097,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, static void timer_event_worker(struct work_struct *data); static void protm_event_worker(struct work_struct *data); static void term_normal_suspend_buffer(struct kbase_context *const kctx, - struct kbase_normal_suspend_buffer *s_buf); + struct kbase_normal_suspend_buffer *s_buf); /** * create_suspend_buffers - Setup normal and protected mode @@ -1166,7 +1110,7 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM. */ static int create_suspend_buffers(struct kbase_context *const kctx, - struct kbase_queue_group * const group) + struct kbase_queue_group *const group) { if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); @@ -1205,17 +1149,15 @@ static u32 generate_group_uid(void) * Return: a queue group handle on success, or a negative error code on failure. 
*/ static int create_queue_group(struct kbase_context *const kctx, - union kbase_ioctl_cs_queue_group_create *const create) + union kbase_ioctl_cs_queue_group_create *const create) { int group_handle = find_free_group_handle(kctx); if (group_handle < 0) { - dev_dbg(kctx->kbdev->dev, - "All queue group handles are already in use"); + dev_dbg(kctx->kbdev->dev, "All queue group handles are already in use"); } else { - struct kbase_queue_group * const group = - kmalloc(sizeof(struct kbase_queue_group), - GFP_KERNEL); + struct kbase_queue_group *const group = + kmalloc(sizeof(struct kbase_queue_group), GFP_KERNEL); lockdep_assert_held(&kctx->csf.lock); @@ -1225,6 +1167,9 @@ static int create_queue_group(struct kbase_context *const kctx, } else { int err = 0; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + group->prev_act = false; +#endif group->kctx = kctx; group->handle = group_handle; group->csg_nr = KBASEP_CSG_NR_INVALID; @@ -1249,6 +1194,7 @@ static int create_queue_group(struct kbase_context *const kctx, group->dvs_buf = create->in.dvs_buf; + #if IS_ENABLED(CONFIG_DEBUG_FS) group->deschedule_deferred_cnt = 0; #endif @@ -1259,16 +1205,13 @@ static int create_queue_group(struct kbase_context *const kctx, INIT_LIST_HEAD(&group->link); INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); - INIT_LIST_HEAD(&group->error_timeout.link); - INIT_LIST_HEAD(&group->error_tiler_oom.link); INIT_WORK(&group->timer_event_work, timer_event_worker); INIT_WORK(&group->protm_event_work, protm_event_worker); - bitmap_zero(group->protm_pending_bitmap, - MAX_SUPPORTED_STREAMS_PER_GROUP); + bitmap_zero(group->protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); group->run_state = KBASE_CSF_GROUP_INACTIVE; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, - group->run_state); + group->run_state); err = create_suspend_buffers(kctx, group); @@ -1279,8 +1222,7 @@ static int create_queue_group(struct kbase_context *const kctx, 
int j; kctx->csf.queue_groups[group_handle] = group; - for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP; - j++) + for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP; j++) group->bound_queues[j] = NULL; } } @@ -1301,8 +1243,9 @@ static bool dvs_supported(u32 csf_version) return true; } + int kbase_csf_queue_group_create(struct kbase_context *const kctx, - union kbase_ioctl_cs_queue_group_create *const create) + union kbase_ioctl_cs_queue_group_create *const create) { int err = 0; const u32 tiler_count = hweight64(create->in.tiler_mask); @@ -1310,7 +1253,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, const u32 compute_count = hweight64(create->in.compute_mask); size_t i; - for (i = 0; i < sizeof(create->in.padding); i++) { + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { if (create->in.padding[i] != 0) { dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); return -EINVAL; @@ -1319,36 +1262,29 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, mutex_lock(&kctx->csf.lock); - if ((create->in.tiler_max > tiler_count) || - (create->in.fragment_max > fragment_count) || + if ((create->in.tiler_max > tiler_count) || (create->in.fragment_max > fragment_count) || (create->in.compute_max > compute_count)) { - dev_dbg(kctx->kbdev->dev, - "Invalid maximum number of endpoints for a queue group"); + dev_dbg(kctx->kbdev->dev, "Invalid maximum number of endpoints for a queue group"); err = -EINVAL; } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) { dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u", (unsigned int)create->in.priority); err = -EINVAL; } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) { - dev_dbg(kctx->kbdev->dev, - "No CSG has at least %d CSs", - create->in.cs_min); + dev_dbg(kctx->kbdev->dev, "No CSG has at least %d CSs", create->in.cs_min); err = -EINVAL; } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) { 
dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); err = -EINVAL; - } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && - create->in.dvs_buf) { - dev_warn( - kctx->kbdev->dev, - "GPU does not support DVS but userspace is trying to use it"); + } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && create->in.dvs_buf) { + dev_warn(kctx->kbdev->dev, + "GPU does not support DVS but userspace is trying to use it"); err = -EINVAL; } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) && !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) && CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) { - dev_warn(kctx->kbdev->dev, - "DVS buffer pointer is null but size is not 0"); + dev_warn(kctx->kbdev->dev, "DVS buffer pointer is null but size is not 0"); err = -EINVAL; } else { /* For the CSG which satisfies the condition for having @@ -1418,8 +1354,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) struct kbase_context *kctx = group->kctx; /* Currently each group supports the same number of CS */ - u32 max_streams = - kctx->kbdev->csf.global_iface.groups[0].stream_num; + u32 max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num; u32 i; lockdep_assert_held(&kctx->csf.lock); @@ -1428,8 +1363,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); for (i = 0; i < max_streams; i++) { - struct kbase_queue *queue = - group->bound_queues[i]; + struct kbase_queue *queue = group->bound_queues[i]; /* The group is already being evicted from the scheduler */ if (queue) @@ -1438,8 +1372,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) term_normal_suspend_buffer(kctx, &group->normal_suspend_buf); if (kctx->kbdev->csf.pma_dev) - term_protected_suspend_buffer(kctx->kbdev, - &group->protected_suspend_buf); + 
term_protected_suspend_buffer(kctx->kbdev, &group->protected_suspend_buf); group->run_state = KBASE_CSF_GROUP_TERMINATED; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state); @@ -1511,17 +1444,13 @@ static void remove_pending_group_fatal_error(struct kbase_queue_group *group) { struct kbase_context *kctx = group->kctx; - dev_dbg(kctx->kbdev->dev, - "Remove any pending group fatal error from context %pK\n", + dev_dbg(kctx->kbdev->dev, "Remove any pending group fatal error from context %pK\n", (void *)group->kctx); - kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); - kbase_csf_event_remove_error(kctx, &group->error_timeout); kbase_csf_event_remove_error(kctx, &group->error_fatal); } -void kbase_csf_queue_group_terminate(struct kbase_context *kctx, - u8 group_handle) +void kbase_csf_queue_group_terminate(struct kbase_context *kctx, u8 group_handle) { struct kbase_queue_group *group; int err; @@ -1581,8 +1510,7 @@ KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); #if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST int kbase_csf_queue_group_suspend(struct kbase_context *kctx, - struct kbase_suspend_copy_buffer *sus_buf, - u8 group_handle) + struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle) { struct kbase_device *const kbdev = kctx->kbdev; int err; @@ -1590,18 +1518,15 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx, err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) { - dev_warn( - kbdev->dev, - "Unsuccessful GPU reset detected when suspending group %d", - group_handle); + dev_warn(kbdev->dev, "Unsuccessful GPU reset detected when suspending group %d", + group_handle); return err; } mutex_lock(&kctx->csf.lock); group = find_queue_group(kctx, group_handle); if (group) - err = kbase_csf_scheduler_group_copy_suspend_buf(group, - sus_buf); + err = kbase_csf_scheduler_group_copy_suspend_buf(group, sus_buf); else err = -EINVAL; @@ -1612,9 +1537,8 @@ int 
kbase_csf_queue_group_suspend(struct kbase_context *kctx, } #endif -void kbase_csf_add_group_fatal_error( - struct kbase_queue_group *const group, - struct base_gpu_queue_group_error const *const err_payload) +void kbase_csf_add_group_fatal_error(struct kbase_queue_group *const group, + struct base_gpu_queue_group_error const *const err_payload) { struct base_csf_notification error; @@ -1624,21 +1548,15 @@ void kbase_csf_add_group_fatal_error( if (WARN_ON(!err_payload)) return; - error = (struct base_csf_notification) { + error = (struct base_csf_notification){ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .handle = group->handle, - .error = *err_payload - } - } + .payload = { .csg_error = { .handle = group->handle, .error = *err_payload } } }; kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); } -void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, struct kbase_context *kctx) { struct list_head evicted_groups; struct kbase_queue_group *group; @@ -1650,11 +1568,10 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups); while (!list_empty(&evicted_groups)) { - group = list_first_entry(&evicted_groups, - struct kbase_queue_group, link); + group = list_first_entry(&evicted_groups, struct kbase_queue_group, link); - dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated", - kctx->tgid, kctx->id, group->handle); + dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated", kctx->tgid, + kctx->id, group->handle); kbase_csf_term_descheduled_queue_group(group); list_del_init(&group->link); } @@ -1662,8 +1579,7 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, /* Acting on the queue groups that are pending to be terminated. 
*/ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { group = kctx->csf.queue_groups[i]; - if (group && - group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) + if (group && group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) kbase_csf_term_descheduled_queue_group(group); } @@ -1682,8 +1598,7 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) /* Mark all the cookies as 'free' */ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); - kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", - WQ_UNBOUND, 1); + kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", WQ_UNBOUND, 1); if (likely(kctx->csf.wq)) { err = kbase_csf_scheduler_context_init(kctx); @@ -1696,11 +1611,12 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) if (likely(!err)) { mutex_init(&kctx->csf.lock); - INIT_WORK(&kctx->csf.pending_submission_work, - pending_submission_worker); err = kbasep_ctx_user_reg_page_mapping_init(kctx); + if (likely(!err)) + kbase_csf_cpu_queue_init(kctx); + if (unlikely(err)) kbase_csf_tiler_heap_context_term(kctx); } @@ -1720,8 +1636,37 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) return err; } -void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, - struct kbase_fault *fault) +void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx, + struct kbase_fault *fault) +{ + struct base_gpu_queue_group_error err_payload = + (struct base_gpu_queue_group_error){ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .sideband = fault->addr, + .status = fault->status, + } } }; + struct kbase_device *kbdev = kctx->kbdev; + const u32 num_groups = kbdev->csf.global_iface.group_num; + unsigned long flags; + int csg_nr; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + + if (!group || (group->kctx != kctx)) + continue; 
+ + group->faulted = true; + kbase_csf_add_group_fatal_error(group, &err_payload); + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault) { int gr; bool reported = false; @@ -1743,24 +1688,23 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, if (err) return; - err_payload = (struct base_gpu_queue_group_error) { - .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, - .payload = { - .fatal_group = { - .sideband = fault->addr, - .status = fault->status, - } - } - }; + err_payload = + (struct base_gpu_queue_group_error){ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .sideband = fault->addr, + .status = fault->status, + } } }; mutex_lock(&kctx->csf.lock); for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { - struct kbase_queue_group *const group = - kctx->csf.queue_groups[gr]; + struct kbase_queue_group *const group = kctx->csf.queue_groups[gr]; if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) { term_queue_group(group); + /* This would effectively be a NOP if the fatal error was already added to + * the error_list by kbase_csf_ctx_report_page_fault_for_active_groups(). + */ kbase_csf_add_group_fatal_error(group, &err_payload); reported = true; } @@ -1817,8 +1761,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) if (reset_prevented) kbase_reset_gpu_allow(kbdev); - cancel_work_sync(&kctx->csf.pending_submission_work); - /* Now that all queue groups have been terminated, there can be no * more OoM or timer event interrupts but there can be inflight work * items. Destroying the wq will implicitly flush those work items. 
@@ -1854,8 +1796,13 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) while (!list_empty(&kctx->csf.queue_list)) { struct kbase_queue *queue; - queue = list_first_entry(&kctx->csf.queue_list, - struct kbase_queue, link); + queue = list_first_entry(&kctx->csf.queue_list, struct kbase_queue, link); + + list_del_init(&queue->link); + + mutex_unlock(&kctx->csf.lock); + wait_pending_queue_kick(queue); + mutex_lock(&kctx->csf.lock); /* The reference held when the IO mapping was created on bind * would have been dropped otherwise the termination of Kbase @@ -1864,7 +1811,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. */ WARN_ON(kbase_refcount_read(&queue->refcount) != 1); - list_del_init(&queue->link); + release_queue(queue); } @@ -1902,15 +1849,11 @@ static int handle_oom_event(struct kbase_queue_group *const group, struct kbase_csf_cmd_stream_info const *const stream) { struct kbase_context *const kctx = group->kctx; - u64 gpu_heap_va = - kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | - ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); - const u32 vt_start = - kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START); - const u32 vt_end = - kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END); - const u32 frag_end = - kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END); + u64 gpu_heap_va = kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | + ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); + const u32 vt_start = kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START); + const u32 vt_end = kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END); + const u32 frag_end = kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END); u32 renderpasses_in_flight; u32 pending_frag_count; u64 new_chunk_ptr; @@ -1919,8 +1862,10 @@ static int handle_oom_event(struct kbase_queue_group *const group, if ((frag_end > vt_end) || (vt_end >= vt_start)) { frag_end_err = true; - dev_dbg(kctx->kbdev->dev, 
"Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n", - vt_start, vt_end, frag_end); + dev_dbg( + kctx->kbdev->dev, + "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n", + vt_start, vt_end, frag_end); } if (frag_end_err) { renderpasses_in_flight = 1; @@ -1930,8 +1875,8 @@ static int handle_oom_event(struct kbase_queue_group *const group, pending_frag_count = vt_end - frag_end; } - err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, - gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr); + err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, gpu_heap_va, renderpasses_in_flight, + pending_frag_count, &new_chunk_ptr); if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) && (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) { @@ -1947,15 +1892,11 @@ static int handle_oom_event(struct kbase_queue_group *const group, } else if (err) return err; - kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO, - new_chunk_ptr & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI, - new_chunk_ptr >> 32); + kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO, new_chunk_ptr & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI, new_chunk_ptr >> 32); - kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO, - new_chunk_ptr & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI, - new_chunk_ptr >> 32); + kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO, new_chunk_ptr & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI, new_chunk_ptr >> 32); return 0; } @@ -1977,17 +1918,12 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, } } } }; - kbase_csf_event_add_error(group->kctx, - &group->error_tiler_oom, - &error); + kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); kbase_event_wakeup(group->kctx); } 
static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) { - int err; - const unsigned int cache_flush_wait_timeout_ms = 2000; - kbase_pm_lock(kbdev); /* With the advent of partial cache flush, dirty cache lines could * be left in the GPU L2 caches by terminating the queue group here @@ -1997,17 +1933,12 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) */ if (kbdev->pm.backend.gpu_powered) { kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); - - if (err) { + if (kbase_gpu_wait_cache_clean_timeout(kbdev, + kbdev->mmu_or_gpu_cache_op_wait_time_ms)) dev_warn( kbdev->dev, - "[%llu] Timeout waiting for cache clean to complete after fatal error", + "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC to complete after fatal error", kbase_backend_get_cycle_cnt(kbdev)); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } } kbase_pm_unlock(kbdev); @@ -2067,10 +1998,8 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue) ginfo = &kbdev->csf.global_iface.groups[slot_num]; stream = &ginfo->streams[csi_index]; - cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) & - CS_ACK_TILER_OOM_MASK; - cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) & - CS_REQ_TILER_OOM_MASK; + cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) & CS_ACK_TILER_OOM_MASK; + cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) & CS_REQ_TILER_OOM_MASK; /* The group could have already undergone suspend-resume cycle before * this work item got a chance to execute. 
On CSG resume the CS_ACK @@ -2085,15 +2014,13 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue) err = handle_oom_event(group, stream); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, - CS_REQ_TILER_OOM_MASK); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, CS_REQ_TILER_OOM_MASK); kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); kbase_csf_scheduler_spin_unlock(kbdev, flags); if (unlikely(err)) { - dev_warn( - kbdev->dev, - "Queue group to be terminated, couldn't handle the OoM event\n"); + dev_warn(kbdev->dev, + "Queue group to be terminated, couldn't handle the OoM event\n"); kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM); kbase_csf_scheduler_unlock(kbdev); term_queue_group(group); @@ -2116,26 +2043,21 @@ unlock: */ static void oom_event_worker(struct work_struct *data) { - struct kbase_queue *queue = - container_of(data, struct kbase_queue, oom_event_work); + struct kbase_queue *queue = container_of(data, struct kbase_queue, oom_event_work); struct kbase_context *kctx = queue->kctx; struct kbase_device *const kbdev = kctx->kbdev; - - int err = kbase_reset_gpu_try_prevent(kbdev); - - /* Regardless of whether reset failed or is currently happening, exit - * early - */ - if (err) - return; + int reset_prevent_err = kbase_reset_gpu_try_prevent(kbdev); mutex_lock(&kctx->csf.lock); - - kbase_queue_oom_event(queue); - release_queue(queue); - + if (likely(!reset_prevent_err)) { + kbase_queue_oom_event(queue); + } else { + dev_warn(kbdev->dev, + "Unable to prevent GPU reset, couldn't handle the OoM event\n"); + } mutex_unlock(&kctx->csf.lock); - kbase_reset_gpu_allow(kbdev); + if (likely(!reset_prevent_err)) + kbase_reset_gpu_allow(kbdev); } /** @@ -2151,15 +2073,14 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) .csg_error = { .handle = group->handle, .error = { - .error_type = - BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, + 
.error_type = BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, } } } }; dev_warn(group->kctx->kbdev->dev, "Notify the event notification thread, forward progress timeout (%llu cycles)\n", kbase_csf_timeout_get(group->kctx->kbdev)); - kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); + kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); kbase_event_wakeup(group->kctx); } @@ -2209,8 +2130,7 @@ static void timer_event_worker(struct work_struct *data) */ static void handle_progress_timer_event(struct kbase_queue_group *const group) { - kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, - DF_PROGRESS_TIMER_TIMEOUT); + kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, DF_PROGRESS_TIMER_TIMEOUT); queue_work(group->kctx->csf.wq, &group->timer_event_work); } @@ -2305,8 +2225,7 @@ static void protm_event_worker(struct work_struct *data) struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; int err = 0; - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, - group, 0u); + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); err = alloc_grp_protected_suspend_buffer_pages(group); if (!err) { @@ -2322,8 +2241,7 @@ static void protm_event_worker(struct work_struct *data) report_group_fatal_error(group); } - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, - group, 0u); + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); } /** @@ -2336,38 +2254,35 @@ static void protm_event_worker(struct work_struct *data) * Print required information about the CS fault and notify the user space client * about the fault. 
*/ -static void -handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) +static void handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) { struct kbase_device *const kbdev = queue->kctx->kbdev; struct kbase_csf_cmd_stream_group_info const *ginfo = - &kbdev->csf.global_iface.groups[queue->group->csg_nr]; - struct kbase_csf_cmd_stream_info const *stream = - &ginfo->streams[queue->csi_index]; + &kbdev->csf.global_iface.groups[queue->group->csg_nr]; + struct kbase_csf_cmd_stream_info const *stream = &ginfo->streams[queue->csi_index]; const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); const u64 cs_fault_info = kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | - ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI) - << 32); - const u8 cs_fault_exception_type = - CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); - const u32 cs_fault_exception_data = - CS_FAULT_EXCEPTION_DATA_GET(cs_fault); - const u64 cs_fault_info_exception_data = - CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); + ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI) << 32); + const u8 cs_fault_exception_type = CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); + const u32 cs_fault_exception_data = CS_FAULT_EXCEPTION_DATA_GET(cs_fault); + const u64 cs_fault_info_exception_data = CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); + bool use_old_log_format = true; + kbase_csf_scheduler_spin_lock_assert_held(kbdev); - dev_warn(kbdev->dev, - "Ctx %d_%d Group %d CSG %d CSI: %d\n" - "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" - "CS_FAULT.EXCEPTION_DATA: 0x%x\n" - "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", - queue->kctx->tgid, queue->kctx->id, queue->group->handle, - queue->group->csg_nr, queue->csi_index, - cs_fault_exception_type, - kbase_gpu_exception_name(cs_fault_exception_type), - cs_fault_exception_data, cs_fault_info_exception_data); + + if (use_old_log_format) + dev_warn(kbdev->dev, + "Ctx %d_%d Group %d CSG %d CSI: %d\n" + "CS_FAULT.EXCEPTION_TYPE: 0x%x 
(%s)\n" + "CS_FAULT.EXCEPTION_DATA: 0x%x\n" + "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", + queue->kctx->tgid, queue->kctx->id, queue->group->handle, + queue->group->csg_nr, queue->csi_index, cs_fault_exception_type, + kbase_gpu_exception_name(cs_fault_exception_type), cs_fault_exception_data, + cs_fault_info_exception_data); #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -2385,52 +2300,47 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { - get_queue(queue); queue->cs_error = cs_fault; queue->cs_error_info = cs_fault_info; queue->cs_error_fatal = false; - if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) - release_queue(queue); + queue_work(queue->kctx->csf.wq, &queue->cs_error_work); return; } } #endif - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, - CS_REQ_FAULT_MASK); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); } -static void report_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info, - u8 group_handle) +static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal, + u64 cs_fatal_info, struct kbase_queue_group *group) { - struct base_csf_notification error = { - .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .handle = group_handle, - .error = { - .error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { - .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - .csi_index = queue->csi_index, - } - } - } - } - } - }; + struct base_csf_notification + error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .error = { 
.error_type = + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + .payload = { .fatal_queue = { + .sideband = cs_fatal_info, + .status = cs_fatal, + } } } } } }; - kbase_csf_event_add_error(queue->kctx, &queue->error, &error); + if (!queue) + return; + + if (WARN_ON_ONCE(!group)) + return; + + error.payload.csg_error.handle = group->handle; + error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index; + kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error); kbase_event_wakeup(queue->kctx); } /** - * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue + * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue * * @data: Pointer to a work_struct embedded in GPU command queue. * @@ -2438,12 +2348,11 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, */ static void cs_error_worker(struct work_struct *const data) { - struct kbase_queue *const queue = - container_of(data, struct kbase_queue, cs_error_work); + struct kbase_queue *const queue = container_of(data, struct kbase_queue, cs_error_work); + const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(queue->cs_error); struct kbase_context *const kctx = queue->kctx; struct kbase_device *const kbdev = kctx->kbdev; struct kbase_queue_group *group; - u8 group_handle; bool reset_prevented = false; int err; @@ -2477,27 +2386,32 @@ static void cs_error_worker(struct work_struct *const data) &kbdev->csf.global_iface.groups[slot_num]; struct kbase_csf_cmd_stream_info const *stream = &ginfo->streams[queue->csi_index]; - u32 const cs_ack = - kbase_csf_firmware_cs_output(stream, CS_ACK); + u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, - CS_REQ_FAULT_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, - slot_num, true); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, 
queue->csi_index, slot_num, true); } kbase_csf_scheduler_spin_unlock(kbdev, flags); goto unlock; } #endif - group_handle = group->handle; term_queue_group(group); flush_gpu_cache_on_fatal_error(kbdev); - report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, - group_handle); + /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the + * page fault handler disables the AS of faulty context. Need to skip reporting the + * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. + * Page fault handler will report the fatal error with full page fault info. + */ + if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) { + dev_dbg(kbdev->dev, + "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", + queue->csi_index, group->handle, kctx->tgid, kctx->id); + } else { + report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group); + } unlock: - release_queue(queue); mutex_unlock(&kctx->csf.lock); if (reset_prevented) kbase_reset_gpu_allow(kbdev); @@ -2517,39 +2431,34 @@ unlock: * Enqueue a work item to terminate the group and report the fatal error * to user space. 
*/ -static void -handle_fatal_event(struct kbase_queue *const queue, - struct kbase_csf_cmd_stream_info const *const stream, - u32 cs_ack) +static void handle_fatal_event(struct kbase_queue *const queue, + struct kbase_csf_cmd_stream_info const *const stream, u32 cs_ack) { + struct kbase_device *const kbdev = queue->kctx->kbdev; const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); const u64 cs_fatal_info = kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) | - ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI) - << 32); - const u32 cs_fatal_exception_type = - CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); - const u32 cs_fatal_exception_data = - CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); - const u64 cs_fatal_info_exception_data = - CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); - struct kbase_device *const kbdev = queue->kctx->kbdev; + ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI) << 32); + const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); + const u32 cs_fatal_exception_data = CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); + const u64 cs_fatal_info_exception_data = CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); + bool use_old_log_format = true; + kbase_csf_scheduler_spin_lock_assert_held(kbdev); - dev_warn(kbdev->dev, - "Ctx %d_%d Group %d CSG %d CSI: %d\n" - "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" - "CS_FATAL.EXCEPTION_DATA: 0x%x\n" - "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", - queue->kctx->tgid, queue->kctx->id, queue->group->handle, - queue->group->csg_nr, queue->csi_index, - cs_fatal_exception_type, - kbase_gpu_exception_name(cs_fatal_exception_type), - cs_fatal_exception_data, cs_fatal_info_exception_data); + if (use_old_log_format) + dev_warn(kbdev->dev, + "Ctx %d_%d Group %d CSG %d CSI: %d\n" + "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" + "CS_FATAL.EXCEPTION_DATA: 0x%x\n" + "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", + queue->kctx->tgid, queue->kctx->id, queue->group->handle, + queue->group->csg_nr, 
queue->csi_index, cs_fatal_exception_type, + kbase_gpu_exception_name(cs_fatal_exception_type), cs_fatal_exception_data, + cs_fatal_info_exception_data); - if (cs_fatal_exception_type == - CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { + if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR); queue_work(system_wq, &kbdev->csf.fw_error_work); } else { @@ -2559,16 +2468,13 @@ handle_fatal_event(struct kbase_queue *const queue, if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(queue->kctx->kbdev); } - get_queue(queue); queue->cs_error = cs_fatal; queue->cs_error_info = cs_fatal_info; queue->cs_error_fatal = true; - if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) - release_queue(queue); + queue_work(queue->kctx->csf.wq, &queue->cs_error_work); } - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, - CS_REQ_FATAL_MASK); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FATAL_MASK); } @@ -2597,8 +2503,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, struct kbase_device *const kbdev = group->kctx->kbdev; u32 remaining = irqreq ^ irqack; bool protm_pend = false; - const bool group_suspending = - !kbase_csf_scheduler_group_events_enabled(kbdev, group); + const bool group_suspending = !kbase_csf_scheduler_group_events_enabled(kbdev, group); kbase_csf_scheduler_spin_lock_assert_held(kbdev); @@ -2614,23 +2519,20 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, * requires scheduler spinlock. 
*/ if (queue && !WARN_ON(queue->csi_index != i)) { - struct kbase_csf_cmd_stream_info const *const stream = - &ginfo->streams[i]; - u32 const cs_req = kbase_csf_firmware_cs_input_read( - stream, CS_REQ); - u32 const cs_ack = - kbase_csf_firmware_cs_output(stream, CS_ACK); + struct kbase_csf_cmd_stream_info const *const stream = &ginfo->streams[i]; + u32 const cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ); + u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); struct workqueue_struct *wq = group->kctx->csf.wq; if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, - group, queue, cs_req ^ cs_ack); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, group, queue, + cs_req ^ cs_ack); handle_fatal_event(queue, stream, cs_ack); } if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, - group, queue, cs_req ^ cs_ack); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, group, queue, + cs_req ^ cs_ack); handle_fault_event(queue, cs_ack); } @@ -2643,17 +2545,15 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, - CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED, - group, queue, - cs_req_remain ^ cs_ack_remain); + CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED, + group, queue, + cs_req_remain ^ cs_ack_remain); continue; } - if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ - (cs_ack & CS_ACK_TILER_OOM_MASK))) { - get_queue(queue); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, - group, queue, cs_req ^ cs_ack); + if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ (cs_ack & CS_ACK_TILER_OOM_MASK))) { + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, group, + queue, cs_req ^ cs_ack); if (!queue_work(wq, &queue->oom_event_work)) { /* The work item shall not have been * already queued, there can be only @@ 
-2665,19 +2565,16 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", queue->csi_index, group->handle, queue->kctx->tgid, queue->kctx->id); - release_queue(queue); } } - if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ - (cs_ack & CS_ACK_PROTM_PEND_MASK)) { - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND, - group, queue, cs_req ^ cs_ack); + if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ (cs_ack & CS_ACK_PROTM_PEND_MASK)) { + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND, group, + queue, cs_req ^ cs_ack); dev_dbg(kbdev->dev, "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", - queue->csi_index, group->handle, - group->csg_nr); + queue->csi_index, group->handle, group->csg_nr); bitmap_set(group->protm_pending_bitmap, i, 1); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue, @@ -2699,12 +2596,10 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, queue_work(group->kctx->csf.wq, &group->protm_event_work); if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { - clear_bit(group->csg_nr, - scheduler->csg_slots_idle_mask); + clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, - scheduler->csg_slots_idle_mask[0]); - dev_dbg(kbdev->dev, - "Group-%d on slot %d de-idled by protm request", + scheduler->csg_slots_idle_mask[0]); + dev_dbg(kbdev->dev, "Group-%d on slot %d de-idled by protm request", group->handle, group->csg_nr); } } @@ -2736,7 +2631,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c kbase_csf_scheduler_spin_lock_assert_held(kbdev); - if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) + if (WARN_ON((u32)csg_nr >= kbdev->csf.global_iface.group_num)) return; ginfo = &kbdev->csf.global_iface.groups[csg_nr]; @@ -2775,44 +2670,32 @@ static void process_csg_interrupts(struct kbase_device *const 
kbdev, int const c KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { - kbase_csf_firmware_csg_input_mask(ginfo, - CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); - - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); - - /* SYNC_UPDATE events shall invalidate GPU idle event */ - atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); - - kbase_csf_event_signal_cpu_only(group->kctx); - } + kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, req, ack); if ((req ^ ack) & CSG_REQ_IDLE_MASK) { - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr); + KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(kbdev, kbdev->id, csg_nr); - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, - CSG_REQ_IDLE_MASK); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_IDLE_MASK); set_bit(csg_nr, scheduler->csg_slots_idle_mask); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, scheduler->csg_slots_idle_mask[0]); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack); dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", - group->handle, csg_nr); + group->handle, csg_nr); if (atomic_read(&scheduler->non_idle_offslot_grps)) { /* If there are non-idle CSGs waiting for a slot, fire * a tock for a replacement. 
*/ - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS, - group, req ^ ack); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS, group, + req ^ ack); kbase_csf_scheduler_invoke_tock(kbdev); } else { - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS, - group, req ^ ack); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS, group, + req ^ ack); } if (group->scan_seq_num < track->idle_seq) { @@ -2854,75 +2737,63 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c * expected that the scheduler spinlock is already held on calling this * function. */ -static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, - u32 glb_ack) +static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, u32 glb_ack) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); /* Process PRFCNT_SAMPLE interrupt. */ if (kbdev->csf.hwcnt.request_pending && - ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == - (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { + ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { kbdev->csf.hwcnt.request_pending = false; dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); - kbase_hwcnt_backend_csf_on_prfcnt_sample( - &kbdev->hwcnt_gpu_iface); + kbase_hwcnt_backend_csf_on_prfcnt_sample(&kbdev->hwcnt_gpu_iface); } /* Process PRFCNT_ENABLE interrupt. 
*/ if (kbdev->csf.hwcnt.enable_pending && - ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == - (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { + ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { kbdev->csf.hwcnt.enable_pending = false; - dev_dbg(kbdev->dev, - "PRFCNT_ENABLE status changed interrupt received."); + dev_dbg(kbdev->dev, "PRFCNT_ENABLE status changed interrupt received."); if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) - kbase_hwcnt_backend_csf_on_prfcnt_enable( - &kbdev->hwcnt_gpu_iface); + kbase_hwcnt_backend_csf_on_prfcnt_enable(&kbdev->hwcnt_gpu_iface); else - kbase_hwcnt_backend_csf_on_prfcnt_disable( - &kbdev->hwcnt_gpu_iface); + kbase_hwcnt_backend_csf_on_prfcnt_disable(&kbdev->hwcnt_gpu_iface); } /* Process PRFCNT_THRESHOLD interrupt. */ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); - kbase_hwcnt_backend_csf_on_prfcnt_threshold( - &kbdev->hwcnt_gpu_iface); + kbase_hwcnt_backend_csf_on_prfcnt_threshold(&kbdev->hwcnt_gpu_iface); /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to * the same value as GLB_ACK.PRFCNT_THRESHOLD * flag in order to enable reporting of another * PRFCNT_THRESHOLD event. */ - kbase_csf_firmware_global_input_mask( - global_iface, GLB_REQ, glb_ack, - GLB_REQ_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack, + GLB_REQ_PRFCNT_THRESHOLD_MASK); } /* Process PRFCNT_OVERFLOW interrupt. */ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); - kbase_hwcnt_backend_csf_on_prfcnt_overflow( - &kbdev->hwcnt_gpu_iface); + kbase_hwcnt_backend_csf_on_prfcnt_overflow(&kbdev->hwcnt_gpu_iface); /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to * the same value as GLB_ACK.PRFCNT_OVERFLOW * flag in order to enable reporting of another * PRFCNT_OVERFLOW event. 
*/ - kbase_csf_firmware_global_input_mask( - global_iface, GLB_REQ, glb_ack, - GLB_REQ_PRFCNT_OVERFLOW_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack, + GLB_REQ_PRFCNT_OVERFLOW_MASK); } } @@ -2937,8 +2808,8 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, * appropriately sends notification about the protected mode entry to components * like IPA, HWC, IPA_CONTROL. */ -static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, - u32 glb_req, u32 glb_ack) +static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, u32 glb_req, + u32 glb_ack) { lockdep_assert_held(&kbdev->hwaccess_lock); kbase_csf_scheduler_spin_lock_assert_held(kbdev); @@ -2949,8 +2820,7 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, if (kbdev->protected_mode) return; - if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) != - (glb_ack & GLB_REQ_PROTM_ENTER_MASK)) + if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) != (glb_ack & GLB_REQ_PROTM_ENTER_MASK)) return; dev_dbg(kbdev->dev, "Protected mode entry interrupt received"); @@ -2972,9 +2842,8 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, */ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&kbdev->hwaccess_lock); kbase_csf_scheduler_spin_lock_assert_held(kbdev); @@ -2985,8 +2854,8 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) GLB_REQ_PROTM_EXIT_MASK); if (likely(scheduler->active_protm_grp)) { - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, - scheduler->active_protm_grp, 0u); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, 
SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, + 0u); scheduler->active_protm_grp = NULL; } else { dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group"); @@ -3031,7 +2900,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, * previously notified idle CSGs in the current tick/tock cycle. */ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { - if (i == track->idle_slot) + if (i == (u32)track->idle_slot) continue; grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i); /* If not NULL then the group pointer cannot disappear as the @@ -3050,8 +2919,8 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, } if (!tock_triggered) { - dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", - group->handle, group->csg_nr); + dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", group->handle, + group->csg_nr); queue_work(group->kctx->csf.wq, &group->protm_event_work); } } @@ -3092,13 +2961,16 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) do { unsigned long flags; u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; - struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; bool glb_idle_irq_received = false; - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), val); order_job_irq_clear_with_iface_mem_read(); if (csg_interrupts != 0) { + struct irq_idle_and_protm_track track = { .protm_grp = NULL, + .idle_seq = U32_MAX, + .idle_slot = S8_MAX }; + kbase_csf_scheduler_spin_lock(kbdev, &flags); /* Looping through and track the highest idle and protm groups */ while (csg_interrupts != 0) { @@ -3171,7 +3043,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) * idle, the GPU would be treated as no longer idle and left * powered on. 
*/ - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)); } while (val); if (deferred_handling_glb_idle_irq) { @@ -3187,14 +3059,30 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); } +void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct kbase_queue_group *group, u32 req, u32 ack) +{ + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); + + /* SYNC_UPDATE events shall invalidate GPU idle event */ + atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); + + kbase_csf_event_signal_cpu_only(group->kctx); + } +} + void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) { if (kbdev->csf.db_filp) { struct page *page = as_page(kbdev->csf.dummy_db_page); - kbase_mem_pool_free( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - page, false); + kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.db_filp); } @@ -3225,6 +3113,28 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) return 0; } +void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev) +{ + size_t i; + + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) + INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]); + spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock); +} + +void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev) +{ + size_t i; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) { + if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) + dev_warn(kbdev->dev, + "Some GPU queue kicks for priority %zu were 
not handled", i); + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); +} + void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) { if (kbdev->csf.user_reg.filp) { @@ -3257,7 +3167,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) } page = as_page(phys); - addr = kmap_atomic(page); + addr = kbase_kmap_atomic(page); /* Write a special value for the latest flush register inside the * dummy page @@ -3266,7 +3176,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), DMA_BIDIRECTIONAL); - kunmap_atomic(addr); + kbase_kunmap_atomic(addr); kbdev->csf.user_reg.filp = filp; kbdev->csf.user_reg.dummy_page = phys; @@ -3281,9 +3191,67 @@ u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) if (pcm_device) { req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority); - out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); + out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, + req_priority); out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority); } return out_priority; } + +void kbase_csf_process_queue_kick(struct kbase_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool retry_kick = false; + int err = kbase_reset_gpu_prevent_and_wait(kbdev); + + if (err) { + dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue"); + goto out_release_queue; + } + + mutex_lock(&kctx->csf.lock); + + if (queue->bind_state != KBASE_CSF_QUEUE_BOUND) + goto out_allow_gpu_reset; + + err = kbase_csf_scheduler_queue_start(queue); + if (unlikely(err)) { + dev_dbg(kbdev->dev, "Failed to start queue"); + if (err == -EBUSY) { + retry_kick = true; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + if (list_empty(&queue->pending_kick_link)) { + /* A 
failed queue kick shall be pushed to the + * back of the queue to avoid potential abuse. + */ + list_add_tail( + &queue->pending_kick_link, + &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + } else { + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + WARN_ON(atomic_read(&queue->pending_kick) == 0); + } + + complete(&kbdev->csf.scheduler.kthread_signal); + } + } + +out_allow_gpu_reset: + if (likely(!retry_kick)) { + WARN_ON(atomic_read(&queue->pending_kick) == 0); + atomic_dec(&queue->pending_kick); + } + + mutex_unlock(&kctx->csf.lock); + + kbase_reset_gpu_allow(kbdev); + + return; +out_release_queue: + WARN_ON(atomic_read(&queue->pending_kick) == 0); + atomic_dec(&queue->pending_kick); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h index dd947dcbab1c..b2f6ab2c4a27 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h @@ -48,7 +48,7 @@ */ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ +#define FIRMWARE_IDLE_HYSTERESIS_TIME_NS (10 * 1000 * 1000) /* Default 10 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -72,8 +72,19 @@ int kbase_csf_ctx_init(struct kbase_context *kctx); * This function terminates all GPU command queue groups in the context and * notifies the event notification thread of the fault. */ -void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, - struct kbase_fault *fault); +void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault); + +/** + * kbase_csf_ctx_report_page_fault_for_active_groups - Notify Userspace about GPU page fault + * for active groups of the faulty context. + * + * @kctx: Pointer to faulty kbase context. 
+ * @fault: Pointer to the fault. + * + * This function notifies the event notification thread of the GPU page fault. + */ +void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx, + struct kbase_fault *fault); /** * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. @@ -96,8 +107,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx); * * Return: 0 on success, or negative on failure. */ -int kbase_csf_queue_register(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_register *reg); +int kbase_csf_queue_register(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_register *reg); /** * kbase_csf_queue_register_ex - Register a GPU command queue with @@ -113,7 +123,7 @@ int kbase_csf_queue_register(struct kbase_context *kctx, * Return: 0 on success, or negative on failure. */ int kbase_csf_queue_register_ex(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_register_ex *reg); + struct kbase_ioctl_cs_queue_register_ex *reg); /** * kbase_csf_queue_terminate - Terminate a GPU command queue. @@ -124,7 +134,7 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, * queue is to be terminated. */ void kbase_csf_queue_terminate(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_terminate *term); + struct kbase_ioctl_cs_queue_terminate *term); /** * kbase_csf_free_command_stream_user_pages() - Free the resources allocated @@ -160,7 +170,7 @@ void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, * Return: 0 on success, or negative on failure. */ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue); + struct kbase_queue *queue); /** * kbase_csf_queue_bind - Bind a GPU command queue to a queue group. @@ -171,8 +181,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, * * Return: 0 on success, or negative on failure. 
*/ -int kbase_csf_queue_bind(struct kbase_context *kctx, - union kbase_ioctl_cs_queue_bind *bind); +int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind); /** * kbase_csf_queue_unbind - Unbind a GPU command queue from a queue group @@ -204,11 +213,10 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); * * Return: 0 on success, or negative on failure. */ -int kbase_csf_queue_kick(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_kick *kick); +int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick); /** - * kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding + * kbase_csf_find_queue_group - Find the queue group corresponding * to the indicated handle. * * @kctx: The kbase context under which the queue group exists. @@ -233,8 +241,7 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, * * Return: 0 on success, or negative on failure. */ -int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, - u8 group_handle); +int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle); /** * kbase_csf_queue_group_create - Create a GPU command queue group. @@ -248,7 +255,7 @@ int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, * Return: 0 on success, or negative on failure. */ int kbase_csf_queue_group_create(struct kbase_context *kctx, - union kbase_ioctl_cs_queue_group_create *create); + union kbase_ioctl_cs_queue_group_create *create); /** * kbase_csf_queue_group_terminate - Terminate a GPU command queue group. @@ -258,8 +265,7 @@ int kbase_csf_queue_group_create(struct kbase_context *kctx, * @group_handle: Pointer to the structure which identifies the queue * group which is to be terminated. 
*/ -void kbase_csf_queue_group_terminate(struct kbase_context *kctx, - u8 group_handle); +void kbase_csf_queue_group_terminate(struct kbase_context *kctx, u8 group_handle); /** * kbase_csf_term_descheduled_queue_group - Terminate a GPU command queue @@ -291,7 +297,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); * queue group and copy suspend buffer contents. */ int kbase_csf_queue_group_suspend(struct kbase_context *kctx, - struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); + struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); #endif /** @@ -300,9 +306,8 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx, * @group: GPU command queue group. * @err_payload: Error payload to report. */ -void kbase_csf_add_group_fatal_error( - struct kbase_queue_group *const group, - struct base_gpu_queue_group_error const *const err_payload); +void kbase_csf_add_group_fatal_error(struct kbase_queue_group *const group, + struct base_gpu_queue_group_error const *const err_payload); /** * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. @@ -312,6 +317,19 @@ void kbase_csf_add_group_fatal_error( */ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); +/** + * kbase_csf_handle_csg_sync_update - Handle SYNC_UPDATE notification for the group. + * + * @kbdev: The kbase device to handle the SYNC_UPDATE interrupt. + * @ginfo: Pointer to the CSG interface used by the @group + * @group: Pointer to the GPU command queue group. + * @req: CSG_REQ register value corresponding to @group. + * @ack: CSG_ACK register value corresponding to @group. 
+ */ +void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct kbase_queue_group *group, u32 req, u32 ack); + /** * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates * the update of userspace mapping of HW @@ -360,6 +378,22 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); */ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); +/** + * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling + * GPU queue kicks. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev); + +/** + * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling + * GPU queue kicks. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev); + /** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. * @@ -379,8 +413,7 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); * * The function kicks a notification on a set of CSG interfaces to firmware. */ -void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, - u32 slot_bitmap); +void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, u32 slot_bitmap); /** * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI @@ -400,8 +433,7 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, * The function sends a doorbell interrupt notification to the firmware for * a CSI assigned to a GPU queue. 
*/ -void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, - int csi_index, int csg_nr, +void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index, int csg_nr, bool ring_csg_doorbell); /** @@ -414,8 +446,7 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, * The function kicks a notification to the firmware on the doorbell assigned * to the queue. */ -void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, - struct kbase_queue *queue); +void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, struct kbase_queue *queue); /** * kbase_csf_active_queue_groups_reset - Reset the state of all active GPU @@ -431,8 +462,7 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, * * This is similar to the action taken in response to an unexpected OoM event. */ -void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbase_csf_priority_check - Check the priority requested @@ -484,12 +514,12 @@ static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority } /** - * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retreive the GPU cycle counter + * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retrieve the GPU cycle counter * value for Ktrace purpose. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * - * This function is just a wrapper to retreive the GPU cycle counter value, to + * This function is just a wrapper to retrieve the GPU cycle counter value, to * avoid any overhead on Release builds where Ktrace is disabled by default. * * Return: Snapshot of the GPU cycle count register. 
@@ -499,8 +529,21 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) #if KBASE_KTRACE_ENABLE return kbase_backend_get_cycle_cnt(kbdev); #else + CSTD_UNUSED(kbdev); return 0; #endif } +/** + * kbase_csf_process_queue_kick() - Process a pending kicked GPU command queue. + * + * @queue: Pointer to the queue to process. + * + * This function starts the pending queue, for which the work + * was previously submitted via ioctl call from application thread. + * If the queue is already scheduled and resident, it will be started + * right away, otherwise once the group is made resident. + */ +void kbase_csf_process_queue_kick(struct kbase_queue *queue); + #endif /* _KBASE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.c new file mode 100644 index 000000000000..8a112477e256 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_cpu_queue.h" +#include "mali_kbase_csf_util.h" +#include +#include + +void kbase_csf_cpu_queue_init(struct kbase_context *kctx) +{ + if (WARN_ON(!kctx)) + return; + + kctx->csf.cpu_queue.buffer = NULL; + kctx->csf.cpu_queue.buffer_size = 0; + atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); +} + +bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, + struct base_csf_notification *req) +{ + if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED, + BASE_CSF_CPU_QUEUE_DUMP_PENDING) != BASE_CSF_CPU_QUEUE_DUMP_ISSUED) { + return false; + } + + req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP; + return true; +} + +bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx) +{ + return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == + BASE_CSF_CPU_QUEUE_DUMP_ISSUED); +} + +int kbase_csf_cpu_queue_dump_buffer(struct kbase_context *kctx, u64 buffer, size_t buf_size) +{ + size_t alloc_size = buf_size; + char *dump_buffer; + + if (!buffer || !buf_size) + return 0; + + if (alloc_size > KBASE_MEM_ALLOC_MAX_SIZE) + return -EINVAL; + + alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); + dump_buffer = kzalloc(alloc_size, GFP_KERNEL); + if (!dump_buffer) + return -ENOMEM; + + WARN_ON(kctx->csf.cpu_queue.buffer != NULL); + + if (copy_from_user(dump_buffer, u64_to_user_ptr(buffer), buf_size)) { + kfree(dump_buffer); + return -EFAULT; + } + + mutex_lock(&kctx->csf.lock); + + kfree(kctx->csf.cpu_queue.buffer); + + if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == BASE_CSF_CPU_QUEUE_DUMP_PENDING) { + kctx->csf.cpu_queue.buffer = dump_buffer; + kctx->csf.cpu_queue.buffer_size = buf_size; + complete_all(&kctx->csf.cpu_queue.dump_cmp); + } else + kfree(dump_buffer); + + mutex_unlock(&kctx->csf.lock); + + return 0; +} + +int kbasep_csf_cpu_queue_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr) +{ + mutex_lock(&kctx->csf.lock); + 
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) { + kbasep_print(kbpr, "Dump request already started! (try again)\n"); + mutex_unlock(&kctx->csf.lock); + return -EBUSY; + } + + atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED); + init_completion(&kctx->csf.cpu_queue.dump_cmp); + kbase_event_wakeup(kctx); + mutex_unlock(&kctx->csf.lock); + + kbasep_print(kbpr, "CPU Queues table (version:v" __stringify( + MALI_CSF_CPU_QUEUE_DUMP_VERSION) "):\n"); + + wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, msecs_to_jiffies(3000)); + + mutex_lock(&kctx->csf.lock); + if (kctx->csf.cpu_queue.buffer) { + WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) != + BASE_CSF_CPU_QUEUE_DUMP_PENDING); + + /* The CPU queue dump is returned as a single formatted string */ + kbasep_puts(kbpr, kctx->csf.cpu_queue.buffer); + kbasep_puts(kbpr, "\n"); + + kfree(kctx->csf.cpu_queue.buffer); + kctx->csf.cpu_queue.buffer = NULL; + kctx->csf.cpu_queue.buffer_size = 0; + } else + kbasep_print(kbpr, "Dump error! (time out)\n"); + + atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); + + mutex_unlock(&kctx->csf.lock); + return 0; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.h new file mode 100644 index 000000000000..3fae7051fe1a --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_CPU_QUEUE_H_ +#define _KBASE_CSF_CPU_QUEUE_H_ + +#include + +/* Forward declaration */ +struct base_csf_notification; +struct kbase_context; +struct kbasep_printer; + +#define MALI_CSF_CPU_QUEUE_DUMP_VERSION 0 + +/* CPU queue dump status */ +/* Dumping is done or no dumping is in progress. */ +#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0 +/* Dumping request is pending. */ +#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1 +/* Dumping request is issued to Userspace */ +#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2 + +/** + * kbase_csf_cpu_queue_init() - Initialise the per-context cpu queue dump state + * + * @kctx: The kbase_context + */ +void kbase_csf_cpu_queue_init(struct kbase_context *kctx); + +/** + * kbase_csf_cpu_queue_read_dump_req() - Read cpu queue dump request event + * + * @kctx: The kbase_context to which the dumped cpu queue belongs. + * @req: Notification with cpu queue dump request. + * + * Return: true if a CPU queue dump is needed, or false otherwise. + */ +bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, + struct base_csf_notification *req); + +/** + * kbase_csf_cpu_queue_dump_needed() - Check the requirement for cpu queue dump + * + * @kctx: The kbase_context to which the dumped cpu queue belongs. + * + * Return: true if a cpu queue dump is needed, or false otherwise.
+ */ +bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx); + +/** + * kbase_csf_cpu_queue_dump_buffer() - dump buffer containing cpu queue information + * + * @kctx: The kbase_context to which the dumped cpu queue belongs. + * @buffer: User-space address of the buffer containing the cpu queue information. + * @buf_size: Buffer size. + * + * Return: 0 on success, or a negative error code on failure. + */ +int kbase_csf_cpu_queue_dump_buffer(struct kbase_context *kctx, u64 buffer, size_t buf_size); + +/** + * kbasep_csf_cpu_queue_dump_print() - Dump cpu queue information to file + * + * @kctx: The kbase_context to which the dumped cpu queue belongs. + * @kbpr: Pointer to printer instance. + * + * Return: 0 on success, or a negative error code on failure. + */ +int kbasep_csf_cpu_queue_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr); + +#endif /* _KBASE_CSF_CPU_QUEUE_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c index 516a33ff7465..56742d09136b 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,24 +20,12 @@ */ #include "mali_kbase_csf_cpu_queue_debugfs.h" -#include -#include #if IS_ENABLED(CONFIG_DEBUG_FS) - -bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, - struct base_csf_notification *req) -{ - if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, - BASE_CSF_CPU_QUEUE_DUMP_ISSUED, - BASE_CSF_CPU_QUEUE_DUMP_PENDING) != - BASE_CSF_CPU_QUEUE_DUMP_ISSUED) { - return false; - } - - req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP; - return true; -} +#include "mali_kbase_csf_cpu_queue.h" +#include "mali_kbase_csf_util.h" +#include +#include /** * kbasep_csf_cpu_queue_debugfs_show() - Print cpu queue information for per context @@ -49,45 +37,18 @@ bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, */ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data) { - struct kbase_context *kctx = file->private; + struct kbasep_printer *kbpr; + struct kbase_context *const kctx = file->private; + int ret = -EINVAL; + CSTD_UNUSED(data); - mutex_lock(&kctx->csf.lock); - if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != - BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) { - seq_puts(file, "Dump request already started! 
(try again)\n"); - mutex_unlock(&kctx->csf.lock); - return -EBUSY; + kbpr = kbasep_printer_file_init(file); + if (kbpr != NULL) { + ret = kbasep_csf_cpu_queue_dump_print(kctx, kbpr); + kbasep_printer_term(kbpr); } - atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED); - init_completion(&kctx->csf.cpu_queue.dump_cmp); - kbase_event_wakeup(kctx); - mutex_unlock(&kctx->csf.lock); - - seq_puts(file, - "CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n"); - - wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, - msecs_to_jiffies(3000)); - - mutex_lock(&kctx->csf.lock); - if (kctx->csf.cpu_queue.buffer) { - WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) != - BASE_CSF_CPU_QUEUE_DUMP_PENDING); - - seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer); - - kfree(kctx->csf.cpu_queue.buffer); - kctx->csf.cpu_queue.buffer = NULL; - kctx->csf.cpu_queue.buffer_size = 0; - } else - seq_puts(file, "Dump error! (time out)\n"); - - atomic_set(&kctx->csf.cpu_queue.dump_req_status, - BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); - - mutex_unlock(&kctx->csf.lock); - return 0; + return ret; } static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file) @@ -109,66 +70,14 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; - file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, - kctx, &kbasep_csf_cpu_queue_debugfs_fops); + file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, kctx, + &kbasep_csf_cpu_queue_debugfs_fops); if (IS_ERR_OR_NULL(file)) { - dev_warn(kctx->kbdev->dev, - "Unable to create cpu queue debugfs entry"); + dev_warn(kctx->kbdev->dev, "Unable to create cpu queue debugfs entry"); } - - kctx->csf.cpu_queue.buffer = NULL; - kctx->csf.cpu_queue.buffer_size = 0; - atomic_set(&kctx->csf.cpu_queue.dump_req_status, - BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); } -int kbase_csf_cpu_queue_dump(struct 
kbase_context *kctx, - u64 buffer, size_t buf_size) -{ - int err = 0; - - size_t alloc_size = buf_size; - char *dump_buffer; - - if (!buffer || !alloc_size) - goto done; - - alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); - dump_buffer = kzalloc(alloc_size, GFP_KERNEL); - if (ZERO_OR_NULL_PTR(dump_buffer)) { - err = -ENOMEM; - goto done; - } - - WARN_ON(kctx->csf.cpu_queue.buffer != NULL); - - err = copy_from_user(dump_buffer, - u64_to_user_ptr(buffer), - buf_size); - if (err) { - kfree(dump_buffer); - err = -EFAULT; - goto done; - } - - mutex_lock(&kctx->csf.lock); - - kfree(kctx->csf.cpu_queue.buffer); - - if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == - BASE_CSF_CPU_QUEUE_DUMP_PENDING) { - kctx->csf.cpu_queue.buffer = dump_buffer; - kctx->csf.cpu_queue.buffer_size = buf_size; - complete_all(&kctx->csf.cpu_queue.dump_cmp); - } else { - kfree(dump_buffer); - } - - mutex_unlock(&kctx->csf.lock); -done: - return err; -} #else /* * Stub functions for when debugfs is disabled @@ -177,15 +86,4 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) { } -bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, - struct base_csf_notification *req) -{ - return false; -} - -int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, - u64 buffer, size_t buf_size) -{ - return 0; -} #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h index 435a993955fe..f2f983268aae 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,24 +22,8 @@ #ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ #define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ -#include -#include - -#include "mali_kbase.h" - /* Forward declaration */ -struct base_csf_notification; - -#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0 - -/* CPU queue dump status */ -/* Dumping is done or no dumping is in progress. */ -#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0 -/* Dumping request is pending. */ -#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1 -/* Dumping request is issued to Userspace */ -#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2 - +struct kbase_context; /** * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s) @@ -48,43 +32,4 @@ struct base_csf_notification; */ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx); -/** - * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event - * - * @kctx: The kbase_context which cpu queue dumpped belongs to - * @req: Notification with cpu queue dump request. - * - * Return: true if needs CPU queue dump, or false otherwise. - */ -bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, - struct base_csf_notification *req); - -/** - * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump - * - * @kctx: The kbase_context which cpu queue dumpped belongs to - * - * Return: true if it needs cpu queue dump, or false otherwise. 
- */ -static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx) -{ -#if IS_ENABLED(CONFIG_DEBUG_FS) - return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == - BASE_CSF_CPU_QUEUE_DUMP_ISSUED); -#else - return false; -#endif -} - -/** - * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs - * - * @kctx: The kbase_context which cpu queue dumpped belongs to - * @buffer: Buffer containing the cpu queue information. - * @buf_size: Buffer size. - * - * Return: Return 0 for dump successfully, or error code. - */ -int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, - u64 buffer, size_t buf_size); #endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.c new file mode 100644 index 000000000000..6e35eabc8fef --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.c @@ -0,0 +1,648 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase_csf_csg.h" +#include "mali_kbase_csf_scheduler.h" +#include "mali_kbase_csf_util.h" +#include +#include +#include + +/* Wait time to be used cumulatively for all the CSG slots. 
+ * Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be + * any other Host request pending on the FW side and usually FW would be responsive + * to the Doorbell IRQs as it won't do any polling for a long time and also it won't + * have to wait for any HW state transition to complete for publishing the status. + * So it is reasonable to expect that handling of STATUS_UPDATE request would be + * relatively very quick. + */ +#define STATUS_UPDATE_WAIT_TIMEOUT_NS 500 + +/* Number of nearby commands around the "cmd_ptr" of GPU queues. + * + * [cmd_ptr - MAX_NR_NEARBY_INSTR, cmd_ptr + MAX_NR_NEARBY_INSTR]. + */ +#define MAX_NR_NEARBY_INSTR 32 + +/* The bitmask of CSG slots for which the STATUS_UPDATE request completed. + * The access to it is serialized with scheduler lock, so at a time it would + * get used either for "active_groups" or per context "groups". + */ +static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS); + +/* String header for dumping cs user I/O status information */ +#define KBASEP_CSF_CSG_DUMP_CS_HEADER_USER_IO \ + "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n" + +/* String representation of WAITING */ +#define WAITING "Waiting" + +/* String representation of NOT_WAITING */ +#define NOT_WAITING "Not waiting" + +/** + * csg_slot_status_update_finish() - Complete STATUS_UPDATE request for a group slot. + * + * @kbdev: Pointer to kbase device. + * @csg_nr: The group slot number. + * + * Return: true if the STATUS_UPDATE request has completed for the slot, otherwise false. + */ +static bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr) +{ + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[csg_nr]; + + return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ + kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & + CSG_REQ_STATUS_UPDATE_MASK); +} + +/** + * csg_slots_status_update_finish() - Complete STATUS_UPDATE requests for all group slots.
+ * + * @kbdev: Pointer to kbase device. + * @slots_mask: The group slots mask. + * + * Return: true if the request completed for at least one slot in the mask, otherwise false. + */ +static bool csg_slots_status_update_finish(struct kbase_device *kbdev, + const unsigned long *slots_mask) +{ + const u32 max_csg_slots = kbdev->csf.global_iface.group_num; + bool changed = false; + u32 csg_nr; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + for_each_set_bit(csg_nr, slots_mask, max_csg_slots) { + if (csg_slot_status_update_finish(kbdev, csg_nr)) { + set_bit(csg_nr, csg_slots_status_updated); + changed = true; + } + } + + return changed; +} + +/** + * wait_csg_slots_status_update_finish() - Wait completion of STATUS_UPDATE requests for all + * group slots. + * + * @kbdev: Pointer to kbase device. + * @slots_mask: The group slots mask. + */ +static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, + unsigned long *slots_mask) +{ + const u32 max_csg_slots = kbdev->csf.global_iface.group_num; + long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT_NS); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + bitmap_zero(csg_slots_status_updated, max_csg_slots); + + while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) { + remaining = wait_event_timeout(kbdev->csf.event_wait, + csg_slots_status_update_finish(kbdev, slots_mask), + remaining); + if (likely(remaining)) { + bitmap_andnot(slots_mask, slots_mask, csg_slots_status_updated, + max_csg_slots); + } else { + dev_warn(kbdev->dev, "STATUS_UPDATE request timed out for slots 0x%lx", + slots_mask[0]); + } + } +} + +/** + * blocked_reason_to_string() - Convert blocking reason id to a string + * + * @reason_id: blocked_reason + * + * Return: Suitable string + */ +static const char *blocked_reason_to_string(u32 reason_id) +{ + /* possible blocking reasons of a cs */ + static const char *const cs_blocked_reason[] = { + [CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED", +
[CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT", + [CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = "PROGRESS_WAIT", + [CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT", + [CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED", + [CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE", + [CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH" + }; + + if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason))) + return "UNKNOWN_BLOCKED_REASON_ID"; + + return cs_blocked_reason[reason_id]; +} + +/** + * sb_source_supported() - Check SB_SOURCE GLB version support + * + * @glb_version: The GLB version + * + * Return: true if the given GLB version supports SB_SOURCE, otherwise false. + */ +static bool sb_source_supported(u32 glb_version) +{ + bool supported = false; + + if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) && + (GLB_VERSION_MINOR_GET(glb_version) >= 5)) || + ((GLB_VERSION_MAJOR_GET(glb_version) == 2) && + (GLB_VERSION_MINOR_GET(glb_version) >= 6)) || + ((GLB_VERSION_MAJOR_GET(glb_version) == 1) && + (GLB_VERSION_MINOR_GET(glb_version) >= 3))) + supported = true; + + return supported; +} + + +/** + * kbasep_csf_csg_active_dump_cs_status_wait() - Dump active queue sync status information. + * + * @kctx: Pointer to kbase context. + * @kbpr: Pointer to printer instance. + * @glb_version: The GLB version. + * @wait_status: The CS_STATUS_WAIT value. + * @wait_sync_value: The queue's cached sync value. + * @wait_sync_live_value: The queue's sync object current value. + * @wait_sync_pointer: The queue's sync object pointer. + * @sb_status: The CS_STATUS_SCOREBOARDS value. + * @blocked_reason: The CS_STATUS_BLOCKED_REASON value.
+ */ +static void kbasep_csf_csg_active_dump_cs_status_wait(struct kbase_context *kctx, + struct kbasep_printer *kbpr, u32 glb_version, + u32 wait_status, u32 wait_sync_value, + u64 wait_sync_live_value, + u64 wait_sync_pointer, u32 sb_status, + u32 blocked_reason) +{ + kbasep_print(kbpr, "SB_MASK: %d\n", CS_STATUS_WAIT_SB_MASK_GET(wait_status)); + if (sb_source_supported(glb_version)) + kbasep_print(kbpr, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status)); + + { + kbasep_print(kbpr, "PROGRESS_WAIT: %s\n", + CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? WAITING : NOT_WAITING); + } + kbasep_print(kbpr, "PROTM_PEND: %s\n", + CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? WAITING : NOT_WAITING); + kbasep_print(kbpr, "SYNC_WAIT: %s\n", + CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? WAITING : NOT_WAITING); + kbasep_print(kbpr, "WAIT_CONDITION: %s\n", + CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? "greater than" : + "less or equal"); + kbasep_print(kbpr, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer); + kbasep_print(kbpr, "SYNC_VALUE: %d\n", wait_sync_value); + kbasep_print(kbpr, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value); + kbasep_print(kbpr, "SB_STATUS: %u\n", CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status)); + kbasep_print(kbpr, "BLOCKED_REASON: %s\n", + blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET(blocked_reason))); +} + +/** + * kbasep_csf_csg_active_dump_cs_trace() - Dump active queue CS trace information. + * + * @kctx: Pointer to kbase context. + * @kbpr: Pointer to printer instance. + * @stream: Pointer to command stream information. 
+ */ +static void +kbasep_csf_csg_active_dump_cs_trace(struct kbase_context *kctx, struct kbasep_printer *kbpr, + struct kbase_csf_cmd_stream_info const *const stream) +{ + u32 val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_BASE_LO); + u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_BASE_HI) << 32) | + val; + val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_SIZE); + + kbasep_print(kbpr, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val); + + /* Write offset variable address (pointer) */ + val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO); + addr = ((u64)kbase_csf_firmware_cs_input_read(stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI) + << 32) | + val; + kbasep_print(kbpr, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr); + + /* EVENT_SIZE and EVENT_STATEs */ + val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG); + kbasep_print(kbpr, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STATES 0x%x\n", + CS_INSTR_CONFIG_EVENT_SIZE_GET(val), CS_INSTR_CONFIG_EVENT_STATE_GET(val)); +} + +/** + * kbasep_csf_read_cmdbuff_value() - Read a command from a queue offset. + * + * @queue: Address of a GPU command queue to examine. + * @cmdbuff_offset: GPU address offset in queue's memory buffer. 
+ * + * Return: Encoded CSF command (64-bit) + */ +static u64 kbasep_csf_read_cmdbuff_value(struct kbase_queue *queue, u32 cmdbuff_offset) +{ + u64 page_off = cmdbuff_offset >> PAGE_SHIFT; + u64 offset_within_page = cmdbuff_offset & ~PAGE_MASK; + struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); + u64 *cmdbuff = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL)); + u64 value; + + if (!cmdbuff) { + struct kbase_context *kctx = queue->kctx; + + dev_info(kctx->kbdev->dev, "%s failed to map the buffer page for read a command!", + __func__); + /* Return an alternative 0 for dumping operation*/ + value = 0; + } else { + value = cmdbuff[offset_within_page / sizeof(u64)]; + vunmap(cmdbuff); + } + + return value; +} + +/** + * kbasep_csf_csg_active_dump_cs_status_cmd_ptr() - Dump CMD_PTR information and nearby commands. + * + * @kbpr: Pointer to printer instance. + * @queue: Address of a GPU command queue to examine. + * @cmd_ptr: CMD_PTR address. + */ +static void kbasep_csf_csg_active_dump_cs_status_cmd_ptr(struct kbasep_printer *kbpr, + struct kbase_queue *queue, u64 cmd_ptr) +{ + u64 cmd_ptr_offset; + u64 cursor, end_cursor, instr; + u32 nr_nearby_instr_size; + struct kbase_va_region *reg; + + kbase_gpu_vm_lock(queue->kctx); + reg = kbase_region_tracker_find_region_enclosing_address(queue->kctx, cmd_ptr); + if (reg && !(reg->flags & KBASE_REG_FREE) && (reg->flags & KBASE_REG_CPU_RD) && + (reg->gpu_alloc->type == KBASE_MEM_TYPE_NATIVE)) { + kbasep_print(kbpr, "CMD_PTR region nr_pages: %zu\n", reg->nr_pages); + nr_nearby_instr_size = MAX_NR_NEARBY_INSTR * sizeof(u64); + cmd_ptr_offset = cmd_ptr - queue->base_addr; + cursor = (cmd_ptr_offset > nr_nearby_instr_size) ? 
+ cmd_ptr_offset - nr_nearby_instr_size : + 0; + end_cursor = cmd_ptr_offset + nr_nearby_instr_size; + if (end_cursor > queue->size) + end_cursor = queue->size; + kbasep_print(kbpr, + "queue:GPU-%u-%u-%u at:0x%.16llx cmd_ptr:0x%.16llx " + "dump_begin:0x%.16llx dump_end:0x%.16llx\n", + queue->kctx->id, queue->group->handle, queue->csi_index, + (queue->base_addr + cursor), cmd_ptr, (queue->base_addr + cursor), + (queue->base_addr + end_cursor)); + while ((cursor < end_cursor)) { + instr = kbasep_csf_read_cmdbuff_value(queue, (u32)cursor); + if (instr != 0) + kbasep_print(kbpr, + "queue:GPU-%u-%u-%u at:0x%.16llx cmd:0x%.16llx\n", + queue->kctx->id, queue->group->handle, + queue->csi_index, (queue->base_addr + cursor), instr); + cursor += sizeof(u64); + } + } + kbase_gpu_vm_unlock(queue->kctx); +} + +/** + * kbasep_csf_csg_active_dump_queue() - Dump GPU command queue debug information. + * + * @kbpr: Pointer to printer instance. + * @queue: Address of a GPU command queue to examine + */ +static void kbasep_csf_csg_active_dump_queue(struct kbasep_printer *kbpr, struct kbase_queue *queue) +{ + u64 *addr; + u32 *addr32; + u64 cs_extract; + u64 cs_insert; + u32 cs_active; + u64 wait_sync_pointer; + u32 wait_status, wait_sync_value; + u32 sb_status; + u32 blocked_reason; + struct kbase_vmap_struct *mapping; + u64 *evt; + u64 wait_sync_live_value; + u32 glb_version; + u64 cmd_ptr; + + if (!queue) + return; + + glb_version = queue->kctx->kbdev->csf.global_iface.version; + + if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || !queue->group)) + return; + + addr = queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / sizeof(*addr)]; + + addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr); + cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)]; + + addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(*addr)); + cs_active = addr32[CS_ACTIVE / sizeof(*addr32)]; + + kbasep_puts(kbpr, KBASEP_CSF_CSG_DUMP_CS_HEADER_USER_IO); + kbasep_print(kbpr, "%8d, %16llx, %8x, %4u, 
%16llx, %16llx, %6u, %8d\n", queue->csi_index, + queue->base_addr, queue->size, queue->priority, cs_insert, cs_extract, + cs_active, queue->doorbell_nr); + + /* Print status information for blocked group waiting for sync object. For on-slot queues, + * if cs_trace is enabled, dump the interface's cs_trace configuration. + */ + if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) { + kbasep_print(kbpr, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr); + if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { + wait_status = queue->status_wait; + wait_sync_value = queue->sync_value; + wait_sync_pointer = queue->sync_ptr; + sb_status = queue->sb_status; + blocked_reason = queue->blocked_reason; + + evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, + &mapping); + if (evt) { + wait_sync_live_value = evt[0]; + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } else { + wait_sync_live_value = U64_MAX; + } + + kbasep_csf_csg_active_dump_cs_status_wait( + queue->kctx, kbpr, glb_version, wait_status, wait_sync_value, + wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason); + } + kbasep_csf_csg_active_dump_cs_status_cmd_ptr(kbpr, queue, queue->saved_cmd_ptr); + } else { + struct kbase_device const *const kbdev = queue->group->kctx->kbdev; + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[queue->group->csg_nr]; + struct kbase_csf_cmd_stream_info const *const stream = + &ginfo->streams[queue->csi_index]; + u32 req_res; + + if (WARN_ON(!stream)) + return; + + cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO); + cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32; + req_res = kbase_csf_firmware_cs_output(stream, CS_STATUS_REQ_RESOURCE); + + kbasep_print(kbpr, "CMD_PTR: 0x%llx\n", cmd_ptr); + kbasep_print(kbpr, "REQ_RESOURCE [COMPUTE]: %d\n", + CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res)); + kbasep_print(kbpr, "REQ_RESOURCE 
[FRAGMENT]: %d\n", + CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res)); + kbasep_print(kbpr, "REQ_RESOURCE [TILER]: %d\n", + CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res)); + kbasep_print(kbpr, "REQ_RESOURCE [IDVS]: %d\n", + CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res)); + + wait_status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); + wait_sync_value = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_VALUE); + wait_sync_pointer = + kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO); + wait_sync_pointer |= + (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_HI) + << 32; + + sb_status = kbase_csf_firmware_cs_output(stream, CS_STATUS_SCOREBOARDS); + blocked_reason = kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON); + + evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); + if (evt) { + wait_sync_live_value = evt[0]; + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } else { + wait_sync_live_value = U64_MAX; + } + + kbasep_csf_csg_active_dump_cs_status_wait(queue->kctx, kbpr, glb_version, + wait_status, wait_sync_value, + wait_sync_live_value, wait_sync_pointer, + sb_status, blocked_reason); + /* Dealing with cs_trace */ + if (kbase_csf_scheduler_queue_has_trace(queue)) + kbasep_csf_csg_active_dump_cs_trace(queue->kctx, kbpr, stream); + else + kbasep_print(kbpr, "NO CS_TRACE\n"); + kbasep_csf_csg_active_dump_cs_status_cmd_ptr(kbpr, queue, cmd_ptr); + } +} + +/** + * kbasep_csf_csg_active_dump_group() - Dump an active group. + * + * @kbpr: Pointer to printer instance. + * @group: GPU group. 
+ */ +static void kbasep_csf_csg_active_dump_group(struct kbasep_printer *kbpr, + struct kbase_queue_group *const group) +{ + if (kbase_csf_scheduler_group_get_slot(group) >= 0) { + struct kbase_device *const kbdev = group->kctx->kbdev; + u32 ep_c, ep_r; + char exclusive; + char idle = 'N'; + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[group->csg_nr]; + u8 slot_priority = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; + + ep_c = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_CURRENT); + ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); + + if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r)) + exclusive = 'C'; + else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r)) + exclusive = 'F'; + else + exclusive = '0'; + + if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & + CSG_STATUS_STATE_IDLE_MASK) + idle = 'Y'; + + if (!test_bit(group->csg_nr, csg_slots_status_updated)) { + kbasep_print(kbpr, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", + group->csg_nr); + kbasep_print(kbpr, "*** The following group-record is likely stale\n"); + } + kbasep_print( + kbpr, + "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req)," + " F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); + kbasep_print( + kbpr, + "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", + group->handle, group->csg_nr, slot_priority, group->run_state, + group->priority, CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), + CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), + CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), + CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle); + + } else { + kbasep_print(kbpr, "GroupID, CSG NR, Run State, Priority\n"); + kbasep_print(kbpr, "%7d, %6d, %9d, %8d\n", group->handle, group->csg_nr, + group->run_state, group->priority); + } + + if (group->run_state != KBASE_CSF_GROUP_TERMINATED) { 
+ unsigned int i; + + kbasep_print(kbpr, "Bound queues:\n"); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) + kbasep_csf_csg_active_dump_queue(kbpr, group->bound_queues[i]); + } +} + +void kbase_csf_csg_update_status(struct kbase_device *kbdev) +{ + u32 max_csg_slots = kbdev->csf.global_iface.group_num; + DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; + u32 csg_nr; + unsigned long flags; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell + * ring for Extract offset update, shall not be made when MCU has been + * put to sleep otherwise it will undesirably make MCU exit the sleep + * state. Also it isn't really needed as FW will implicitly update the + * status of all on-slot groups when MCU sleep request is sent to it. + */ + if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { + /* Wait for the MCU sleep request to complete. */ + kbase_pm_wait_for_desired_state(kbdev); + bitmap_copy(csg_slots_status_updated, kbdev->csf.scheduler.csg_inuse_bitmap, + max_csg_slots); + return; + } + + for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + if (!group) + continue; + /* Ring the User doorbell for FW to update the Extract offset */ + kbase_csf_ring_doorbell(kbdev, group->doorbell_nr); + set_bit(csg_nr, used_csgs); + } + + /* Return early if there are no on-slot groups */ + if (bitmap_empty(used_csgs, max_csg_slots)) + return; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + for_each_set_bit(csg_nr, used_csgs, max_csg_slots) { + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[csg_nr]; + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, + ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), + CSG_REQ_STATUS_UPDATE_MASK); + } + + BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE)); + 
kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_csg_slots_status_update_finish(kbdev, used_csgs); + /* Wait for the user doorbell ring to take effect */ + msleep(100); +} + +int kbasep_csf_csg_dump_print(struct kbase_context *const kctx, struct kbasep_printer *kbpr) +{ + u32 gr; + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return -EINVAL; + + kbdev = kctx->kbdev; + + kbasep_print(kbpr, + "CSF groups status (version: v" __stringify(MALI_CSF_CSG_DUMP_VERSION) "):\n"); + + mutex_lock(&kctx->csf.lock); + kbase_csf_scheduler_lock(kbdev); + kbase_csf_csg_update_status(kbdev); + kbasep_print(kbpr, "Ctx %d_%d\n", kctx->tgid, kctx->id); + for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { + struct kbase_queue_group *const group = kctx->csf.queue_groups[gr]; + + if (!group) + continue; + + kbasep_csf_csg_active_dump_group(kbpr, group); + } + kbase_csf_scheduler_unlock(kbdev); + mutex_unlock(&kctx->csf.lock); + + return 0; +} + +int kbasep_csf_csg_active_dump_print(struct kbase_device *kbdev, struct kbasep_printer *kbpr) +{ + u32 csg_nr; + u32 num_groups; + + if (WARN_ON(!kbdev)) + return -EINVAL; + + num_groups = kbdev->csf.global_iface.group_num; + + kbasep_print(kbpr, "CSF active groups status (version: v" __stringify( + MALI_CSF_CSG_DUMP_VERSION) "):\n"); + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_csg_update_status(kbdev); + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + + if (!group) + continue; + + kbasep_print(kbpr, "Ctx %d_%d\n", group->kctx->tgid, group->kctx->id); + + kbasep_csf_csg_active_dump_group(kbpr, group); + } + kbase_csf_scheduler_unlock(kbdev); + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.h new file mode 100644 index 000000000000..bc32042ef3fa --- /dev/null +++ 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_CSG_H_ +#define _KBASE_CSF_CSG_H_ + +/* Forward declaration */ +struct kbase_context; +struct kbase_device; +struct kbasep_printer; + +#define MALI_CSF_CSG_DUMP_VERSION 0 + +/** + * kbase_csf_csg_update_status() - Update on-slot gpu group statuses + * + * @kbdev: Pointer to the device. + */ +void kbase_csf_csg_update_status(struct kbase_device *kbdev); + +/** + * kbasep_csf_csg_dump_print() - Dump all gpu groups information to file + * + * @kctx: The kbase_context which gpu group dumped belongs to. + * @kbpr: Pointer to printer instance. + * + * Return: Return 0 for dump successfully, or error code. + */ +int kbasep_csf_csg_dump_print(struct kbase_context *const kctx, struct kbasep_printer *kbpr); + +/** + * kbasep_csf_csg_active_dump_print() - Dump on-slot gpu groups information to file + * + * @kbdev: Pointer to the device. + * @kbpr: Pointer to printer instance. + * + * Return: Return 0 for dump successfully, or error code. 
+ */ +int kbasep_csf_csg_active_dump_print(struct kbase_device *kbdev, struct kbasep_printer *kbpr); + +#endif /* _KBASE_CSF_CSG_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c index e96044ae6239..736545c86c99 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,143 +20,26 @@ */ #include "mali_kbase_csf_csg_debugfs.h" -#include -#include -#include -#include #if IS_ENABLED(CONFIG_DEBUG_FS) +#include "mali_kbase_csf_csg.h" #include "mali_kbase_csf_tl_reader.h" - -/* Wait time to be used cumulatively for all the CSG slots. - * Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be - * any other Host request pending on the FW side and usually FW would be responsive - * to the Doorbell IRQs as it won't do any polling for a long time and also it won't - * have to wait for any HW state transition to complete for publishing the status. - * So it is reasonable to expect that handling of STATUS_UPDATE request would be - * relatively very quick. - */ -#define STATUS_UPDATE_WAIT_TIMEOUT 500 - -/* The bitmask of CSG slots for which the STATUS_UPDATE request completed. - * The access to it is serialized with scheduler lock, so at a time it would - * get used either for "active_groups" or per context "groups" debugfs file. 
- */ -static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS); - -static -bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr) -{ - struct kbase_csf_cmd_stream_group_info const *const ginfo = - &kbdev->csf.global_iface.groups[csg_nr]; - - return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ - kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & - CSG_REQ_STATUS_UPDATE_MASK); -} - -static -bool csg_slots_status_update_finish(struct kbase_device *kbdev, - const unsigned long *slots_mask) -{ - const u32 max_csg_slots = kbdev->csf.global_iface.group_num; - bool changed = false; - u32 csg_nr; - - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - for_each_set_bit(csg_nr, slots_mask, max_csg_slots) { - if (csg_slot_status_update_finish(kbdev, csg_nr)) { - set_bit(csg_nr, csg_slots_status_updated); - changed = true; - } - } - - return changed; -} - -static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, - unsigned long *slots_mask) -{ - const u32 max_csg_slots = kbdev->csf.global_iface.group_num; - long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT); - - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - bitmap_zero(csg_slots_status_updated, max_csg_slots); - - while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) { - remaining = wait_event_timeout(kbdev->csf.event_wait, - csg_slots_status_update_finish(kbdev, slots_mask), - remaining); - if (likely(remaining)) { - bitmap_andnot(slots_mask, slots_mask, - csg_slots_status_updated, max_csg_slots); - } else { - dev_warn(kbdev->dev, - "STATUS_UPDATE request timed out for slots 0x%lx", - slots_mask[0]); - } - } -} - -void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev) -{ - u32 max_csg_slots = kbdev->csf.global_iface.group_num; - DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; - u32 csg_nr; - unsigned long flags; - - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - /* Global doorbell ring for CSG 
STATUS_UPDATE request or User doorbell - * ring for Extract offset update, shall not be made when MCU has been - * put to sleep otherwise it will undesirably make MCU exit the sleep - * state. Also it isn't really needed as FW will implicitly update the - * status of all on-slot groups when MCU sleep request is sent to it. - */ - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. */ - kbase_pm_wait_for_desired_state(kbdev); - bitmap_copy(csg_slots_status_updated, - kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); - return; - } - - for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) { - struct kbase_queue_group *const group = - kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; - if (!group) - continue; - /* Ring the User doorbell for FW to update the Extract offset */ - kbase_csf_ring_doorbell(kbdev, group->doorbell_nr); - set_bit(csg_nr, used_csgs); - } - - /* Return early if there are no on-slot groups */ - if (bitmap_empty(used_csgs, max_csg_slots)) - return; - - kbase_csf_scheduler_spin_lock(kbdev, &flags); - for_each_set_bit(csg_nr, used_csgs, max_csg_slots) { - struct kbase_csf_cmd_stream_group_info const *const ginfo = - &kbdev->csf.global_iface.groups[csg_nr]; - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, - ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), - CSG_REQ_STATUS_UPDATE_MASK); - } - - BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE)); - kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - wait_csg_slots_status_update_finish(kbdev, used_csgs); - /* Wait for the User doobell ring to take effect */ - msleep(100); -} +#include "mali_kbase_csf_util.h" +#include +#include +#include #define MAX_SCHED_STATE_STRING_LEN (16) +/** + * scheduler_state_to_string() - Get string name of scheduler state. + * + * @kbdev: Pointer to kbase device. + * @sched_state: Scheduler state. + * + * Return: Suitable string. 
+ */ static const char *scheduler_state_to_string(struct kbase_device *kbdev, - enum kbase_csf_scheduler_state sched_state) + enum kbase_csf_scheduler_state sched_state) { switch (sched_state) { case SCHED_BUSY: @@ -176,404 +59,62 @@ static const char *scheduler_state_to_string(struct kbase_device *kbdev, } /** - * blocked_reason_to_string() - Convert blocking reason id to a string - * - * @reason_id: blocked_reason - * - * Return: Suitable string - */ -static const char *blocked_reason_to_string(u32 reason_id) -{ - /* possible blocking reasons of a cs */ - static const char *const cs_blocked_reason[] = { - [CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED", - [CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT", - [CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = - "PROGRESS_WAIT", - [CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT", - [CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED", - [CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE", - [CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH" - }; - - if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason))) - return "UNKNOWN_BLOCKED_REASON_ID"; - - return cs_blocked_reason[reason_id]; -} - -static bool sb_source_supported(u32 glb_version) -{ - bool supported = false; - - if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) && - (GLB_VERSION_MINOR_GET(glb_version) >= 5)) || - ((GLB_VERSION_MAJOR_GET(glb_version) == 2) && - (GLB_VERSION_MINOR_GET(glb_version) >= 6)) || - ((GLB_VERSION_MAJOR_GET(glb_version) == 1) && - (GLB_VERSION_MINOR_GET(glb_version) >= 3))) - supported = true; - - return supported; -} - -static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value, - u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason) -{ -#define WAITING "Waiting" -#define NOT_WAITING "Not waiting" - - seq_printf(file, "SB_MASK: %d\n", - CS_STATUS_WAIT_SB_MASK_GET(wait_status)); - if 
(sb_source_supported(glb_version)) - seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status)); - seq_printf(file, "PROGRESS_WAIT: %s\n", - CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? - WAITING : NOT_WAITING); - seq_printf(file, "PROTM_PEND: %s\n", - CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? - WAITING : NOT_WAITING); - seq_printf(file, "SYNC_WAIT: %s\n", - CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? - WAITING : NOT_WAITING); - seq_printf(file, "WAIT_CONDITION: %s\n", - CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? - "greater than" : "less or equal"); - seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer); - seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value); - seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value); - seq_printf(file, "SB_STATUS: %u\n", - CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status)); - seq_printf(file, "BLOCKED_REASON: %s\n", - blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET( - blocked_reason))); -} - -static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, - struct kbase_csf_cmd_stream_info const *const stream) -{ - u32 val = kbase_csf_firmware_cs_input_read(stream, - CS_INSTR_BUFFER_BASE_LO); - u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, - CS_INSTR_BUFFER_BASE_HI) << 32) | val; - val = kbase_csf_firmware_cs_input_read(stream, - CS_INSTR_BUFFER_SIZE); - - seq_printf(file, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val); - - /* Write offset variable address (pointer) */ - val = kbase_csf_firmware_cs_input_read(stream, - CS_INSTR_BUFFER_OFFSET_POINTER_LO); - addr = ((u64)kbase_csf_firmware_cs_input_read(stream, - CS_INSTR_BUFFER_OFFSET_POINTER_HI) << 32) | val; - seq_printf(file, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr); - - /* EVENT_SIZE and EVENT_STATEs */ - val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG); - seq_printf(file, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STAES 0x%x\n", - 
CS_INSTR_CONFIG_EVENT_SIZE_GET(val), - CS_INSTR_CONFIG_EVENT_STATE_GET(val)); -} - -/** - * kbasep_csf_scheduler_dump_active_queue() - Print GPU command queue - * debug information - * - * @file: seq_file for printing to - * @queue: Address of a GPU command queue to examine - */ -static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, - struct kbase_queue *queue) -{ - u32 *addr; - u64 cs_extract; - u64 cs_insert; - u32 cs_active; - u64 wait_sync_pointer; - u32 wait_status, wait_sync_value; - u32 sb_status; - u32 blocked_reason; - struct kbase_vmap_struct *mapping; - u64 *evt; - u64 wait_sync_live_value; - u32 glb_version; - - if (!queue) - return; - - glb_version = queue->kctx->kbdev->csf.global_iface.version; - - if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || - !queue->group)) - return; - - addr = (u32 *)queue->user_io_addr; - cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); - - addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); - cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); - cs_active = addr[CS_ACTIVE/4]; - -#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ - "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n" - - seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n", - queue->csi_index, queue->base_addr, - queue->size, - queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr); - - /* Print status information for blocked group waiting for sync object. For on-slot queues, - * if cs_trace is enabled, dump the interface's cs_trace configuration. 
- */ - if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) { - seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr); - if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { - wait_status = queue->status_wait; - wait_sync_value = queue->sync_value; - wait_sync_pointer = queue->sync_ptr; - sb_status = queue->sb_status; - blocked_reason = queue->blocked_reason; - - evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); - if (evt) { - wait_sync_live_value = evt[0]; - kbase_phy_alloc_mapping_put(queue->kctx, mapping); - } else { - wait_sync_live_value = U64_MAX; - } - - kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - file, glb_version, wait_status, wait_sync_value, - wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason); - } - } else { - struct kbase_device const *const kbdev = - queue->group->kctx->kbdev; - struct kbase_csf_cmd_stream_group_info const *const ginfo = - &kbdev->csf.global_iface.groups[queue->group->csg_nr]; - struct kbase_csf_cmd_stream_info const *const stream = - &ginfo->streams[queue->csi_index]; - u64 cmd_ptr; - u32 req_res; - - if (WARN_ON(!stream)) - return; - - cmd_ptr = kbase_csf_firmware_cs_output(stream, - CS_STATUS_CMD_PTR_LO); - cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, - CS_STATUS_CMD_PTR_HI) << 32; - req_res = kbase_csf_firmware_cs_output(stream, - CS_STATUS_REQ_RESOURCE); - - seq_printf(file, "CMD_PTR: 0x%llx\n", cmd_ptr); - seq_printf(file, "REQ_RESOURCE [COMPUTE]: %d\n", - CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res)); - seq_printf(file, "REQ_RESOURCE [FRAGMENT]: %d\n", - CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res)); - seq_printf(file, "REQ_RESOURCE [TILER]: %d\n", - CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res)); - seq_printf(file, "REQ_RESOURCE [IDVS]: %d\n", - CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res)); - - wait_status = kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT); - wait_sync_value = 
kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT_SYNC_VALUE); - wait_sync_pointer = kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT_SYNC_POINTER_LO); - wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; - - sb_status = kbase_csf_firmware_cs_output(stream, - CS_STATUS_SCOREBOARDS); - blocked_reason = kbase_csf_firmware_cs_output( - stream, CS_STATUS_BLOCKED_REASON); - - evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); - if (evt) { - wait_sync_live_value = evt[0]; - kbase_phy_alloc_mapping_put(queue->kctx, mapping); - } else { - wait_sync_live_value = U64_MAX; - } - - kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - file, glb_version, wait_status, wait_sync_value, wait_sync_live_value, - wait_sync_pointer, sb_status, blocked_reason); - /* Dealing with cs_trace */ - if (kbase_csf_scheduler_queue_has_trace(queue)) - kbasep_csf_scheduler_dump_active_cs_trace(file, stream); - else - seq_puts(file, "NO CS_TRACE\n"); - } - - seq_puts(file, "\n"); -} - -static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, - struct kbase_queue_group *const group) -{ - if (kbase_csf_scheduler_group_get_slot(group) >= 0) { - struct kbase_device *const kbdev = group->kctx->kbdev; - u32 ep_c, ep_r; - char exclusive; - char idle = 'N'; - struct kbase_csf_cmd_stream_group_info const *const ginfo = - &kbdev->csf.global_iface.groups[group->csg_nr]; - u8 slot_priority = - kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; - - ep_c = kbase_csf_firmware_csg_output(ginfo, - CSG_STATUS_EP_CURRENT); - ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); - - if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r)) - exclusive = 'C'; - else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r)) - exclusive = 'F'; - else - exclusive = '0'; - - if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & - CSG_STATUS_STATE_IDLE_MASK) - idle = 'Y'; - - if 
(!test_bit(group->csg_nr, csg_slots_status_updated)) { - seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", - group->csg_nr); - seq_puts(file, "*** The following group-record is likely stale\n"); - } - - seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); - seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", - group->handle, - group->csg_nr, - slot_priority, - group->run_state, - group->priority, - CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), - CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), - CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), - CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), - CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), - CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), - exclusive, - idle); - } else { - seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); - seq_printf(file, "%7d, %6d, %9d, %8d\n", - group->handle, - group->csg_nr, - group->run_state, - group->priority); - } - - if (group->run_state != KBASE_CSF_GROUP_TERMINATED) { - unsigned int i; - - seq_puts(file, "Bound queues:\n"); - - for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { - kbasep_csf_scheduler_dump_active_queue(file, - group->bound_queues[i]); - } - } - - seq_puts(file, "\n"); -} - -/** - * kbasep_csf_queue_group_debugfs_show() - Print per-context GPU command queue - * group debug information + * kbasep_csf_queue_show_groups() - Print per-context GPU command queue + * group debug information * * @file: The seq_file for printing to * @data: The debugfs dentry private data, a pointer to kbase context * * Return: Negative error code or 0 on success. 
*/ -static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, - void *data) +static int kbasep_csf_queue_show_groups(struct seq_file *file, void *data) { - u32 gr; + struct kbasep_printer *kbpr; struct kbase_context *const kctx = file->private; - struct kbase_device *kbdev; + int ret = -EINVAL; + CSTD_UNUSED(data); - if (WARN_ON(!kctx)) - return -EINVAL; - - kbdev = kctx->kbdev; - - seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", - MALI_CSF_CSG_DEBUGFS_VERSION); - - mutex_lock(&kctx->csf.lock); - kbase_csf_scheduler_lock(kbdev); - kbase_csf_debugfs_update_active_groups_status(kbdev); - for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { - struct kbase_queue_group *const group = - kctx->csf.queue_groups[gr]; - - if (group) - kbasep_csf_scheduler_dump_active_group(file, group); + kbpr = kbasep_printer_file_init(file); + if (kbpr != NULL) { + ret = kbasep_csf_csg_dump_print(kctx, kbpr); + kbasep_printer_term(kbpr); } - kbase_csf_scheduler_unlock(kbdev); - mutex_unlock(&kctx->csf.lock); - return 0; + return ret; } /** - * kbasep_csf_scheduler_dump_active_groups() - Print debug info for active - * GPU command queue groups + * kbasep_csf_csg_active_show_groups() - Print debug info for active GPU command queue groups * * @file: The seq_file for printing to * @data: The debugfs dentry private data, a pointer to kbase_device * * Return: Negative error code or 0 on success. 
*/ -static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, - void *data) +static int kbasep_csf_csg_active_show_groups(struct seq_file *file, void *data) { - u32 csg_nr; struct kbase_device *kbdev = file->private; - u32 num_groups = kbdev->csf.global_iface.group_num; + struct kbasep_printer *kbpr; + int ret = -EINVAL; + CSTD_UNUSED(data); - seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", - MALI_CSF_CSG_DEBUGFS_VERSION); - - kbase_csf_scheduler_lock(kbdev); - kbase_csf_debugfs_update_active_groups_status(kbdev); - for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { - struct kbase_queue_group *const group = - kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; - - if (!group) - continue; - - seq_printf(file, "\nCtx %d_%d\n", group->kctx->tgid, - group->kctx->id); - - kbasep_csf_scheduler_dump_active_group(file, group); + kbpr = kbasep_printer_file_init(file); + if (kbpr != NULL) { + ret = kbasep_csf_csg_active_dump_print(kbdev, kbpr); + kbasep_printer_term(kbpr); } - kbase_csf_scheduler_unlock(kbdev); - return 0; + return ret; } -static int kbasep_csf_queue_group_debugfs_open(struct inode *in, - struct file *file) +static int kbasep_csf_queue_group_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_csf_queue_group_debugfs_show, - in->i_private); + return single_open(file, kbasep_csf_queue_show_groups, in->i_private); } -static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, - struct file *file) +static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_csf_scheduler_dump_active_groups, - in->i_private); + return single_open(file, kbasep_csf_csg_active_show_groups, in->i_private); } static const struct file_operations kbasep_csf_queue_group_debugfs_fops = { @@ -591,25 +132,23 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; - file = 
debugfs_create_file("groups", mode, - kctx->kctx_dentry, kctx, &kbasep_csf_queue_group_debugfs_fops); + file = debugfs_create_file("groups", mode, kctx->kctx_dentry, kctx, + &kbasep_csf_queue_group_debugfs_fops); if (IS_ERR_OR_NULL(file)) { dev_warn(kctx->kbdev->dev, - "Unable to create per context queue groups debugfs entry"); + "Unable to create per context queue groups debugfs entry"); } } -static const struct file_operations - kbasep_csf_active_queue_groups_debugfs_fops = { +static const struct file_operations kbasep_csf_active_queue_groups_debugfs_fops = { .open = kbasep_csf_active_queue_groups_debugfs_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; -static int kbasep_csf_debugfs_scheduling_timer_enabled_get( - void *data, u64 *val) +static int kbasep_csf_debugfs_scheduling_timer_enabled_get(void *data, u64 *val) { struct kbase_device *const kbdev = data; @@ -618,8 +157,7 @@ static int kbasep_csf_debugfs_scheduling_timer_enabled_get( return 0; } -static int kbasep_csf_debugfs_scheduling_timer_enabled_set( - void *data, u64 val) +static int kbasep_csf_debugfs_scheduling_timer_enabled_set(void *data, u64 val) { struct kbase_device *const kbdev = data; @@ -628,10 +166,10 @@ static int kbasep_csf_debugfs_scheduling_timer_enabled_set( return 0; } -static int kbasep_csf_debugfs_scheduling_timer_kick_set( - void *data, u64 val) +static int kbasep_csf_debugfs_scheduling_timer_kick_set(void *data, u64 val) { struct kbase_device *const kbdev = data; + CSTD_UNUSED(val); kbase_csf_scheduler_kick(kbdev); @@ -660,8 +198,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL, * size of the state string if it was copied successfully to the * User buffer or a negative value in case of an error. 
*/ -static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, - char __user *user_buf, size_t count, loff_t *ppos) +static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) { struct kbase_device *kbdev = file->private_data; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -674,8 +212,7 @@ static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, if (!state_string) count = 0; - return simple_read_from_buffer(user_buf, count, ppos, - state_string, strlen(state_string)); + return simple_read_from_buffer(user_buf, count, ppos, state_string, strlen(state_string)); } /** @@ -694,8 +231,8 @@ static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, * state or if copy from user buffer failed, otherwise the length of * the User buffer. */ -static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file, - const char __user *ubuf, size_t count, loff_t *ppos) +static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file, const char __user *ubuf, + size_t count, loff_t *ppos) { struct kbase_device *kbdev = file->private_data; char buf[MAX_SCHED_STATE_STRING_LEN]; @@ -735,19 +272,15 @@ static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = { void kbase_csf_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_file("active_groups", 0444, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_active_queue_groups_debugfs_fops); + debugfs_create_file("active_groups", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_active_queue_groups_debugfs_fops); - debugfs_create_file("scheduling_timer_enabled", 0644, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_debugfs_scheduling_timer_enabled_fops); - debugfs_create_file("scheduling_timer_kick", 0200, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_debugfs_scheduling_timer_kick_fops); - debugfs_create_file("scheduler_state", 0644, - 
kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_debugfs_scheduler_state_fops); + debugfs_create_file("scheduling_timer_enabled", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_scheduling_timer_enabled_fops); + debugfs_create_file("scheduling_timer_kick", 0200, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_scheduling_timer_kick_fops); + debugfs_create_file("scheduler_state", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_scheduler_state_fops); kbase_csf_tl_reader_debugfs_init(kbdev); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h index 16a548bf8acb..8b1590ff334e 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,12 +22,9 @@ #ifndef _KBASE_CSF_CSG_DEBUGFS_H_ #define _KBASE_CSF_CSG_DEBUGFS_H_ -/* Forward declarations */ -struct kbase_device; +/* Forward declaration */ struct kbase_context; -struct kbase_queue_group; - -#define MALI_CSF_CSG_DEBUGFS_VERSION 0 +struct kbase_device; /** * kbase_csf_queue_group_debugfs_init() - Add debugfs entry for queue groups @@ -44,11 +41,4 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); */ void kbase_csf_debugfs_init(struct kbase_device *kbdev); -/** - * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses - * - * @kbdev: Pointer to the device - */ -void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev); - #endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 6fa0e27d657f..c90b531d36b7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -29,6 +29,7 @@ #include #include +#include #include "mali_kbase_csf_firmware.h" #include "mali_kbase_refcount_defs.h" #include "mali_kbase_csf_event.h" @@ -52,13 +53,13 @@ */ #define MAX_TILER_HEAPS (128) -#define CSF_FIRMWARE_ENTRY_READ (1ul << 0) -#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1) -#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2) +#define CSF_FIRMWARE_ENTRY_READ (1ul << 0) +#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1) +#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2) #define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3) -#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5) -#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30) -#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31) +#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5) +#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30) +#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31) /** * 
enum kbase_csf_queue_bind_state - bind state of the queue @@ -265,15 +266,18 @@ enum kbase_queue_group_priority { * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. - * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended. + * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended. * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion - * of a MMU operation + * of a MMU operation. + * @KCPU_FENCE_SIGNAL_TIMEOUT: Waiting time in ms for triggering a KCPU queue sync state dump * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in * the enum. + * @KBASE_DEFAULT_TIMEOUT: Default timeout used when an invalid selector is passed + * to the pre-computed timeout getter. */ enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT, @@ -284,9 +288,11 @@ enum kbase_timeout_selector { CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, MMU_AS_INACTIVE_WAIT_TIMEOUT, + KCPU_FENCE_SIGNAL_TIMEOUT, /* Must be the last in the enum */ - KBASE_TIMEOUT_SELECTOR_COUNT + KBASE_TIMEOUT_SELECTOR_COUNT, + KBASE_DEFAULT_TIMEOUT = CSF_FIRMWARE_TIMEOUT }; /** @@ -324,6 +330,14 @@ struct kbase_csf_notification { * It is in page units. * @link: Link to the linked list of GPU command queues created per * GPU address space. + * @pending_kick: Indicates whether there is a pending kick to be handled. + * @pending_kick_link: Link to the linked list of GPU command queues that have + * been kicked, but the kick has not yet been processed. 
+ * This link would be deleted right before the kick is + * handled to allow for future kicks to occur in the mean + * time. For this reason, this must not be used to check + * for the presence of a pending queue kick. @pending_kick + * should be used instead. * @refcount: Reference count, stands for the number of times the queue * has been referenced. The reference is taken when it is * created, when it is bound to the group and also when the @@ -336,6 +350,7 @@ struct kbase_csf_notification { * @base_addr: Base address of the CS buffer. * @size: Size of the CS buffer. * @priority: Priority of this queue within the group. + * @group_priority: Priority of the group to which this queue has been bound. * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state * @csi_index: The ID of the assigned CS hardware interface. * @enabled: Indicating whether the CS is running, or not. @@ -363,7 +378,6 @@ struct kbase_csf_notification { * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. * @trace_buffer_size: CS trace buffer size for the queue. * @trace_cfg: CS trace configuration parameters. - * @error: GPU command queue fatal information to pass to user space. * @cs_error_work: Work item to handle the CS fatal event reported for this * queue or the CS fault event if dump on fault is enabled * and acknowledgment for CS fault event needs to be done @@ -373,7 +387,6 @@ struct kbase_csf_notification { * @cs_error: Records information about the CS fatal event or * about CS fault event if dump on fault is enabled. * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. - * @pending: Indicating whether the queue has new submitted work. * @extract_ofs: The current EXTRACT offset, this is only updated when handling * the GLB IDLE IRQ if the idle timeout value is non-0 in order * to help detect a queue's true idle status. 
@@ -386,11 +399,13 @@ struct kbase_queue { struct kbase_context *kctx; u64 user_io_gpu_va; struct tagged_addr phys[2]; - char *user_io_addr; + u64 *user_io_addr; u64 handle; int doorbell_nr; unsigned long db_file_offset; struct list_head link; + atomic_t pending_kick; + struct list_head pending_kick_link; kbase_refcount_t refcount; struct kbase_queue_group *group; struct kbase_va_region *queue_reg; @@ -398,6 +413,7 @@ struct kbase_queue { u64 base_addr; u32 size; u8 priority; + u8 group_priority; s8 csi_index; enum kbase_csf_queue_bind_state bind_state; bool enabled; @@ -410,16 +426,12 @@ struct kbase_queue { u64 trace_offset_ptr; u32 trace_buffer_size; u32 trace_cfg; - struct kbase_csf_notification error; struct work_struct cs_error_work; u64 cs_error_info; u32 cs_error; bool cs_error_fatal; - atomic_t pending; u64 extract_ofs; -#if IS_ENABLED(CONFIG_DEBUG_FS) u64 saved_cmd_ptr; -#endif /* CONFIG_DEBUG_FS */ }; /** @@ -514,10 +526,6 @@ struct kbase_protected_suspend_buffer { * have pending protected mode entry requests. * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be * returned to userspace if such an error has occurred. - * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT - * to be returned to userspace if such an error has occurred. - * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM - * to be returned to userspace if such an error has occurred. * @timer_event_work: Work item to handle the progress timeout fatal event * for the group. 
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads @@ -544,6 +552,7 @@ struct kbase_queue_group { u8 compute_max; u8 csi_handlers; + u64 tiler_mask; u64 fragment_mask; u64 compute_mask; @@ -566,8 +575,6 @@ struct kbase_queue_group { DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); struct kbase_csf_notification error_fatal; - struct kbase_csf_notification error_timeout; - struct kbase_csf_notification error_tiler_oom; struct work_struct timer_event_work; @@ -582,6 +589,12 @@ struct kbase_queue_group { #endif void *csg_reg; u8 csg_reg_bind_retries; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @prev_act: Previous CSG activity transition in a GPU metrics. + */ + bool prev_act; +#endif }; /** @@ -834,7 +847,6 @@ struct kbase_csf_user_reg_context { * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance * @sched: Object representing the scheduler's context - * @pending_submission_work: Work item to process pending kicked GPU command queues. * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. * @user_reg: Collective information to support mapping to USER Register page. 
@@ -842,8 +854,7 @@ struct kbase_csf_user_reg_context { struct kbase_csf_context { struct list_head event_pages_head; DECLARE_BITMAP(cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); - struct kbase_queue *user_pages_info[ - KBASE_CSF_NUM_USER_IO_PAGES_HANDLE]; + struct kbase_queue *user_pages_info[KBASE_CSF_NUM_USER_IO_PAGES_HANDLE]; struct mutex lock; struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM]; struct list_head queue_list; @@ -853,10 +864,7 @@ struct kbase_csf_context { struct workqueue_struct *wq; struct list_head link; struct kbase_csf_scheduler_context sched; - struct work_struct pending_submission_work; -#if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_cpu_queue_context cpu_queue; -#endif struct kbase_csf_user_reg_context user_reg; }; @@ -1002,23 +1010,20 @@ struct kbase_csf_mcu_shared_regions { * periodic scheduling tasks. If this value is 0 * then it will only perform scheduling under the * influence of external factors e.g., IRQs, IOCTLs. - * @wq: Dedicated workqueue to execute the @tick_work. * @tick_timer: High-resolution timer employed to schedule tick * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do * not provide sufficiently reliable periodicity). - * @tick_work: Work item that performs the "schedule on tick" - * operation to implement timeslice-based scheduling. - * @tock_work: Work item that would perform the schedule on tock - * operation to implement the asynchronous scheduling. - * @pending_tock_work: Indicates that the tock work item should re-execute - * once it's finished instead of going back to sleep. + * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform + * a scheduling tick. + * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform + * a scheduling tock. 
* @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would * initiate a reset if the ping request isn't * acknowledged. - * @top_ctx: Pointer to the Kbase context corresponding to the + * @top_kctx: Pointer to the Kbase context corresponding to the * @top_grp. * @top_grp: Pointer to queue group inside @groups_to_schedule * list that was assigned the highest slot priority. @@ -1058,13 +1063,6 @@ struct kbase_csf_mcu_shared_regions { * after GPU and L2 cache have been powered up. So when * this count is zero, MCU will not be powered up. * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. - * @tick_timer_active: Indicates whether the @tick_timer is effectively - * active or not, as the callback function of - * @tick_timer will enqueue @tick_work only if this - * flag is true. This is mainly useful for the case - * when scheduling tick needs to be advanced from - * interrupt context, without actually deactivating - * the @tick_timer first and then enqueing @tick_work. * @tick_protm_pending_seq: Scan out sequence number of the group that has * protected mode execution pending for the queue(s) * bound to it and will be considered first for the @@ -1076,6 +1074,12 @@ struct kbase_csf_mcu_shared_regions { * @mcu_regs_data: Scheduler MCU shared regions data for managing the * shared interface mappings for on-slot queues and * CSG suspend buffers. + * @kthread_signal: Used to wake up the GPU queue submission + * thread when a queue needs attention. + * @kthread_running: Whether the GPU queue submission thread should keep + * executing. + * @gpuq_kthread: High-priority thread used to handle GPU queue + * submissions. 
*/ struct kbase_csf_scheduler { struct mutex lock; @@ -1097,14 +1101,12 @@ struct kbase_csf_scheduler { DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); unsigned long last_schedule; - bool timer_enabled; - struct workqueue_struct *wq; + atomic_t timer_enabled; struct hrtimer tick_timer; - struct work_struct tick_work; - struct delayed_work tock_work; + atomic_t pending_tick_work; atomic_t pending_tock_work; struct delayed_work ping_work; - struct kbase_context *top_ctx; + struct kbase_context *top_kctx; struct kbase_queue_group *top_grp; struct kbase_queue_group *active_protm_grp; struct workqueue_struct *idle_wq; @@ -1115,11 +1117,37 @@ struct kbase_csf_scheduler { u32 non_idle_scanout_grps; u32 pm_active_count; unsigned int csg_scheduling_period_ms; - bool tick_timer_active; u32 tick_protm_pending_seq; ktime_t protm_enter_time; struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; struct kbase_csf_mcu_shared_regions mcu_regs_data; + struct completion kthread_signal; + bool kthread_running; + struct task_struct *gpuq_kthread; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_tb: Handler of firmware trace buffer for gpu_metrics + */ + struct firmware_trace_buffer *gpu_metrics_tb; + + /** + * @gpu_metrics_timer: High-resolution timer used to periodically emit the GPU metrics + * tracepoints for applications that are using the GPU. The timer is + * needed for the long duration handling so that the length of work + * period is within the allowed limit. + * Timer callback function will be executed in soft irq context. + */ + struct hrtimer gpu_metrics_timer; + + /** + * @gpu_metrics_lock: Lock for the serialization of GPU metrics related code. The lock + * is not acquired in the HARDIRQ-safe way, so shall not be acquired + * after acquiring a lock that can be taken in the hard irq. + * The softirq must be disabled whenever the lock is taken from the + * process context. 
+ */ + spinlock_t gpu_metrics_lock; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ }; /* @@ -1130,15 +1158,14 @@ struct kbase_csf_scheduler { /* * Maximum value of the global progress timeout. */ -#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ - ((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> \ - GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ - GLB_PROGRESS_TIMER_TIMEOUT_SCALE) +#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ + ((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ + GLB_PROGRESS_TIMER_TIMEOUT_SCALE) /* - * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. + * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of nanosecond. */ -#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) +#define DEFAULT_GLB_PWROFF_TIMEOUT_NS (800 * 1000) /* * In typical operations, the management of the shader core power transitions @@ -1186,7 +1213,7 @@ enum kbase_ipa_core_type { /* * Total number of configurable counters existing on the IPA Control interface. */ -#define KBASE_IPA_CONTROL_MAX_COUNTERS \ +#define KBASE_IPA_CONTROL_MAX_COUNTERS \ ((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) /** @@ -1358,7 +1385,7 @@ struct kbase_csf_mcu_fw { /* * Firmware log polling period. */ -#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 +#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT 25 /** * enum kbase_csf_firmware_log_mode - Firmware log operating mode @@ -1370,10 +1397,16 @@ struct kbase_csf_mcu_fw { * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log * will be periodically emptied into dmesg, manual reading through debugfs is * disabled. + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: Automatic discarding mode, firmware + * log will be periodically discarded, the remaining log can be read manually by + * the userspace (and it will also be dumped automatically into dmesg on GPU + * reset). 
*/ enum kbase_csf_firmware_log_mode { KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, - KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT, + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD }; /** @@ -1387,6 +1420,7 @@ enum kbase_csf_firmware_log_mode { * @dump_buf: Buffer used for dumping the log. * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions. * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions. + * @poll_period_ms: Firmware log polling period in milliseconds. */ struct kbase_csf_firmware_log { enum kbase_csf_firmware_log_mode mode; @@ -1395,6 +1429,7 @@ struct kbase_csf_firmware_log { u8 *dump_buf; u32 func_call_list_va_start; u32 func_call_list_va_end; + atomic_t poll_period_ms; }; /** @@ -1490,7 +1525,7 @@ struct kbase_csf_user_reg { * image. * @shared_interface: Pointer to the interface object containing info for * the memory area shared between firmware & host. - * @shared_reg_rbtree: RB tree of the memory regions allocated from the + * @mcu_shared_zone: Memory zone tracking memory regions allocated from the * shared interface segment in MCU firmware address * space. * @db_filp: Pointer to a dummy file, that alongwith @@ -1552,22 +1587,28 @@ struct kbase_csf_user_reg { * @fw_error_work: Work item for handling the firmware internal error * fatal event. * @ipa_control: IPA Control component manager. - * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input - * in unit of micro-seconds. The firmware does not use + * @mcu_core_pwroff_dur_ns: Sysfs attribute for the glb_pwroff timeout input + * in unit of nanoseconds. The firmware does not use * it directly. * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input * in interface required format, ready to be used * directly in the firmware. 
+ * @mcu_core_pwroff_dur_count_modifier: Update csffw_glb_req_cfg_pwroff_timer + * to make the shr(10) modifier conditional + * on new flag in GLB_PWROFF_TIMER_CONFIG * @mcu_core_pwroff_reg_shadow: The actual value that has been programed into * the glb_pwoff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time - * window in unit of microseconds. The firmware does not + * @gpu_idle_hysteresis_ns: Sysfs attribute for the idle hysteresis time + * window in unit of nanoseconds. The firmware does not * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. + * @gpu_idle_dur_count_modifier: Update csffw_glb_req_idle_enable to make the shr(10) + * modifier conditional on the new flag + * in GLB_IDLE_TIMER_CONFIG. * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting * for any request sent to the firmware. * @hwcnt: Contain members required for handling the dump of @@ -1579,6 +1620,12 @@ struct kbase_csf_user_reg { * @dof: Structure for dump on fault. * @user_reg: Collective information to support the mapping to * USER Register page for user processes. + * @pending_gpuq_kicks: Lists of GPU queue that have been kicked but not + * yet processed, categorised by queue group's priority. + * @pending_gpuq_kicks_lock: Protect @pending_gpuq_kicks and + * kbase_queue.pending_kick_link. + * @quirks_ext: Pointer to an allocated buffer containing the firmware + * workarounds configuration. 
*/ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1588,7 +1635,7 @@ struct kbase_csf_device { struct kobject *fw_cfg_kobj; struct kbase_csf_trace_buffers firmware_trace_buffers; void *shared_interface; - struct rb_root shared_reg_rbtree; + struct kbase_reg_zone mcu_shared_zone; struct file *db_filp; u32 db_file_offsets; struct tagged_addr dummy_db_page; @@ -1609,11 +1656,13 @@ struct kbase_csf_device { bool glb_init_request_pending; struct work_struct fw_error_work; struct kbase_ipa_control ipa_control; - u32 mcu_core_pwroff_dur_us; + u32 mcu_core_pwroff_dur_ns; u32 mcu_core_pwroff_dur_count; + u32 mcu_core_pwroff_dur_count_modifier; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_us; + u32 gpu_idle_hysteresis_ns; u32 gpu_idle_dur_count; + u32 gpu_idle_dur_count_modifier; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; @@ -1629,6 +1678,9 @@ struct kbase_csf_device { struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ struct kbase_csf_user_reg user_reg; + struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + spinlock_t pending_gpuq_kicks_lock; + u32 *quirks_ext; }; /** @@ -1645,10 +1697,6 @@ struct kbase_csf_device { * @bf_data: Data relating to Bus fault. * @gf_data: Data relating to GPU fault. * @current_setup: Stores the MMU configuration for this address space. - * @is_unresponsive: Flag to indicate MMU is not responding. - * Set if a MMU command isn't completed within - * &kbase_device:mmu_as_inactive_wait_time_ms. - * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. 
*/ struct kbase_as { int number; @@ -1660,7 +1708,6 @@ struct kbase_as { struct kbase_fault bf_data; struct kbase_fault gf_data; struct kbase_mmu_setup current_setup; - bool is_unresponsive; }; #endif /* _KBASE_CSF_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c index 49e52938499f..0139fb26d4e3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,8 +40,8 @@ struct kbase_csf_event_cb { void *param; }; -int kbase_csf_event_wait_add(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param) +int kbase_csf_event_wait_add(struct kbase_context *kctx, kbase_csf_event_callback *callback, + void *param) { int err = -ENOMEM; struct kbase_csf_event_cb *event_cb = @@ -56,8 +56,7 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event.lock, flags); list_add_tail(&event_cb->link, &kctx->csf.event.callback_list); - dev_dbg(kctx->kbdev->dev, - "Added event handler %pK with param %pK\n", event_cb, + dev_dbg(kctx->kbdev->dev, "Added event handler %pK with param %pK\n", event_cb, event_cb->param); spin_unlock_irqrestore(&kctx->csf.event.lock, flags); @@ -67,8 +66,8 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx, return err; } -void kbase_csf_event_wait_remove(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param) +void kbase_csf_event_wait_remove(struct kbase_context *kctx, kbase_csf_event_callback *callback, + void *param) { struct kbase_csf_event_cb *event_cb; unsigned long flags; @@ 
-78,8 +77,7 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx, list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) { if ((event_cb->callback == callback) && (event_cb->param == param)) { list_del(&event_cb->link); - dev_dbg(kctx->kbdev->dev, - "Removed event handler %pK with param %pK\n", + dev_dbg(kctx->kbdev->dev, "Removed event handler %pK with param %pK\n", event_cb, event_cb->param); kfree(event_cb); break; @@ -113,8 +111,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) struct kbase_csf_event_cb *event_cb, *next_event_cb; unsigned long flags; - dev_dbg(kctx->kbdev->dev, - "Signal event (%s GPU notify) for context %pK\n", + dev_dbg(kctx->kbdev->dev, "Signal event (%s GPU notify) for context %pK\n", notify_gpu ? "with" : "without", (void *)kctx); /* First increment the signal count and wake up event thread. @@ -136,12 +133,10 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) */ spin_lock_irqsave(&kctx->csf.event.lock, flags); - list_for_each_entry_safe( - event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { + list_for_each_entry_safe(event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { enum kbase_csf_event_callback_action action; - dev_dbg(kctx->kbdev->dev, - "Calling event handler %pK with param %pK\n", + dev_dbg(kctx->kbdev->dev, "Calling event handler %pK with param %pK\n", (void *)event_cb, event_cb->param); action = event_cb->callback(event_cb->param); if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { @@ -160,17 +155,15 @@ void kbase_csf_event_term(struct kbase_context *kctx) spin_lock_irqsave(&kctx->csf.event.lock, flags); - list_for_each_entry_safe( - event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { + list_for_each_entry_safe(event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { list_del(&event_cb->link); - dev_warn(kctx->kbdev->dev, - "Removed event handler %pK with param %pK\n", - (void *)event_cb, event_cb->param); + 
dev_warn(kctx->kbdev->dev, "Removed event handler %pK with param %pK\n", + (void *)event_cb, event_cb->param); kfree(event_cb); } - WARN(!list_empty(&kctx->csf.event.error_list), - "Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id); + WARN(!list_empty(&kctx->csf.event.error_list), "Error list not empty for ctx %d_%d\n", + kctx->tgid, kctx->id); spin_unlock_irqrestore(&kctx->csf.event.lock, flags); } @@ -182,8 +175,7 @@ void kbase_csf_event_init(struct kbase_context *const kctx) spin_lock_init(&kctx->csf.event.lock); } -void kbase_csf_event_remove_error(struct kbase_context *kctx, - struct kbase_csf_notification *error) +void kbase_csf_event_remove_error(struct kbase_context *kctx, struct kbase_csf_notification *error) { unsigned long flags; @@ -201,19 +193,19 @@ bool kbase_csf_event_read_error(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event.lock, flags); if (likely(!list_empty(&kctx->csf.event.error_list))) { error_data = list_first_entry(&kctx->csf.event.error_list, - struct kbase_csf_notification, link); + struct kbase_csf_notification, link); list_del_init(&error_data->link); *event_data = error_data->data; - dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", - (void *)error_data, (void *)kctx); + dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", (void *)error_data, + (void *)kctx); } spin_unlock_irqrestore(&kctx->csf.event.lock, flags); return !!error_data; } void kbase_csf_event_add_error(struct kbase_context *const kctx, - struct kbase_csf_notification *const error, - struct base_csf_notification const *const data) + struct kbase_csf_notification *const error, + struct base_csf_notification const *const data) { unsigned long flags; @@ -230,8 +222,7 @@ void kbase_csf_event_add_error(struct kbase_context *const kctx, if (list_empty(&error->link)) { error->data = *data; list_add_tail(&error->link, &kctx->csf.event.error_list); - dev_dbg(kctx->kbdev->dev, - "Added error %pK of type %d in context %pK\n", + 
dev_dbg(kctx->kbdev->dev, "Added error %pK of type %d in context %pK\n", (void *)error, data->type, (void *)kctx); } else { dev_dbg(kctx->kbdev->dev, "Error %pK of type %d already pending in context %pK", diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h index 52122a9ef4d1..2341e790c739 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,8 +56,8 @@ typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *para * * Return: 0 on success, or negative on failure. */ -int kbase_csf_event_wait_add(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param); +int kbase_csf_event_wait_add(struct kbase_context *kctx, kbase_csf_event_callback *callback, + void *param); /** * kbase_csf_event_wait_remove - Remove a CSF event callback @@ -70,8 +70,8 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx, * This function removes an event callback from the list of CSF event callbacks * belonging to a given Kbase context. 
*/ -void kbase_csf_event_wait_remove(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param); +void kbase_csf_event_wait_remove(struct kbase_context *kctx, kbase_csf_event_callback *callback, + void *param); /** * kbase_csf_event_term - Removes all CSF event callbacks @@ -148,8 +148,8 @@ bool kbase_csf_event_read_error(struct kbase_context *kctx, * */ void kbase_csf_event_add_error(struct kbase_context *const kctx, - struct kbase_csf_notification *const error, - struct base_csf_notification const *const data); + struct kbase_csf_notification *const error, + struct base_csf_notification const *const data); /** * kbase_csf_event_remove_error - Remove an error from event error list @@ -157,8 +157,7 @@ void kbase_csf_event_add_error(struct kbase_context *const kctx, * @kctx: Address of a base context associated with a GPU address space. * @error: Address of the item to be removed from the context's event error list. */ -void kbase_csf_event_remove_error(struct kbase_context *kctx, - struct kbase_csf_notification *error); +void kbase_csf_event_remove_error(struct kbase_context *kctx, struct kbase_csf_notification *error); /** * kbase_csf_event_error_pending - Check the error pending status diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 42bff1e91584..f2362d58cfa8 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -26,6 +26,7 @@ #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" +#include "mali_kbase_reg_track.h" #include "mali_kbase_mem_pool_group.h" #include "mali_kbase_reset_gpu.h" #include "mali_kbase_ctx_sched.h" @@ -52,18 +53,18 @@ #include #include #include +#include -#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) +#define MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN ((size_t)20) -static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = 
"mali_csffw.bin"; -module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); +static char default_fw_name[MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; +module_param_string(fw_name, default_fw_name, sizeof(default_fw_name), 0644); MODULE_PARM_DESC(fw_name, "firmware image"); /* The waiting time for firmware to boot */ static unsigned int csf_firmware_boot_timeout_ms; module_param(csf_firmware_boot_timeout_ms, uint, 0444); -MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, - "Maximum time to wait for firmware to boot."); +MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, "Maximum time to wait for firmware to boot."); #ifdef CONFIG_MALI_BIFROST_DEBUG /* Makes Driver wait indefinitely for an acknowledgment for the different @@ -74,37 +75,31 @@ MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, */ bool fw_debug; /* Default value of 0/false */ module_param(fw_debug, bool, 0444); -MODULE_PARM_DESC(fw_debug, - "Enables effective use of a debugger for debugging firmware code."); +MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging firmware code."); #endif +#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) +#define FIRMWARE_HEADER_VERSION_MAJOR (0ul) +#define FIRMWARE_HEADER_VERSION_MINOR (3ul) +#define FIRMWARE_HEADER_LENGTH (0x14ul) -#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) -#define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -#define FIRMWARE_HEADER_VERSION_MINOR (3ul) -#define FIRMWARE_HEADER_LENGTH (0x14ul) - -#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ - (CSF_FIRMWARE_ENTRY_READ | \ - CSF_FIRMWARE_ENTRY_WRITE | \ - CSF_FIRMWARE_ENTRY_EXECUTE | \ - CSF_FIRMWARE_ENTRY_PROTECTED | \ - CSF_FIRMWARE_ENTRY_SHARED | \ - CSF_FIRMWARE_ENTRY_ZERO | \ +#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ + (CSF_FIRMWARE_ENTRY_READ | CSF_FIRMWARE_ENTRY_WRITE | CSF_FIRMWARE_ENTRY_EXECUTE | \ + CSF_FIRMWARE_ENTRY_PROTECTED | CSF_FIRMWARE_ENTRY_SHARED | CSF_FIRMWARE_ENTRY_ZERO | \ CSF_FIRMWARE_ENTRY_CACHE_MODE) -#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -#define 
CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) -#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) -#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) -#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) +#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) +#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) #define CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT (2ul << 3) -#define CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT (3ul << 3) +#define CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT (3ul << 3) #define INTERFACE_ENTRY_NAME_OFFSET (0x14) @@ -115,10 +110,10 @@ MODULE_PARM_DESC(fw_debug, #define BUILD_INFO_GIT_DIRTY_LEN (1U) #define BUILD_INFO_GIT_SHA_PATTERN "git_sha: " -#define CSF_MAX_FW_STOP_LOOPS (100000) +#define CSF_MAX_FW_STOP_LOOPS (100000) -#define CSF_GLB_REQ_CFG_MASK \ - (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ +#define CSF_GLB_REQ_CFG_MASK \ + (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) static inline u32 input_page_read(const u32 *const input, const u32 offset) @@ -128,21 +123,18 @@ static inline u32 input_page_read(const u32 *const input, const u32 offset) return input[offset / sizeof(u32)]; } -static inline void input_page_write(u32 *const input, const u32 offset, - const u32 value) +static inline void input_page_write(u32 *const input, const u32 offset, const u32 value) { WARN_ON(offset % sizeof(u32)); input[offset / sizeof(u32)] = value; } -static inline void 
input_page_partial_write(u32 *const input, const u32 offset, - u32 value, u32 mask) +static inline void input_page_partial_write(u32 *const input, const u32 offset, u32 value, u32 mask) { WARN_ON(offset % sizeof(u32)); - input[offset / sizeof(u32)] = - (input_page_read(input, offset) & ~mask) | (value & mask); + input[offset / sizeof(u32)] = (input_page_read(input, offset) & ~mask) | (value & mask); } static inline u32 output_page_read(const u32 *const output, const u32 offset) @@ -152,6 +144,7 @@ static inline u32 output_page_read(const u32 *const output, const u32 offset) return output[offset / sizeof(u32)]; } + static unsigned int entry_type(u32 header) { return header & 0xFF; @@ -188,25 +181,23 @@ struct firmware_timeline_metadata { /* The shared interface area, used for communicating with firmware, is managed * like a virtual memory zone. Reserve the virtual space from that zone * corresponding to shared interface entry parsed from the firmware image. - * The shared_reg_rbtree should have been initialized before calling this + * The MCU_SHARED_ZONE should have been initialized before calling this * function. 
*/ static int setup_shared_iface_static_region(struct kbase_device *kbdev) { - struct kbase_csf_firmware_interface *interface = - kbdev->csf.shared_interface; + struct kbase_csf_firmware_interface *interface = kbdev->csf.shared_interface; struct kbase_va_region *reg; int ret = -ENOMEM; if (!interface) return -EINVAL; - reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, - interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, interface->num_pages_aligned); if (reg) { mutex_lock(&kbdev->csf.reg_lock); - ret = kbase_add_va_region_rbtree(kbdev, reg, - interface->virtual, interface->num_pages_aligned, 1); + ret = kbase_add_va_region_rbtree(kbdev, reg, interface->virtual, + interface->num_pages_aligned, 1); mutex_unlock(&kbdev->csf.reg_lock); if (ret) kfree(reg); @@ -222,23 +213,23 @@ static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val) u32 max_loops = CSF_MAX_FW_STOP_LOOPS; /* wait for the MCU_STATUS register to reach the given status value */ - while (--max_loops && - (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) != val)) { - } + while (--max_loops && (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_STATUS)) != val)) + ; return (max_loops == 0) ? 
-1 : 0; } + void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) { KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_DISABLE); } static void wait_for_firmware_stop(struct kbase_device *kbdev) { - if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) { + if (wait_mcu_status_value(kbdev, MCU_CONTROL_REQ_DISABLE) < 0) { /* This error shall go away once MIDJM-2371 is closed */ dev_err(kbdev->dev, "Firmware failed to stop"); } @@ -273,8 +264,8 @@ static void wait_for_firmware_boot(struct kbase_device *kbdev) /* Firmware will generate a global interface interrupt once booting * is complete */ - remaining = wait_event_timeout(kbdev->csf.event_wait, - kbdev->csf.interrupt_received == true, wait_timeout); + remaining = wait_event_timeout(kbdev->csf.event_wait, kbdev->csf.interrupt_received == true, + wait_timeout); if (!remaining) dev_err(kbdev->dev, "Timed out waiting for fw boot completion"); @@ -282,16 +273,20 @@ static void wait_for_firmware_boot(struct kbase_device *kbdev) kbdev->csf.interrupt_received = false; } +static void enable_mcu(struct kbase_device *kbdev) +{ + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + + /* Trigger the boot of MCU firmware, Use the AUTO mode as + * otherwise on fast reset, to exit protected mode, MCU will + * not reboot by itself to enter normal mode. 
+ */ + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_AUTO); +} + static void boot_csf_firmware(struct kbase_device *kbdev) { - kbase_csf_firmware_enable_mcu(kbdev); - -#if IS_ENABLED(CONFIG_MALI_CORESIGHT) - kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); - - if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) - dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); -#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + enable_mcu(kbdev); wait_for_firmware_boot(kbdev); } @@ -308,7 +303,7 @@ static void boot_csf_firmware(struct kbase_device *kbdev) static int wait_ready(struct kbase_device *kbdev) { const ktime_t wait_loop_start = ktime_get_raw(); - const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms; s64 diff; do { @@ -316,8 +311,8 @@ static int wait_ready(struct kbase_device *kbdev) for (i = 0; i < 1000; i++) { /* Wait for the MMU status to indicate there is no active command */ - if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE)) + if (!(kbase_reg_read32(kbdev, MMU_AS_OFFSET(MCU_AS_NR, STATUS)) & + AS_STATUS_AS_ACTIVE_EXT_MASK)) return 0; } @@ -370,8 +365,8 @@ static int load_mmu_tables(struct kbase_device *kbdev) * section, which could be different from the cache mode requested by * firmware. */ -static unsigned long convert_mem_flags(const struct kbase_device * const kbdev, - const u32 flags, u32 *cm) +static unsigned long convert_mem_flags(const struct kbase_device *const kbdev, const u32 flags, + u32 *cm) { unsigned long mem_flags = 0; u32 cache_mode = flags & CSF_FIRMWARE_ENTRY_CACHE_MODE; @@ -396,45 +391,41 @@ static unsigned long convert_mem_flags(const struct kbase_device * const kbdev, * the system does not support ACE coherency. 
*/ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) && - (kbdev->system_coherency != COHERENCY_ACE)) + (kbdev->system_coherency != COHERENCY_ACE)) cache_mode = CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT; /* Substitute uncached incoherent memory for uncached coherent memory * if the system does not support ACE-Lite coherency. */ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT) && - (kbdev->system_coherency == COHERENCY_NONE)) + (kbdev->system_coherency == COHERENCY_NONE)) cache_mode = CSF_FIRMWARE_CACHE_MODE_NONE; *cm = cache_mode; switch (cache_mode) { case CSF_FIRMWARE_CACHE_MODE_NONE: - mem_flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + mem_flags |= KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); break; case CSF_FIRMWARE_CACHE_MODE_CACHED: - mem_flags |= - KBASE_REG_MEMATTR_INDEX( - AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY); + mem_flags |= KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY); break; case CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT: case CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT: WARN_ON(!is_shared); mem_flags |= KBASE_REG_SHARE_BOTH | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_SHARED); break; default: - dev_err(kbdev->dev, - "Firmware contains interface with unsupported cache mode\n"); + dev_err(kbdev->dev, "Firmware contains interface with unsupported cache mode\n"); break; } return mem_flags; } static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, - struct tagged_addr *phys, u32 num_pages, u32 flags, - u32 data_start, u32 data_end) + struct tagged_addr *phys, u32 num_pages, u32 flags, + u32 data_start, u32 data_end) { u32 data_pos = data_start; u32 data_len = data_end - data_start; @@ -448,7 +439,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, for (page_num = 0; page_num < page_limit; ++page_num) { struct page *const page = as_page(phys[page_num]); - char *const p = 
kmap_atomic(page); + char *const p = kbase_kmap_atomic(page); u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); if (copy_len > 0) { @@ -465,7 +456,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), PAGE_SIZE, DMA_TO_DEVICE); - kunmap_atomic(p); + kbase_kunmap_atomic(p); } } @@ -484,7 +475,8 @@ static int reload_fw_image(struct kbase_device *kbdev) /* Do a basic sanity check on MAGIC signature */ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { - dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); + dev_err(kbdev->dev, + "Incorrect magic value, firmware image could have been corrupted\n"); ret = -EINVAL; goto out; } @@ -532,6 +524,7 @@ out: * within the 2MB pages aligned allocation. * @is_small_page: This is an output flag used to select between the small and large page * to be used for the FW entry allocation. + * @force_small_page: Use 4kB pages to allocate memory needed for FW loading * * Go through all the already initialized interfaces and find if a previously * allocated large page can be used to store contents of new FW interface entry. @@ -543,7 +536,7 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, const u32 flags, struct tagged_addr **phys, struct protected_memory_allocation ***pma, u32 num_pages, u32 *num_pages_aligned, - bool *is_small_page) + bool *is_small_page, bool force_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -559,13 +552,14 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, *phys = NULL; *pma = NULL; + if (force_small_page) + goto out; /* If the section starts at 2MB aligned boundary, * then use 2MB page(s) for it. 
*/ if (!(virtual_start & (SZ_2M - 1))) { - *num_pages_aligned = - round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE); + *num_pages_aligned = round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE); *is_small_page = false; goto out; } @@ -646,40 +640,41 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, unsigned int name_len; struct tagged_addr *phys = NULL; struct kbase_csf_firmware_interface *interface = NULL; - bool allocated_pages = false, protected_mode = false; + bool protected_mode = false; unsigned long mem_flags = 0; u32 cache_mode = 0; struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; - bool ignore_page_migration = true; + bool force_small_page = false; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", - data_end, data_start); + data_end, data_start); return -EINVAL; } if (virtual_end < virtual_start) { dev_err(kbdev->dev, "Firmware corrupt, virtual_end < virtual_start (0x%x<0x%x)\n", - virtual_end, virtual_start); + virtual_end, virtual_start); return -EINVAL; } if (data_end > fw->size) { - dev_err(kbdev->dev, "Firmware corrupt, file truncated? data_end=0x%x > fw->size=0x%zx\n", - data_end, fw->size); + dev_err(kbdev->dev, + "Firmware corrupt, file truncated? 
data_end=0x%x > fw->size=0x%zx\n", + data_end, fw->size); return -EINVAL; } - if ((virtual_start & ~PAGE_MASK) != 0 || - (virtual_end & ~PAGE_MASK) != 0) { - dev_err(kbdev->dev, "Firmware corrupt: virtual addresses not page aligned: 0x%x-0x%x\n", - virtual_start, virtual_end); + if ((virtual_start & ~PAGE_MASK) != 0 || (virtual_end & ~PAGE_MASK) != 0) { + dev_err(kbdev->dev, + "Firmware corrupt: virtual addresses not page aligned: 0x%x-0x%x\n", + virtual_start, virtual_end); return -EINVAL; } if ((flags & CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS) != flags) { dev_err(kbdev->dev, "Firmware contains interface with unsupported flags (0x%x)\n", - flags); + flags); return -EINVAL; } @@ -692,12 +687,13 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, return 0; } - num_pages = (virtual_end - virtual_start) - >> PAGE_SHIFT; + num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - reuse_pages = - entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, - &pma, num_pages, &num_pages_aligned, &is_small_page); +retry_alloc: + ret = 0; + reuse_pages = entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, + &phys, &pma, num_pages, &num_pages_aligned, + &is_small_page, force_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -706,32 +702,40 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, if (protected_mode) { if (!reuse_pages) { - pma = kbase_csf_protected_memory_alloc( - kbdev, phys, num_pages_aligned, is_small_page); + pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages_aligned, + is_small_page); + if (!pma) + ret = -ENOMEM; + } else if (WARN_ON(!pma)) { + ret = -EINVAL; + goto out; } - - if (!pma) - ret = -ENOMEM; } else { if (!reuse_pages) { ret = kbase_mem_pool_alloc_pages( kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false, NULL); - ignore_page_migration = false; } } if (ret < 0) { - 
dev_err(kbdev->dev, - "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n", - num_pages_aligned, virtual_start); + dev_warn( + kbdev->dev, + "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x using %s ", + num_pages_aligned, virtual_start, + is_small_page ? "small pages" : "large page"); + WARN_ON(reuse_pages); + if (!is_small_page) { + dev_warn(kbdev->dev, "Retrying by using small pages"); + force_small_page = true; + kfree(phys); + goto retry_alloc; + } goto out; } - allocated_pages = true; - load_fw_image_section(kbdev, fw->data, phys, num_pages, flags, - data_start, data_end); + load_fw_image_section(kbdev, fw->data, phys, num_pages, flags, data_start, data_end); /* Allocate enough memory for the struct kbase_csf_firmware_interface and * the name of the interface. @@ -785,17 +789,14 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, */ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) || (cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT)) { - WARN_ON(mem_attr_index != - AS_MEMATTR_INDEX_SHARED); + WARN_ON(mem_attr_index != KBASE_MEMATTR_INDEX_SHARED); cpu_map_prot = PAGE_KERNEL; } else { - WARN_ON(mem_attr_index != - AS_MEMATTR_INDEX_NON_CACHEABLE); + WARN_ON(mem_attr_index != KBASE_MEMATTR_INDEX_NON_CACHEABLE); cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); } - page_list = kmalloc_array(num_pages, sizeof(*page_list), - GFP_KERNEL); + page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); if (!page_list) { ret = -ENOMEM; goto out; @@ -804,8 +805,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, for (i = 0; i < num_pages; i++) page_list[i] = as_page(phys[i]); - interface->kernel_map = vmap(page_list, num_pages, VM_MAP, - cpu_map_prot); + interface->kernel_map = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); kfree(page_list); @@ -828,8 +828,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = 
kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, - ignore_page_migration); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -844,22 +843,18 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, return ret; out: - if (allocated_pages) { - if (!reuse_pages) { - if (protected_mode) { - kbase_csf_protected_memory_free( - kbdev, pma, num_pages_aligned, is_small_page); - } else { - kbase_mem_pool_free_pages( - kbase_mem_pool_group_select( - kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), - num_pages_aligned, phys, false, false); - } + if (!reuse_pages && phys) { + if (pma) { + kbase_csf_protected_memory_free(kbdev, pma, num_pages_aligned, + is_small_page); + } else { + kbase_mem_pool_free_pages( + kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, + is_small_page), + num_pages_aligned, phys, false, false); } - } - - if (!reuse_pages) kfree(phys); + } kfree(interface); return ret; @@ -884,8 +879,7 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, const u32 data_end = data_start + data_size; const char *name = (char *)&entry[2]; struct firmware_timeline_metadata *metadata; - const unsigned int name_len = - size - TL_METADATA_ENTRY_NAME_OFFSET; + const unsigned int name_len = size - TL_METADATA_ENTRY_NAME_OFFSET; size_t allocation_size = sizeof(*metadata) + name_len + 1 + data_size; if (data_end > fw->size) { @@ -937,6 +931,7 @@ static int parse_build_info_metadata_entry(struct kbase_device *kbdev, const u32 meta_start_addr = entry[0]; char *ptr = NULL; size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN); + CSTD_UNUSED(size); /* Only print git SHA to avoid releasing sensitive information */ ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN); @@ -944,7 +939,7 @@ static int parse_build_info_metadata_entry(struct kbase_device 
*kbdev, if (ptr && strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) { char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1]; - int i = 0; + unsigned int i = 0; /* Move ptr to start of SHA */ ptr += sha_pattern_len; @@ -1001,7 +996,7 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs if ((offset % sizeof(*entry)) || (size % sizeof(*entry))) { dev_err(kbdev->dev, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", - offset, size); + offset, size); return -EINVAL; } @@ -1019,7 +1014,7 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs /* Interface memory setup */ if (size < INTERFACE_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Interface memory setup entry too short (size=%u)\n", - size); + size); return -EINVAL; } return parse_memory_setup_entry(kbdev, fw, entry, size); @@ -1027,25 +1022,21 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs /* Configuration option */ if (size < CONFIGURATION_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Configuration option entry too short (size=%u)\n", - size); + size); return -EINVAL; } - return kbase_csf_firmware_cfg_option_entry_parse( - kbdev, fw, entry, size, updatable); + return kbase_csf_firmware_cfg_option_entry_parse(kbdev, fw, entry, size, updatable); case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: /* Trace buffer */ if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { - dev_err(kbdev->dev, "Trace Buffer entry too short (size=%u)\n", - size); + dev_err(kbdev->dev, "Trace Buffer entry too short (size=%u)\n", size); return -EINVAL; } - return kbase_csf_firmware_parse_trace_buffer_entry( - kbdev, entry, size, updatable); + return kbase_csf_firmware_parse_trace_buffer_entry(kbdev, entry, size, updatable); case CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA: /* Meta data section */ if (size < TL_METADATA_ENTRY_NAME_OFFSET + sizeof(*entry)) { - 
dev_err(kbdev->dev, "Timeline metadata entry too short (size=%u)\n", - size); + dev_err(kbdev->dev, "Timeline metadata entry too short (size=%u)\n", size); return -EINVAL; } return parse_timeline_metadata_entry(kbdev, fw, entry, size); @@ -1059,8 +1050,7 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: /* Function call list section */ if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { - dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", - size); + dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", size); return -EINVAL; } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); @@ -1109,16 +1099,13 @@ static void free_global_iface(struct kbase_device *kbdev) */ static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va) { - struct kbase_csf_firmware_interface *interface = - kbdev->csf.shared_interface; + struct kbase_csf_firmware_interface *interface = kbdev->csf.shared_interface; u8 *kernel_base = interface->kernel_map; if (gpu_va < interface->virtual || gpu_va >= interface->virtual + interface->num_pages * PAGE_SIZE) { - dev_err(kbdev->dev, - "Interface address 0x%x not within %u-page region at 0x%x", - gpu_va, interface->num_pages, - interface->virtual); + dev_err(kbdev->dev, "Interface address 0x%x not within %u-page region at 0x%x", + gpu_va, interface->num_pages, interface->virtual); return NULL; } @@ -1126,15 +1113,12 @@ static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va) } static int parse_cmd_stream_info(struct kbase_device *kbdev, - struct kbase_csf_cmd_stream_info *sinfo, - u32 *stream_base) + struct kbase_csf_cmd_stream_info *sinfo, u32 *stream_base) { sinfo->kbdev = kbdev; - sinfo->features = stream_base[STREAM_FEATURES/4]; - sinfo->input = iface_gpu_va_to_cpu(kbdev, - stream_base[STREAM_INPUT_VA/4]); - sinfo->output = iface_gpu_va_to_cpu(kbdev, - 
stream_base[STREAM_OUTPUT_VA/4]); + sinfo->features = stream_base[STREAM_FEATURES / 4]; + sinfo->input = iface_gpu_va_to_cpu(kbdev, stream_base[STREAM_INPUT_VA / 4]); + sinfo->output = iface_gpu_va_to_cpu(kbdev, stream_base[STREAM_OUTPUT_VA / 4]); if (sinfo->input == NULL || sinfo->output == NULL) return -EINVAL; @@ -1143,57 +1127,48 @@ static int parse_cmd_stream_info(struct kbase_device *kbdev, } static int parse_cmd_stream_group_info(struct kbase_device *kbdev, - struct kbase_csf_cmd_stream_group_info *ginfo, - u32 *group_base, u32 group_stride) + struct kbase_csf_cmd_stream_group_info *ginfo, + u32 *group_base, u32 group_stride) { unsigned int sid; ginfo->kbdev = kbdev; - ginfo->features = group_base[GROUP_FEATURES/4]; - ginfo->input = iface_gpu_va_to_cpu(kbdev, - group_base[GROUP_INPUT_VA/4]); - ginfo->output = iface_gpu_va_to_cpu(kbdev, - group_base[GROUP_OUTPUT_VA/4]); + ginfo->features = group_base[GROUP_FEATURES / 4]; + ginfo->input = iface_gpu_va_to_cpu(kbdev, group_base[GROUP_INPUT_VA / 4]); + ginfo->output = iface_gpu_va_to_cpu(kbdev, group_base[GROUP_OUTPUT_VA / 4]); if (ginfo->input == NULL || ginfo->output == NULL) return -ENOMEM; - ginfo->suspend_size = group_base[GROUP_SUSPEND_SIZE/4]; - ginfo->protm_suspend_size = group_base[GROUP_PROTM_SUSPEND_SIZE/4]; - ginfo->stream_num = group_base[GROUP_STREAM_NUM/4]; + ginfo->suspend_size = group_base[GROUP_SUSPEND_SIZE / 4]; + ginfo->protm_suspend_size = group_base[GROUP_PROTM_SUSPEND_SIZE / 4]; + ginfo->stream_num = group_base[GROUP_STREAM_NUM / 4]; if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP || - ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) { - dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u", - ginfo->stream_num, - MIN_SUPPORTED_STREAMS_PER_GROUP, - MAX_SUPPORTED_STREAMS_PER_GROUP); + ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) { + dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u", ginfo->stream_num, + MIN_SUPPORTED_STREAMS_PER_GROUP, 
MAX_SUPPORTED_STREAMS_PER_GROUP); return -EINVAL; } - ginfo->stream_stride = group_base[GROUP_STREAM_STRIDE/4]; + ginfo->stream_stride = group_base[GROUP_STREAM_STRIDE / 4]; if (ginfo->stream_num * ginfo->stream_stride > group_stride) { - dev_err(kbdev->dev, - "group stride of 0x%x exceeded by %u CSs with stride 0x%x", - group_stride, ginfo->stream_num, - ginfo->stream_stride); + dev_err(kbdev->dev, "group stride of 0x%x exceeded by %u CSs with stride 0x%x", + group_stride, ginfo->stream_num, ginfo->stream_stride); return -EINVAL; } - ginfo->streams = kmalloc_array(ginfo->stream_num, - sizeof(*ginfo->streams), GFP_KERNEL); + ginfo->streams = kmalloc_array(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL); if (!ginfo->streams) return -ENOMEM; for (sid = 0; sid < ginfo->stream_num; sid++) { int err; - u32 *stream_base = group_base + (STREAM_CONTROL_0 + - ginfo->stream_stride * sid) / 4; + u32 *stream_base = group_base + (STREAM_CONTROL_0 + ginfo->stream_stride * sid) / 4; - err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid], - stream_base); + err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid], stream_base); if (err < 0) { /* caller will free the memory for CSs array */ return err; @@ -1205,17 +1180,15 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev, static u32 get_firmware_version(struct kbase_device *kbdev) { - struct kbase_csf_firmware_interface *interface = - kbdev->csf.shared_interface; + struct kbase_csf_firmware_interface *interface = kbdev->csf.shared_interface; u32 *shared_info = interface->kernel_map; - return shared_info[GLB_VERSION/4]; + return shared_info[GLB_VERSION / 4]; } static int parse_capabilities(struct kbase_device *kbdev) { - struct kbase_csf_firmware_interface *interface = - kbdev->csf.shared_interface; + struct kbase_csf_firmware_interface *interface = kbdev->csf.shared_interface; u32 *shared_info = interface->kernel_map; struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; unsigned int gid; @@ 
-1232,59 +1205,50 @@ static int parse_capabilities(struct kbase_device *kbdev) return -EINVAL; } - iface->kbdev = kbdev; - iface->features = shared_info[GLB_FEATURES/4]; - iface->input = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_INPUT_VA/4]); - iface->output = iface_gpu_va_to_cpu(kbdev, - shared_info[GLB_OUTPUT_VA/4]); + iface->features = shared_info[GLB_FEATURES / 4]; + iface->input = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_INPUT_VA / 4]); + iface->output = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_OUTPUT_VA / 4]); if (iface->input == NULL || iface->output == NULL) return -ENOMEM; - iface->group_num = shared_info[GLB_GROUP_NUM/4]; + iface->group_num = shared_info[GLB_GROUP_NUM / 4]; - if (iface->group_num < MIN_SUPPORTED_CSGS || - iface->group_num > MAX_SUPPORTED_CSGS) { - dev_err(kbdev->dev, - "Interface containing %u CSGs outside of range %u-%u", - iface->group_num, MIN_SUPPORTED_CSGS, - MAX_SUPPORTED_CSGS); + if (iface->group_num < MIN_SUPPORTED_CSGS || iface->group_num > MAX_SUPPORTED_CSGS) { + dev_err(kbdev->dev, "Interface containing %u CSGs outside of range %u-%u", + iface->group_num, MIN_SUPPORTED_CSGS, MAX_SUPPORTED_CSGS); return -EINVAL; } - iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; - iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; + iface->group_stride = shared_info[GLB_GROUP_STRIDE / 4]; + iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE / 4]; if (iface->version >= kbase_csf_interface_version(1, 1, 0)) iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; else iface->instr_features = 0; - if ((GROUP_CONTROL_0 + - (unsigned long)iface->group_num * iface->group_stride) > - (interface->num_pages * PAGE_SIZE)) { + if ((GROUP_CONTROL_0 + (unsigned long)iface->group_num * iface->group_stride) > + (interface->num_pages * PAGE_SIZE)) { dev_err(kbdev->dev, - "interface size of %u pages exceeded by %u CSGs with stride 0x%x", - interface->num_pages, iface->group_num, - iface->group_stride); + "interface size of %u pages exceeded by 
%u CSGs with stride 0x%x", + interface->num_pages, iface->group_num, iface->group_stride); return -EINVAL; } WARN_ON(iface->groups); - iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), - GFP_KERNEL); + iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL); if (!iface->groups) return -ENOMEM; for (gid = 0; gid < iface->group_num; gid++) { int err; - u32 *group_base = shared_info + (GROUP_CONTROL_0 + - iface->group_stride * gid) / 4; + u32 *group_base = shared_info + (GROUP_CONTROL_0 + iface->group_stride * gid) / 4; - err = parse_cmd_stream_group_info(kbdev, &iface->groups[gid], - group_base, iface->group_stride); + err = parse_cmd_stream_group_info(kbdev, &iface->groups[gid], group_base, + iface->group_stride); if (err < 0) { free_global_iface(kbdev); return err; @@ -1295,39 +1259,41 @@ static int parse_capabilities(struct kbase_device *kbdev) } static inline void access_firmware_memory_common(struct kbase_device *kbdev, - struct kbase_csf_firmware_interface *interface, u32 offset_bytes, - u32 *value, const bool read) + struct kbase_csf_firmware_interface *interface, + u32 offset_bytes, u32 *value, const bool read) { u32 page_num = offset_bytes >> PAGE_SHIFT; u32 offset_in_page = offset_bytes & ~PAGE_MASK; struct page *target_page = as_page(interface->phys[page_num]); - uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); + uintptr_t cpu_addr = (uintptr_t)kbase_kmap_atomic(target_page); u32 *addr = (u32 *)(cpu_addr + offset_in_page); if (read) { kbase_sync_single_for_device(kbdev, - kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, - sizeof(u32), DMA_BIDIRECTIONAL); + kbase_dma_addr_from_tagged(interface->phys[page_num]) + + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); *value = *addr; } else { *addr = *value; kbase_sync_single_for_device(kbdev, - kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, - sizeof(u32), DMA_BIDIRECTIONAL); + 
kbase_dma_addr_from_tagged(interface->phys[page_num]) + + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); } - kunmap_atomic((u32 *)cpu_addr); + kbase_kunmap_atomic((u32 *)cpu_addr); } -static inline void access_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value, const bool read) +static inline void access_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value, + const bool read) { struct kbase_csf_firmware_interface *interface, *access_interface = NULL; u32 offset_bytes = 0; list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { if ((gpu_addr >= interface->virtual) && - (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { + (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { offset_bytes = gpu_addr - interface->virtual; access_interface = interface; break; @@ -1340,16 +1306,16 @@ static inline void access_firmware_memory(struct kbase_device *kbdev, dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); } -static inline void access_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value, const bool read) +static inline void access_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 *value, + const bool read) { struct kbase_csf_firmware_interface *interface, *access_interface = NULL; u32 offset_bytes = 0; list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { if ((gpu_addr >= interface->virtual_exe_start) && - (gpu_addr < interface->virtual_exe_start + - (interface->num_pages << PAGE_SHIFT))) { + (gpu_addr < + interface->virtual_exe_start + (interface->num_pages << PAGE_SHIFT))) { offset_bytes = gpu_addr - interface->virtual_exe_start; access_interface = interface; @@ -1370,110 +1336,96 @@ static inline void access_firmware_memory_exe(struct kbase_device *kbdev, dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); } -void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value) 
+void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value) { access_firmware_memory(kbdev, gpu_addr, value, true); } -void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 value) +void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value) { access_firmware_memory(kbdev, gpu_addr, &value, false); } -void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value) +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 *value) { access_firmware_memory_exe(kbdev, gpu_addr, value, true); } -void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 value) +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 value) { access_firmware_memory_exe(kbdev, gpu_addr, &value, false); } -void kbase_csf_firmware_cs_input( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset, - const u32 value) +void kbase_csf_firmware_cs_input(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset, const u32 value) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); input_page_write(info->input, offset, value); } -u32 kbase_csf_firmware_cs_input_read( - const struct kbase_csf_cmd_stream_info *const info, - const u32 offset) +u32 kbase_csf_firmware_cs_input_read(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = input_page_read(info->input, offset); dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); return val; } -void kbase_csf_firmware_cs_input_mask( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset, - const u32 value, 
const u32 mask) +void kbase_csf_firmware_cs_input_mask(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset, const u32 value, const u32 mask) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; - dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", - offset, value, mask); + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", offset, value, mask); input_page_partial_write(info->input, offset, value, mask); } -u32 kbase_csf_firmware_cs_output( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset) +u32 kbase_csf_firmware_cs_output(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = output_page_read(info->output, offset); dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); return val; } -void kbase_csf_firmware_csg_input( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset, const u32 value) +void kbase_csf_firmware_csg_input(const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; - dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", - offset, value); + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", offset, value); input_page_write(info->input, offset, value); } -u32 kbase_csf_firmware_csg_input_read( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset) +u32 kbase_csf_firmware_csg_input_read(const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = input_page_read(info->input, offset); dev_dbg(kbdev->dev, "csg input 
r: reg %08x val %08x\n", offset, val); return val; } -void kbase_csf_firmware_csg_input_mask( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset, const u32 value, const u32 mask) +void kbase_csf_firmware_csg_input_mask(const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value, const u32 mask) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; - dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", - offset, value, mask); + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", offset, value, mask); input_page_partial_write(info->input, offset, value, mask); } -u32 kbase_csf_firmware_csg_output( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset) +u32 kbase_csf_firmware_csg_output(const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = output_page_read(info->output, offset); dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); @@ -1481,43 +1433,41 @@ u32 kbase_csf_firmware_csg_output( } KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); -void kbase_csf_firmware_global_input( - const struct kbase_csf_global_iface *const iface, const u32 offset, - const u32 value) + +void kbase_csf_firmware_global_input(const struct kbase_csf_global_iface *const iface, + const u32 offset, const u32 value) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); input_page_write(iface->input, offset, value); } KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); -void kbase_csf_firmware_global_input_mask( - const struct kbase_csf_global_iface *const iface, const u32 offset, - const u32 value, const u32 
mask) +void kbase_csf_firmware_global_input_mask(const struct kbase_csf_global_iface *const iface, + const u32 offset, const u32 value, const u32 mask) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; - dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", - offset, value, mask); + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", offset, value, mask); input_page_partial_write(iface->input, offset, value, mask); } KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); -u32 kbase_csf_firmware_global_input_read( - const struct kbase_csf_global_iface *const iface, const u32 offset) +u32 kbase_csf_firmware_global_input_read(const struct kbase_csf_global_iface *const iface, + const u32 offset) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; u32 const val = input_page_read(iface->input, offset); dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); return val; } -u32 kbase_csf_firmware_global_output( - const struct kbase_csf_global_iface *const iface, const u32 offset) +u32 kbase_csf_firmware_global_output(const struct kbase_csf_global_iface *const iface, + const u32 offset) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; u32 const val = output_page_read(iface->output, offset); dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); @@ -1536,12 +1486,12 @@ static u32 csf_doorbell_offset(int doorbell_nr) WARN_ON(doorbell_nr < 0); WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); - return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); + return DOORBELL_BLOCK_OFFSET(doorbell_nr, DOORBELL); } void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) { - kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); + kbase_reg_write32(kbdev, 
csf_doorbell_offset(doorbell_nr), (u32)1); } EXPORT_SYMBOL(kbase_csf_ring_doorbell); @@ -1580,7 +1530,7 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) continue; } - fault = (struct kbase_fault) { + fault = (struct kbase_fault){ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, }; @@ -1588,8 +1538,7 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) kbase_ctx_sched_release_ctx_lock(kctx); } - if (kbase_prepare_to_reset_gpu(kbdev, - RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1608,20 +1557,16 @@ static void firmware_error_worker(struct work_struct *const data) handle_internal_firmware_fatal(kbdev); } -static bool global_request_complete(struct kbase_device *const kbdev, - u32 const req_mask) +static bool global_request_complete(struct kbase_device *const kbdev, u32 const req_mask) { - struct kbase_csf_global_iface *global_iface = - &kbdev->csf.global_iface; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; bool complete = false; unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & - req_mask) == - (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & - req_mask)) + if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask)) complete = true; kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1637,8 +1582,7 @@ static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev int err = 0; remaining = wait_event_timeout(kbdev->csf.event_wait, - global_request_complete(kbdev, req_mask), - wait_timeout); + global_request_complete(kbdev, req_mask), wait_timeout); if (!remaining) { dev_warn(kbdev->dev, @@ -1656,9 +1600,8 @@ static int wait_for_global_request(struct kbase_device *const kbdev, u32 const r 
return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms); } -static void set_global_request( - const struct kbase_csf_global_iface *const global_iface, - u32 const req_mask) +static void set_global_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) { u32 glb_req; @@ -1666,36 +1609,35 @@ static void set_global_request( glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); glb_req ^= req_mask; - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, - req_mask); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, req_mask); } -static void enable_endpoints_global( - const struct kbase_csf_global_iface *const global_iface, - u64 const shader_core_mask) +static void enable_endpoints_global(const struct kbase_csf_global_iface *const global_iface, + u64 const shader_core_mask) { - kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, - shader_core_mask & U32_MAX); - kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, - shader_core_mask >> 32); + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, shader_core_mask & U32_MAX); + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, shader_core_mask >> 32); set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); } -static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, - const struct kbase_csf_global_iface *const global_iface) +static void set_shader_poweroff_timer(struct kbase_device *const kbdev, + const struct kbase_csf_global_iface *const global_iface) { u32 pwroff_reg; if (kbdev->csf.firmware_hctl_core_pwr) - pwroff_reg = - GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, - GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + pwroff_reg = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + DISABLE_GLB_PWROFF_TIMER, GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); else pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; - 
kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, - pwroff_reg); + kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, pwroff_reg); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_PWROFF_TIMER_CONFIG, + kbdev->csf.mcu_core_pwroff_dur_count_modifier, + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK); + set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); /* Save the programed reg value in its shadow field */ @@ -1704,12 +1646,11 @@ static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg); } -static void set_timeout_global( - const struct kbase_csf_global_iface *const global_iface, - u64 const timeout) +static void set_timeout_global(const struct kbase_csf_global_iface *const global_iface, + u64 const timeout) { kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, - timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); + timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } @@ -1722,6 +1663,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, kbdev->csf.gpu_idle_dur_count); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, + kbdev->csf.gpu_idle_dur_count_modifier, + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", @@ -1758,8 +1704,7 @@ static void set_global_debug_request(const struct kbase_csf_global_iface *const kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); } -static void request_fw_core_dump( - const struct kbase_csf_global_iface *const global_iface) +static void request_fw_core_dump(const struct kbase_csf_global_iface *const global_iface) { 
uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); @@ -1770,8 +1715,7 @@ static void request_fw_core_dump( int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; int ret; @@ -1804,26 +1748,23 @@ int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) */ static void kbasep_enable_rtu(struct kbase_device *kbdev) { - const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - - if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0)) + if (!kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(SHADER_PWRFEATURES))) return; if (kbdev->csf.firmware_hctl_core_pwr) - kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(SHADER_PWRFEATURES), 1); } static void global_init(struct kbase_device *const kbdev, u64 core_mask) { - u32 const ack_irq_mask = + u32 ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); @@ -1832,7 +1773,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) /* Update shader core allocation enable mask */ enable_endpoints_global(global_iface, core_mask); - enable_shader_poweroff_timer(kbdev, global_iface); + set_shader_poweroff_timer(kbdev, global_iface); 
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); @@ -1843,8 +1784,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) enable_gpu_idle_timer(kbdev); /* Unmask the interrupts */ - kbase_csf_firmware_global_input(global_iface, - GLB_ACK_IRQ_MASK, ack_irq_mask); + kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); #if IS_ENABLED(CONFIG_MALI_CORESIGHT) /* Enable FW MCU read/write debug interfaces */ @@ -1873,26 +1813,38 @@ static int global_init_on_boot(struct kbase_device *const kbdev) { unsigned long flags; u64 core_mask; + int ret = 0; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); core_mask = kbase_pm_ca_get_core_mask(kbdev); - kbdev->csf.firmware_hctl_core_pwr = - kbase_pm_no_mcu_core_pwroff(kbdev); + kbdev->csf.firmware_hctl_core_pwr = kbase_pm_no_mcu_core_pwroff(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); global_init(kbdev, core_mask); - return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); + ret = wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + if (!ret) { + kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + + if (!kbase_debug_coresight_csf_state_wait(kbdev, + KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); + ret = -ETIME; + } + } +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + + return ret; } -void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, - u64 core_mask) +void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, u64 core_mask) { lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->csf.glb_init_request_pending = true; - kbdev->csf.firmware_hctl_core_pwr = - kbase_pm_no_mcu_core_pwroff(kbdev); + kbdev->csf.firmware_hctl_core_pwr = kbase_pm_no_mcu_core_pwroff(kbdev); global_init(kbdev, core_mask); } @@ -1907,8 +1859,8 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) return 
!kbdev->csf.glb_init_request_pending; } -void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, - bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) +void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, bool update_core_pwroff_timer, + bool update_core_mask, u64 core_mask) { unsigned long flags; @@ -1918,7 +1870,7 @@ void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, if (update_core_mask) enable_endpoints_global(&kbdev->csf.global_iface, core_mask); if (update_core_pwroff_timer) - enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); + set_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1928,8 +1880,8 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | - GLB_REQ_CFG_PWROFF_TIMER_MASK); + return global_request_complete(kbdev, + GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PWROFF_TIMER_MASK); } /** @@ -1947,8 +1899,8 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) */ static void kbase_csf_firmware_reload_worker(struct work_struct *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.firmware_reload_work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.firmware_reload_work); int err; dev_info(kbdev->dev, "reloading firmware"); @@ -1962,6 +1914,10 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) kbase_csf_tl_reader_reset(&kbdev->timeline->csf_tl_reader); + err = kbase_csf_firmware_cfg_fw_wa_enable(kbdev); + if (WARN_ON(err)) + return; + /* Reboot the firmware */ kbase_csf_firmware_enable_mcu(kbdev); } @@ -1989,6 +1945,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) if (unlikely(!kbdev->csf.firmware_inited)) return; + /* 
Check firmware rebooted properly: we do not expect * the version number to change with a running reboot. */ @@ -2004,12 +1961,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ns, u32 *modifier) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_us; + u64 dur_val = dur_ns; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -2027,21 +1984,24 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ - dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000000); + /* Formula for dur_val = (dur/1e9) * freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, NSEC_PER_SEC); + if (dur_val < S32_MAX) { + *modifier = 1; + } else { + dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT; + *modifier = 0; + } /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - if (src_system_timestamp) - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); - else - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( + reg_val_u32, (src_system_timestamp ? 
GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -2052,16 +2012,18 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_us; + dur = kbdev->csf.gpu_idle_hysteresis_ns; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; } -u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) +u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur_ns) { unsigned long flags; - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + u32 modifier = 0; + + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur_ns, &modifier); /* The 'fw_load_lock' is taken to synchronize against the deferred * loading of FW, where the idle timer will be enabled. @@ -2069,19 +2031,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); goto end; } mutex_unlock(&kbdev->fw_load_lock); + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Failed to prevent GPU reset when updating idle_hysteresis_time"); + return kbdev->csf.gpu_idle_dur_count; + } + kbase_csf_scheduler_pm_active(kbdev); - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { dev_err(kbdev->dev, "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); kbase_csf_scheduler_pm_idle(kbdev); + kbase_reset_gpu_allow(kbdev); + return 
kbdev->csf.gpu_idle_dur_count; } @@ -2100,30 +2071,34 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); mutex_unlock(&kbdev->csf.reg_lock); kbase_csf_scheduler_pm_idle(kbdev); - + kbase_reset_gpu_allow(kbdev); end: - dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", - hysteresis_val); + dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); return hysteresis_val; } -static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_ns, + u32 *modifier) { /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_us; + u64 dur_val = dur_ns; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; + const struct kbase_pm_policy *current_policy = kbase_pm_get_policy(kbdev); + bool always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (!src_system_timestamp) { /* Get the cycle_counter source alternative */ spin_lock(&kbdev->pm.clk_rtm.lock); @@ -2138,21 +2113,32 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } - /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ - dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000000); + /* Formula for dur_val = (dur/1e9) * 
freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, NSEC_PER_SEC); + if (dur_val < S32_MAX) { + *modifier = 1; + } else { + dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT; + *modifier = 0; + } - /* Interface limits the value field to S32_MAX */ - cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + if (dur_val == 0 && !always_on) { + /* Lower Bound - as 0 disables timeout and host controls shader-core power management. */ + cnt_val_u32 = 1; + } else if (dur_val > S32_MAX) { + /* Upper Bound - as interface limits the field to S32_MAX */ + cnt_val_u32 = S32_MAX; + } else { + cnt_val_u32 = (u32)dur_val; + } reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - if (src_system_timestamp) - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); - else - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + reg_val_u32, + (src_system_timestamp ? 
GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -2163,20 +2149,23 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + pwroff = kbdev->csf.mcu_core_pwroff_dur_ns; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return pwroff; } -u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur_ns) { unsigned long flags; - const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + u32 modifier = 0; + + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur_ns, &modifier); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_ns = dur_ns; kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); @@ -2184,6 +2173,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); +} + /** * kbase_device_csf_iterator_trace_init - Send request to enable iterator * trace port. @@ -2195,24 +2189,27 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) { /* Enable the iterator trace port if supported by the GPU. 
- * It requires the GPU to have a nonzero "iter_trace_enable" + * It requires the GPU to have a nonzero "iter-trace-enable" * property in the device tree, and the FW must advertise * this feature in GLB_FEATURES. */ if (kbdev->pm.backend.gpu_powered) { - /* check device tree for iterator trace enable property */ - const void *iter_trace_param = of_get_property( - kbdev->dev->of_node, - "iter_trace_enable", NULL); + /* check device tree for iterator trace enable property + * and fallback to "iter_trace_enable" if it is not found + */ + const void *iter_trace_param = + of_get_property(kbdev->dev->of_node, "iter-trace-enable", NULL); - const struct kbase_csf_global_iface *iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + + if (!iter_trace_param) + iter_trace_param = + of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL); if (iter_trace_param) { u32 iter_trace_value = be32_to_cpup(iter_trace_param); - if ((iface->features & - GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) && + if ((iface->features & GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) && iter_trace_value) { long ack_timeout; @@ -2221,27 +2218,21 @@ static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) /* write enable request to global input */ kbase_csf_firmware_global_input_mask( - iface, GLB_REQ, - GLB_REQ_ITER_TRACE_ENABLE_MASK, + iface, GLB_REQ, GLB_REQ_ITER_TRACE_ENABLE_MASK, GLB_REQ_ITER_TRACE_ENABLE_MASK); /* Ring global doorbell */ - kbase_csf_ring_doorbell(kbdev, - CSF_KERNEL_DOORBELL_NR); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ack_timeout = wait_event_timeout( kbdev->csf.event_wait, - !((kbase_csf_firmware_global_input_read( - iface, GLB_REQ) ^ - kbase_csf_firmware_global_output( - iface, GLB_ACK)) & + !((kbase_csf_firmware_global_input_read(iface, GLB_REQ) ^ + kbase_csf_firmware_global_output(iface, GLB_ACK)) & GLB_REQ_ITER_TRACE_ENABLE_MASK), ack_timeout); return ack_timeout ? 
0 : -EINVAL; - } } - } return 0; } @@ -2249,25 +2240,22 @@ static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { init_waitqueue_head(&kbdev->csf.event_wait); - kbdev->csf.interrupt_received = false; - kbdev->csf.fw_timeout_ms = - kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; - kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( - kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); INIT_LIST_HEAD(&kbdev->csf.user_reg.list); - INIT_WORK(&kbdev->csf.firmware_reload_work, - kbase_csf_firmware_reload_worker); + INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + kbdev->csf.glb_init_request_pending = true; + mutex_init(&kbdev->csf.reg_lock); + kbase_csf_pending_gpuq_kicks_init(kbdev); kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; @@ -2276,19 +2264,24 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { + kbase_csf_pending_gpuq_kicks_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; + u32 modifier = 0; + + kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS; + #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_ns 
/= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier); + kbdev->csf.gpu_idle_dur_count_modifier = modifier; return 0; } @@ -2303,6 +2296,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) u32 entry_end_offset; u32 entry_offset; int ret; + const char *fw_name = default_fw_name; lockdep_assert_held(&kbdev->fw_load_lock); @@ -2310,8 +2304,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) return -EINVAL; kbdev->as_free &= ~MCU_AS_BITMASK; - ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, - BASE_MEM_GROUP_DEFAULT); + ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, BASE_MEM_GROUP_DEFAULT); if (ret != 0) { /* Release the address space */ @@ -2326,10 +2319,35 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#if IS_ENABLED(CONFIG_OF) + /* If we can't read CSF firmware name from DTB, + * fw_name is not modified and remains the default. + */ + ret = of_property_read_string(kbdev->dev->of_node, "firmware-name", &fw_name); + if (ret == -EINVAL) { + /* Property doesn't exist in DTB, and fw_name already points to default FW name + * so just reset return value and continue. + */ + ret = 0; + } else if (ret == -ENODATA) { + dev_warn(kbdev->dev, + "\"firmware-name\" DTB property contains no data, using default FW name"); + /* Reset return value so FW does not fail to load */ + ret = 0; + } else if (ret == -EILSEQ) { + /* This is reached when the size of the fw_name buffer is too small for the string + * stored in the DTB and the null terminator. 
+ */ + dev_warn(kbdev->dev, + "\"firmware-name\" DTB property value too long, using default FW name."); + /* Reset return value so FW does not fail to load */ + ret = 0; + } + +#endif /* IS_ENABLED(CONFIG_OF) */ + if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { - dev_err(kbdev->dev, - "Failed to load firmware image '%s'\n", - fw_name); + dev_err(kbdev->dev, "Failed to load firmware image '%s'\n", fw_name); ret = -ENOENT; } else { /* Try to save a copy and then release the loaded firmware image */ @@ -2367,10 +2385,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) version_major = mcu_fw->data[5]; if (version_major != FIRMWARE_HEADER_VERSION_MAJOR || - version_minor != FIRMWARE_HEADER_VERSION_MINOR) { - dev_err(kbdev->dev, - "Firmware header version %d.%d not understood\n", - version_major, version_minor); + version_minor != FIRMWARE_HEADER_VERSION_MINOR) { + dev_err(kbdev->dev, "Firmware header version %d.%d not understood\n", version_major, + version_minor); ret = -EINVAL; goto err_out; } @@ -2411,7 +2428,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) } else { ret = setup_shared_iface_static_region(kbdev); if (ret != 0) { - dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); + dev_err(kbdev->dev, + "Failed to insert a region for shared iface entry parsed from fw image\n"); goto err_out; } } @@ -2422,6 +2440,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } + ret = kbase_csf_firmware_cfg_fw_wa_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize firmware workarounds"); + goto err_out; + } + /* Make sure L2 cache is powered up */ kbase_pm_wait_for_l2_powered(kbdev); @@ -2456,6 +2480,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; + ret = kbase_csf_firmware_log_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); + goto err_out; + 
} + ret = kbase_csf_firmware_cfg_init(kbdev); if (ret != 0) goto err_out; @@ -2464,19 +2494,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; - ret = kbase_csf_firmware_log_init(kbdev); - if (ret != 0) { - dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); - goto err_out; - } - if (kbdev->csf.fw_core_dump.available) kbase_csf_firmware_core_dump_init(kbdev); /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, - (((u64)version_hash) << 32) | - (((u64)version_major) << 8) | version_minor); + (((u64)version_hash) << 32) | (((u64)version_major) << 8) | version_minor); return 0; err_out: @@ -2495,10 +2518,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) WARN(ret, "failed to wait for GPU reset"); - kbase_csf_firmware_log_term(kbdev); - kbase_csf_firmware_cfg_term(kbdev); + kbase_csf_firmware_log_term(kbdev); + kbase_csf_timeout_term(kbdev); kbase_csf_free_dummy_user_reg_page(kbdev); @@ -2526,32 +2549,29 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) unload_mmu_tables(kbdev); + kbase_csf_firmware_cfg_fw_wa_term(kbdev); + kbase_csf_firmware_trace_buffers_term(kbdev); while (!list_empty(&kbdev->csf.firmware_interfaces)) { struct kbase_csf_firmware_interface *interface; - interface = - list_first_entry(&kbdev->csf.firmware_interfaces, - struct kbase_csf_firmware_interface, - node); + interface = list_first_entry(&kbdev->csf.firmware_interfaces, + struct kbase_csf_firmware_interface, node); list_del(&interface->node); vunmap(interface->kernel_map); if (!interface->reuse_pages) { if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { - kbase_csf_protected_memory_free( - kbdev, interface->pma, interface->num_pages_aligned, - interface->is_small_page); + kbase_csf_protected_memory_free(kbdev, interface->pma, + interface->num_pages_aligned, + interface->is_small_page); } else { kbase_mem_pool_free_pages( - kbase_mem_pool_group_select( - 
kbdev, KBASE_MEM_GROUP_CSF_FW, - interface->is_small_page), - interface->num_pages_aligned, - interface->phys, - true, false); + kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, + interface->is_small_page), + interface->num_pages_aligned, interface->phys, true, false); } kfree(interface->phys); @@ -2563,10 +2583,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) while (!list_empty(&kbdev->csf.firmware_timeline_metadata)) { struct firmware_timeline_metadata *metadata; - metadata = list_first_entry( - &kbdev->csf.firmware_timeline_metadata, - struct firmware_timeline_metadata, - node); + metadata = list_first_entry(&kbdev->csf.firmware_timeline_metadata, + struct firmware_timeline_metadata, node); list_del(&metadata->node); kfree(metadata); @@ -2728,9 +2746,8 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, - GLB_REQ_REQ_IDLE_DISABLE, - GLB_REQ_IDLE_DISABLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, + GLB_REQ_IDLE_DISABLE_MASK); dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -2738,8 +2755,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) void kbase_csf_firmware_ping(struct kbase_device *const kbdev) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); @@ -2755,11 +2771,9 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); } -int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, - u64 const timeout) +int 
kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; int err; @@ -2810,14 +2824,13 @@ int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) unsigned long flags; bool pmode_exited; - if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + if (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & GPU_STATUS_PROTECTED_MODE_ACTIVE) break; /* Check if GPU already exited the protected mode */ kbase_csf_scheduler_spin_lock(kbdev, &flags); - pmode_exited = - !kbase_csf_scheduler_protected_mode_in_use(kbdev); + pmode_exited = !kbase_csf_scheduler_protected_mode_in_use(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); if (pmode_exited) break; @@ -2853,7 +2866,7 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) * halt request to firmware. */ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev)); - set_global_request(global_iface, GLB_REQ_HALT_MASK); + set_global_request(global_iface, GLB_REQ_HALT_MASK); dev_dbg(kbdev->dev, "Sending request to HALT MCU"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2861,13 +2874,12 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { - KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; - /* Trigger the boot of MCU firmware, Use the AUTO mode as - * otherwise on fast reset, to exit protected mode, MCU will - * not reboot by itself to enter normal mode. 
- */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); + /* Clear the HALT bit before triggering the boot of MCU firmware */ + kbase_csf_firmware_global_input_mask(iface, GLB_REQ, 0, GLB_REQ_HALT_MASK); + + enable_mcu(kbdev); } #ifdef KBASE_PM_RUNTIME @@ -2879,7 +2891,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_csf_scheduler_spin_lock(kbdev, &flags); - set_global_request(global_iface, GLB_REQ_SLEEP_MASK); + set_global_request(global_iface, GLB_REQ_SLEEP_MASK); dev_dbg(kbdev->dev, "Sending sleep request to MCU"); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2889,11 +2901,21 @@ bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); + return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && kbase_csf_firmware_mcu_halted(kbdev)); } #endif +bool kbase_csf_firmware_mcu_halt_req_complete(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + + return kbase_csf_firmware_mcu_halted(kbdev); +} + + int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -2902,7 +2924,9 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) /* Ensure GPU is powered-up until we complete config update.*/ kbase_csf_scheduler_pm_active(kbdev); - kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); + if (err) + goto exit; /* The 'reg_lock' is also taken and is held till the update is * complete, to ensure the config update gets serialized. 
@@ -2915,10 +2939,10 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); - err = wait_for_global_request(kbdev, - GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); + err = wait_for_global_request(kbdev, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); mutex_unlock(&kbdev->csf.reg_lock); +exit: kbase_csf_scheduler_pm_idle(kbdev); return err; } @@ -2938,12 +2962,10 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) * * Return: Total number of CSs, summed across all groups. */ -static u32 copy_grp_and_stm( - const struct kbase_csf_global_iface * const iface, - struct basep_cs_group_control * const group_data, - u32 max_group_num, - struct basep_cs_stream_control * const stream_data, - u32 max_total_stream_num) +static u32 copy_grp_and_stm(const struct kbase_csf_global_iface *const iface, + struct basep_cs_group_control *const group_data, u32 max_group_num, + struct basep_cs_stream_control *const stream_data, + u32 max_total_stream_num) { u32 i, total_stream_num = 0; @@ -2959,8 +2981,7 @@ static u32 copy_grp_and_stm( if (i < max_group_num) { group_data[i].features = iface->groups[i].features; group_data[i].stream_num = iface->groups[i].stream_num; - group_data[i].suspend_size = - iface->groups[i].suspend_size; + group_data[i].suspend_size = iface->groups[i].suspend_size; } for (j = 0; j < iface->groups[i].stream_num; j++) { if (total_stream_num < max_total_stream_num) @@ -2973,21 +2994,18 @@ static u32 copy_grp_and_stm( return total_stream_num; } -u32 kbase_csf_firmware_get_glb_iface( - struct kbase_device *kbdev, - struct basep_cs_group_control *const group_data, - u32 const max_group_num, - struct basep_cs_stream_control *const stream_data, - u32 const max_total_stream_num, u32 *const glb_version, - u32 *const features, u32 *const group_num, u32 *const prfcnt_size, - u32 *instr_features) +u32 kbase_csf_firmware_get_glb_iface(struct 
kbase_device *kbdev, + struct basep_cs_group_control *const group_data, + u32 const max_group_num, + struct basep_cs_stream_control *const stream_data, + u32 const max_total_stream_num, u32 *const glb_version, + u32 *const features, u32 *const group_num, + u32 *const prfcnt_size, u32 *instr_features) { - const struct kbase_csf_global_iface * const iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const iface = &kbdev->csf.global_iface; - if (WARN_ON(!glb_version) || WARN_ON(!features) || - WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || - WARN_ON(!instr_features)) + if (WARN_ON(!glb_version) || WARN_ON(!features) || WARN_ON(!group_num) || + WARN_ON(!prfcnt_size) || WARN_ON(!instr_features)) return 0; *glb_version = iface->version; @@ -2996,17 +3014,16 @@ u32 kbase_csf_firmware_get_glb_iface( *prfcnt_size = iface->prfcnt_size; *instr_features = iface->instr_features; - return copy_grp_and_stm(iface, group_data, max_group_num, - stream_data, max_total_stream_num); + return copy_grp_and_stm(iface, group_data, max_group_num, stream_data, + max_total_stream_num); } -const char *kbase_csf_firmware_get_timeline_metadata( - struct kbase_device *kbdev, const char *name, size_t *size) +const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, const char *name, + size_t *size) { struct firmware_timeline_metadata *metadata; - list_for_each_entry( - metadata, &kbdev->csf.firmware_timeline_metadata, node) { + list_for_each_entry(metadata, &kbdev->csf.firmware_timeline_metadata, node) { if (!strcmp(metadata->name, name)) { *size = metadata->size; return metadata->data; @@ -3017,18 +3034,17 @@ const char *kbase_csf_firmware_get_timeline_metadata( return NULL; } -int kbase_csf_firmware_mcu_shared_mapping_init( - struct kbase_device *kbdev, - unsigned int num_pages, - unsigned long cpu_map_properties, - unsigned long gpu_map_properties, - struct kbase_csf_mapping *csf_mapping) +int kbase_csf_firmware_mcu_shared_mapping_init(struct 
kbase_device *kbdev, unsigned int num_pages, + unsigned long cpu_map_properties, + unsigned long gpu_map_properties, + struct kbase_csf_mapping *csf_mapping) { struct tagged_addr *phys; struct kbase_va_region *va_reg; struct page **page_list; void *cpu_addr; - int i, ret = 0; + unsigned int i; + int ret = 0; pgprot_t cpu_map_prot = PAGE_KERNEL; unsigned long gpu_map_prot; @@ -3036,11 +3052,12 @@ int kbase_csf_firmware_mcu_shared_mapping_init( cpu_map_prot = PAGE_KERNEL_RO; if (kbdev->system_coherency == COHERENCY_ACE) { - gpu_map_prot = - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + gpu_map_prot = KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT_ACE); + } else if (kbdev->system_coherency == COHERENCY_ACE_LITE) { + gpu_map_prot = KBASE_REG_SHARE_BOTH | + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_SHARED); } else { - gpu_map_prot = - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + gpu_map_prot = KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); cpu_map_prot = pgprot_writecombine(cpu_map_prot); } @@ -3064,8 +3081,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, - KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages); if (!va_reg) goto va_region_alloc_error; @@ -3081,7 +3097,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret) goto mmu_insert_pages_error; @@ -3102,9 +3118,8 @@ va_region_add_error: va_region_alloc_error: vunmap(cpu_addr); vmap_error: - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false, false); + kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 
num_pages, phys, + false, false); phys_mem_pool_alloc_error: kfree(page_list); @@ -3122,8 +3137,8 @@ out: return -ENOMEM; } -void kbase_csf_firmware_mcu_shared_mapping_term( - struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) +void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, + struct kbase_csf_mapping *csf_mapping) { if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); @@ -3133,10 +3148,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term( } if (csf_mapping->phys) { - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - csf_mapping->num_pages, csf_mapping->phys, false, - false); + kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + csf_mapping->num_pages, csf_mapping->phys, false, false); } vunmap(csf_mapping->cpu_addr); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index 714a14001189..b6f07d001d24 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,7 @@ #include "device/mali_kbase_device.h" #include +#include /* * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: @@ -56,7 +57,7 @@ #define CSF_NUM_DOORBELL ((u8)24) /* Offset to the first HW doorbell page */ -#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) +#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)DOORBELLS_BASE) /* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ #define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) @@ -80,7 +81,6 @@ struct kbase_device; - /** * struct kbase_csf_mapping - Memory mapping for CSF memory. * @phys: Physical memory allocation used by the mapping. @@ -134,8 +134,8 @@ struct kbase_csf_cmd_stream_info { * @offset: Offset of the word to be written, in bytes. * @value: Value to be written. */ -void kbase_csf_firmware_cs_input( - const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value); +void kbase_csf_firmware_cs_input(const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value); /** * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page @@ -145,8 +145,8 @@ void kbase_csf_firmware_cs_input( * @info: CSI provided by the firmware. * @offset: Offset of the word to be read, in bytes. */ -u32 kbase_csf_firmware_cs_input_read( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset); +u32 kbase_csf_firmware_cs_input_read(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset); /** * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page @@ -156,9 +156,8 @@ u32 kbase_csf_firmware_cs_input_read( * @value: Value to be written. * @mask: Bitmask with the bits to be modified set. 
*/ -void kbase_csf_firmware_cs_input_mask( - const struct kbase_csf_cmd_stream_info *info, u32 offset, - u32 value, u32 mask); +void kbase_csf_firmware_cs_input_mask(const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value, u32 mask); /** * kbase_csf_firmware_cs_output() - Read a word in a CS's output page @@ -168,8 +167,7 @@ void kbase_csf_firmware_cs_input_mask( * @info: CSI provided by the firmware. * @offset: Offset of the word to be read, in bytes. */ -u32 kbase_csf_firmware_cs_output( - const struct kbase_csf_cmd_stream_info *info, u32 offset); +u32 kbase_csf_firmware_cs_output(const struct kbase_csf_cmd_stream_info *info, u32 offset); /** * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the * firmware. @@ -207,9 +205,8 @@ struct kbase_csf_cmd_stream_group_info { * @offset: Offset of the word to be written, in bytes. * @value: Value to be written. */ -void kbase_csf_firmware_csg_input( - const struct kbase_csf_cmd_stream_group_info *info, u32 offset, - u32 value); +void kbase_csf_firmware_csg_input(const struct kbase_csf_cmd_stream_group_info *info, u32 offset, + u32 value); /** * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page @@ -219,8 +216,8 @@ void kbase_csf_firmware_csg_input( * @info: CSG interface provided by the firmware. * @offset: Offset of the word to be read, in bytes. */ -u32 kbase_csf_firmware_csg_input_read( - const struct kbase_csf_cmd_stream_group_info *info, u32 offset); +u32 kbase_csf_firmware_csg_input_read(const struct kbase_csf_cmd_stream_group_info *info, + u32 offset); /** * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's @@ -231,9 +228,8 @@ u32 kbase_csf_firmware_csg_input_read( * @value: Value to be written. * @mask: Bitmask with the bits to be modified set. 
*/ -void kbase_csf_firmware_csg_input_mask( - const struct kbase_csf_cmd_stream_group_info *info, u32 offset, - u32 value, u32 mask); +void kbase_csf_firmware_csg_input_mask(const struct kbase_csf_cmd_stream_group_info *info, + u32 offset, u32 value, u32 mask); /** * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page @@ -243,8 +239,8 @@ void kbase_csf_firmware_csg_input_mask( * @info: CSG interface provided by the firmware. * @offset: Offset of the word to be read, in bytes. */ -u32 kbase_csf_firmware_csg_output( - const struct kbase_csf_cmd_stream_group_info *info, u32 offset); +u32 kbase_csf_firmware_csg_output(const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + /** * struct kbase_csf_global_iface - Global CSF interface @@ -286,8 +282,8 @@ struct kbase_csf_global_iface { * @offset: Offset of the word to be written, in bytes. * @value: Value to be written. */ -void kbase_csf_firmware_global_input( - const struct kbase_csf_global_iface *iface, u32 offset, u32 value); +void kbase_csf_firmware_global_input(const struct kbase_csf_global_iface *iface, u32 offset, + u32 value); /** * kbase_csf_firmware_global_input_mask() - Set part of a word in the global @@ -298,9 +294,8 @@ void kbase_csf_firmware_global_input( * @value: Value to be written. * @mask: Bitmask with the bits to be modified set. */ -void kbase_csf_firmware_global_input_mask( - const struct kbase_csf_global_iface *iface, u32 offset, - u32 value, u32 mask); +void kbase_csf_firmware_global_input_mask(const struct kbase_csf_global_iface *iface, u32 offset, + u32 value, u32 mask); /** * kbase_csf_firmware_global_input_read() - Read a word in a global input page @@ -310,8 +305,7 @@ void kbase_csf_firmware_global_input_mask( * @info: CSG interface provided by the firmware. * @offset: Offset of the word to be read, in bytes. 
*/ -u32 kbase_csf_firmware_global_input_read( - const struct kbase_csf_global_iface *info, u32 offset); +u32 kbase_csf_firmware_global_input_read(const struct kbase_csf_global_iface *info, u32 offset); /** * kbase_csf_firmware_global_output() - Read a word in the global output page @@ -321,8 +315,7 @@ u32 kbase_csf_firmware_global_input_read( * @iface: CSF interface provided by the firmware. * @offset: Offset of the word to be read, in bytes. */ -u32 kbase_csf_firmware_global_output( - const struct kbase_csf_global_iface *iface, u32 offset); +u32 kbase_csf_firmware_global_output(const struct kbase_csf_global_iface *iface, u32 offset); /** * kbase_csf_ring_doorbell() - Ring the doorbell @@ -344,8 +337,7 @@ void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr); * is not permanently mapped on the CPU address space, therefore it maps it * and then unmaps it to access it independently. */ -void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value); +void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value); /** * kbase_csf_update_firmware_memory - Write a value in a GPU address @@ -359,8 +351,7 @@ void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, * is not permanently mapped on the CPU address space, therefore it maps it * and then unmaps it to access it independently. */ -void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 value); +void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value); /** * kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the @@ -378,8 +369,7 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, * their final execution location during firmware boot using an address based on the * final execution location. 
*/ -void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value); +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 *value); /** * kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the @@ -397,8 +387,7 @@ void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, * their final execution location during firmware boot using an address based on the * final execution location. */ -void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 value); +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 value); /** * kbase_csf_firmware_early_init() - Early initialization for the firmware. @@ -560,11 +549,22 @@ static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) return true; #else - return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) == - MCU_STATUS_HALTED); + return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_STATUS)) == MCU_STATUS_VALUE_HALT); #endif /* CONFIG_MALI_BIFROST_NO_MALI */ } +/** + * kbase_csf_firmware_mcu_halt_req_complete - Check if the MCU Halt request is complete + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function needs to be called after Halt request has been sent to the FW. + * + * Return: true if the Halt request is complete, otherwise false. + */ +bool kbase_csf_firmware_mcu_halt_req_complete(struct kbase_device *kbdev); + + /** * kbase_csf_firmware_trigger_mcu_halt - Send the Global request to firmware to * halt its operation and bring itself @@ -641,8 +641,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev); * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @core_mask: Mask of the enabled shader cores. 
*/ -void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, - u64 core_mask); +void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, u64 core_mask); /** * kbase_csf_firmware_global_reinit_complete - Check the Global configuration @@ -668,8 +667,8 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev); * @core_mask: New core mask value if update_core_mask is true, * otherwise unused. */ -void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, - bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask); +void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, bool update_core_pwroff_timer, + bool update_core_mask, u64 core_mask); /** * kbase_csf_firmware_core_attr_updated - Check the Global configuration @@ -711,11 +710,11 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); * * Return: Total number of CSs, summed across all groups. */ -u32 kbase_csf_firmware_get_glb_iface( - struct kbase_device *kbdev, struct basep_cs_group_control *group_data, - u32 max_group_num, struct basep_cs_stream_control *stream_data, - u32 max_total_stream_num, u32 *glb_version, u32 *features, - u32 *group_num, u32 *prfcnt_size, u32 *instr_features); +u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, + struct basep_cs_group_control *group_data, u32 max_group_num, + struct basep_cs_stream_control *stream_data, + u32 max_total_stream_num, u32 *glb_version, u32 *features, + u32 *group_num, u32 *prfcnt_size, u32 *instr_features); /** * kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline @@ -727,8 +726,8 @@ u32 kbase_csf_firmware_get_glb_iface( * * Return: The firmware timeline metadata content which match @p name. 
*/ -const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, - const char *name, size_t *size); +const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, const char *name, + size_t *size); /** * kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory. @@ -750,12 +749,10 @@ const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, * * Return: 0 if success, or an error code on failure. */ -int kbase_csf_firmware_mcu_shared_mapping_init( - struct kbase_device *kbdev, - unsigned int num_pages, - unsigned long cpu_map_properties, - unsigned long gpu_map_properties, - struct kbase_csf_mapping *csf_mapping); +int kbase_csf_firmware_mcu_shared_mapping_init(struct kbase_device *kbdev, unsigned int num_pages, + unsigned long cpu_map_properties, + unsigned long gpu_map_properties, + struct kbase_csf_mapping *csf_mapping); /** * kbase_csf_firmware_mcu_shared_mapping_term - Unmap and free MCU shared memory. @@ -763,8 +760,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init( * @kbdev: Device pointer. * @csf_mapping: Metadata of the memory mapping to terminate. */ -void kbase_csf_firmware_mcu_shared_mapping_term( - struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping); +void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, + struct kbase_csf_mapping *csf_mapping); #ifdef CONFIG_MALI_BIFROST_DEBUG extern bool fw_debug; @@ -869,6 +866,22 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); */ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); +/** + * kbase_csf_firmware_reset_mcu_core_pwroff_time - Reset the MCU shader Core power-off + * time value + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Sets the MCU Shader Core power-off time value to the default. 
+ * + * The configured MCU shader Core power-off timer will only have effect when the host + * driver has delegated the shader cores' power management to MCU. + * + * Return: the actual internal core power-off timer value in register defined + * format. + */ +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev); + /** * kbase_csf_interface_version - Helper function to build the full firmware * interface version in a format compatible with @@ -882,8 +895,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 */ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) { - return ((major << GLB_VERSION_MAJOR_SHIFT) | - (minor << GLB_VERSION_MINOR_SHIFT) | + return ((major << GLB_VERSION_MAJOR_SHIFT) | (minor << GLB_VERSION_MINOR_SHIFT) | (patch << GLB_VERSION_PATCH_SHIFT)); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c index c895b080143a..a8dc411e6884 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,8 @@ #define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity" +#define CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME "WA_CFG0" + /** * struct firmware_config - Configuration item within the MCU firmware * @@ -66,10 +68,10 @@ struct firmware_config { u32 cur_val; }; -#define FW_CFG_ATTR(_name, _mode) \ - struct attribute fw_cfg_attr_##_name = { \ - .name = __stringify(_name), \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ +#define FW_CFG_ATTR(_name, _mode) \ + struct attribute fw_cfg_attr_##_name = { \ + .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ } static FW_CFG_ATTR(min, 0444); @@ -78,17 +80,14 @@ static FW_CFG_ATTR(cur, 0644); static void fw_cfg_kobj_release(struct kobject *kobj) { - struct firmware_config *config = - container_of(kobj, struct firmware_config, kobj); + struct firmware_config *config = container_of(kobj, struct firmware_config, kobj); kfree(config); } -static ssize_t show_fw_cfg(struct kobject *kobj, - struct attribute *attr, char *buf) +static ssize_t show_fw_cfg(struct kobject *kobj, struct attribute *attr, char *buf) { - struct firmware_config *config = - container_of(kobj, struct firmware_config, kobj); + struct firmware_config *config = container_of(kobj, struct firmware_config, kobj); struct kbase_device *kbdev = config->kbdev; u32 val = 0; @@ -106,22 +105,17 @@ static ssize_t show_fw_cfg(struct kobject *kobj, val = config->cur_val; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } else { - dev_warn(kbdev->dev, - "Unexpected read from entry %s/%s", - config->name, attr->name); + dev_warn(kbdev->dev, "Unexpected read from entry %s/%s", config->name, attr->name); return -EINVAL; } - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return scnprintf(buf, PAGE_SIZE, "%u\n", val); } -static ssize_t store_fw_cfg(struct kobject *kobj, - struct 
attribute *attr, - const char *buf, - size_t count) +static ssize_t store_fw_cfg(struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) { - struct firmware_config *config = - container_of(kobj, struct firmware_config, kobj); + struct firmware_config *config = container_of(kobj, struct firmware_config, kobj); struct kbase_device *kbdev = config->kbdev; if (!kbdev) @@ -140,6 +134,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj, return -EINVAL; } + if (!strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME)) + return -EPERM; + if ((val < config->min) || (val > config->max)) return -EINVAL; @@ -161,8 +158,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj, * the User to retry the write. */ if (kbase_reset_gpu_silent(kbdev)) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return -EAGAIN; } } @@ -176,8 +172,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj, * in the RONLY section of firmware image, which is not * reloaded on firmware reboot due to GPU reset. 
*/ - kbase_csf_update_firmware_memory( - kbdev, config->address, val); + kbase_csf_update_firmware_memory(kbdev, config->address, val); config->cur_val = val; @@ -210,9 +205,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj, if (!config->updatable) kbase_reset_gpu_wait(kbdev); } else { - dev_warn(kbdev->dev, - "Unexpected write to entry %s/%s", - config->name, attr->name); + dev_warn(kbdev->dev, "Unexpected write to entry %s/%s", config->name, attr->name); return -EINVAL; } @@ -248,12 +241,11 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) { struct firmware_config *config; - kbdev->csf.fw_cfg_kobj = kobject_create_and_add( - CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj); + kbdev->csf.fw_cfg_kobj = + kobject_create_and_add(CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj); if (!kbdev->csf.fw_cfg_kobj) { kobject_put(kbdev->csf.fw_cfg_kobj); - dev_err(kbdev->dev, - "Creation of %s sysfs sub-directory failed\n", + dev_err(kbdev->dev, "Creation of %s sysfs sub-directory failed\n", CSF_FIRMWARE_CFG_SYSFS_DIR_NAME); return -ENOMEM; } @@ -261,15 +253,25 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) list_for_each_entry(config, &kbdev->csf.firmware_config, node) { int err; - kbase_csf_read_firmware_memory(kbdev, config->address, - &config->cur_val); + kbase_csf_read_firmware_memory(kbdev, config->address, &config->cur_val); - err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, - kbdev->csf.fw_cfg_kobj, "%s", config->name); + if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) && + (config->cur_val)) { + err = kbase_csf_firmware_log_toggle_logging_calls(config->kbdev, + config->cur_val); + + if (err) { + kobject_put(&config->kobj); + dev_err(kbdev->dev, "Failed to enable logging (result: %d)", err); + return err; + } + } + + err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, kbdev->csf.fw_cfg_kobj, + "%s", config->name); if (err) { kobject_put(&config->kobj); - dev_err(kbdev->dev, - "Creation of 
%s sysfs sub-directory failed\n", + dev_err(kbdev->dev, "Creation of %s sysfs sub-directory failed\n", config->name); return err; } @@ -285,8 +287,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) while (!list_empty(&kbdev->csf.firmware_config)) { struct firmware_config *config; - config = list_first_entry(&kbdev->csf.firmware_config, - struct firmware_config, node); + config = + list_first_entry(&kbdev->csf.firmware_config, struct firmware_config, node); list_del(&config->node); if (config->kobj_inited) { @@ -307,6 +309,7 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, const char *name = (char *)&entry[3]; struct firmware_config *config; const unsigned int name_len = size - CONFIGURATION_ENTRY_NAME_OFFSET; + CSTD_UNUSED(fw); /* Allocate enough space for struct firmware_config and the * configuration option name (with NULL termination) @@ -318,7 +321,7 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, config->kbdev = kbdev; config->updatable = updatable; - config->name = (char *)(config+1); + config->name = (char *)(config + 1); config->address = entry[0]; config->min = entry[1]; config->max = entry[2]; @@ -328,12 +331,80 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, list_add(&config->node, &kbdev->csf.firmware_config); - dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", - config->name, config->address, - config->min, config->max); + dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", config->name, + config->address, config->min, config->max); return 0; } + +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev) +{ + struct firmware_config *config; + + /* "quirks_ext" property is optional */ + if (!kbdev->csf.quirks_ext) + return 0; + + list_for_each_entry(config, &kbdev->csf.firmware_config, node) { + if (strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME)) + continue; + dev_info(kbdev->dev, "External quirks 
0: 0x%08x", kbdev->csf.quirks_ext[0]); + kbase_csf_update_firmware_memory(kbdev, config->address, kbdev->csf.quirks_ext[0]); + return 0; + } + + return -ENOENT; +} + +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) +{ + int ret; + int entry_count; + size_t entry_bytes; + + /* "quirks-ext" property is optional and may have no value. + * Also try fallback "quirks_ext" property if it doesn't exist. + */ + entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext"); + + if (entry_count == -EINVAL) + entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext"); + + if (entry_count == -EINVAL || entry_count == -ENODATA) + return 0; + + entry_bytes = entry_count * sizeof(u32); + kbdev->csf.quirks_ext = kzalloc(entry_bytes, GFP_KERNEL); + if (!kbdev->csf.quirks_ext) + return -ENOMEM; + + ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks-ext", kbdev->csf.quirks_ext, + entry_count); + + if (ret == -EINVAL) + ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks_ext", + kbdev->csf.quirks_ext, entry_count); + + if (ret == -EINVAL || ret == -ENODATA) { + /* This is unexpected since the property is already accessed for counting the number + * of its elements. 
+ */ + dev_err(kbdev->dev, "\"quirks_ext\" DTB property data read failed"); + return ret; + } + if (ret == -EOVERFLOW) { + dev_err(kbdev->dev, "\"quirks_ext\" DTB property data size exceeds 32 bits"); + return ret; + } + + return kbase_csf_firmware_cfg_fw_wa_enable(kbdev); +} + +void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev) +{ + kfree(kbdev->csf.quirks_ext); +} + #else int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) { @@ -351,4 +422,15 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, { return 0; } + +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev) +{ + return 0; +} + +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h index b227cf158925..a400f073bcee 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,4 +69,38 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw, const u32 *entry, unsigned int size, bool updatable); + +/** + * kbase_csf_firmware_cfg_fw_wa_enable() - Enable firmware workarounds configuration. 
+ * + * @kbdev: Kbase device structure + * + * Look for the config entry that enables support in FW for workarounds and set it according to + * the firmware workaround configuration before the initial boot or reload of firmware. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_fw_wa_init() - Initialize firmware workarounds configuration. + * + * @kbdev: Kbase device structure + * + * Retrieve and save the firmware workarounds configuration from device-tree "quirks_ext" property. + * Then, look for the config entry that enables support in FW for workarounds and set it according + * to the configuration before the initial firmware boot. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_fw_wa_term - Delete local cache for firmware workarounds configuration. + * + * @kbdev: Pointer to the Kbase device + * + */ +void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev); + #endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c index f0a10d197eec..04a81b04e511 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,7 @@ #include #include #include +#include #include "mali_kbase.h" #include "mali_kbase_csf_firmware_core_dump.h" @@ -41,7 +42,7 @@ #define FW_CORE_DUMP_DATA_VERSION_MINOR 1 /* Full version of the image header core dump data format */ -#define FW_CORE_DUMP_DATA_VERSION \ +#define FW_CORE_DUMP_DATA_VERSION \ ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) /* Validity flag to indicate if the MCU registers in the buffer are valid */ @@ -90,20 +91,20 @@ struct prstatus32_timeval { * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. */ struct elf_prstatus32 { - struct elf_siginfo pr_info; /* Info associated with signal. */ - short int pr_cursig; /* Current signal. */ - unsigned int pr_sigpend; /* Set of pending signals. */ - unsigned int pr_sighold; /* Set of held signals. */ + struct elf_siginfo pr_info; /* Info associated with signal. */ + short int pr_cursig; /* Current signal. */ + unsigned int pr_sigpend; /* Set of pending signals. */ + unsigned int pr_sighold; /* Set of held signals. */ pid_t pr_pid; pid_t pr_ppid; pid_t pr_pgrp; pid_t pr_sid; - struct prstatus32_timeval pr_utime; /* User time. */ - struct prstatus32_timeval pr_stime; /* System time. */ - struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ - struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ - u32 pr_reg[18]; /* GP registers. */ - int pr_fpvalid; /* True if math copro being used. */ + struct prstatus32_timeval pr_utime; /* User time. */ + struct prstatus32_timeval pr_stime; /* System time. */ + struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ + struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ + u32 pr_reg[18]; /* GP registers. */ + int pr_fpvalid; /* True if math copro being used. 
*/ }; /** @@ -505,7 +506,7 @@ static int fw_core_dump_create(struct kbase_device *kbdev) /* Ensure MCU is active before requesting the core dump. */ kbase_csf_scheduler_pm_active(kbdev); - err = kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); if (!err) err = kbase_csf_firmware_req_core_dump(kbdev); @@ -576,6 +577,7 @@ static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) */ static void fw_core_dump_seq_stop(struct seq_file *m, void *v) { + CSTD_UNUSED(m); kfree(v); } @@ -664,9 +666,9 @@ static int fw_core_dump_seq_show(struct seq_file *m, void *v) /* Write the current page. */ page = as_page(data->interface->phys[data->page_num]); - p = kmap_atomic(page); + p = kbase_kmap_atomic(page); seq_write(m, p, FW_PAGE_SIZE); - kunmap_atomic(p); + kbase_kunmap_atomic(p); return 0; } @@ -746,15 +748,16 @@ open_fail: static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { - int err; + ssize_t err; struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; struct kbase_device *const kbdev = dump_data->kbdev; + CSTD_UNUSED(ubuf); CSTD_UNUSED(ppos); err = fw_core_dump_create(kbdev); - return err ? err : count; + return err ? err : (ssize_t)count; } /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c index 6e0d3c2f5071..b57121649966 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,24 +30,24 @@ /* * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address. */ -#define ARMV7_T1_BL_IMM_INSTR 0xd800f000 +#define ARMV7_T1_BL_IMM_INSTR 0xd800f000 /* * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum * negative jump offset. */ -#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216 +#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216 /* * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum * positive jump offset. */ -#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214 +#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214 /* * ARMv7 instruction: Double NOP instructions. */ -#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00 +#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00 #if defined(CONFIG_DEBUG_FS) @@ -55,7 +55,7 @@ static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) { struct kbase_device *kbdev = (struct kbase_device *)data; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); @@ -70,7 +70,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) { struct kbase_device *kbdev = (struct kbase_device *)data; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); u64 new_mask; unsigned int enable_bits_count; @@ -115,7 +115,7 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us int ret; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); 
if (tb == NULL) { dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); @@ -125,8 +125,9 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) return -EBUSY; - /* Reading from userspace is only allowed in manual mode */ - if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { + /* Reading from userspace is only allowed in manual mode or auto-discard mode */ + if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL && + fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) { ret = -EINVAL; goto out; } @@ -176,8 +177,9 @@ static int kbase_csf_firmware_log_mode_write(void *data, u64 val) cancel_delayed_work_sync(&fw_log->poll_work); break; case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: + case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: schedule_delayed_work(&fw_log->poll_work, - msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms))); break; default: ret = -EINVAL; @@ -191,6 +193,24 @@ out: return ret; } +static int kbase_csf_firmware_log_poll_period_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + *val = atomic_read(&fw_log->poll_period_ms); + return 0; +} + +static int kbase_csf_firmware_log_poll_period_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + atomic_set(&fw_log->poll_period_ms, val); + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, kbase_csf_firmware_log_enable_mask_read, kbase_csf_firmware_log_enable_mask_write, "%llx\n"); @@ -204,48 +224,124 @@ static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, kbase_csf_firmware_log_mode_write, "%llu\n"); 
+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_poll_period_fops, + kbase_csf_firmware_log_poll_period_read, + kbase_csf_firmware_log_poll_period_write, "%llu\n"); #endif /* CONFIG_DEBUG_FS */ +static void kbase_csf_firmware_log_discard_buffer(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); + + if (tb == NULL) { + dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware log discard skipped"); + return; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return; + + kbase_csf_firmware_trace_buffer_discard(tb); + + atomic_set(&fw_log->busy, 0); +} + static void kbase_csf_firmware_log_poll(struct work_struct *work) { struct kbase_device *kbdev = container_of(work, struct kbase_device, csf.fw_log.poll_work.work); struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; - schedule_delayed_work(&fw_log->poll_work, - msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT) + kbase_csf_firmware_log_dump_buffer(kbdev); + else if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) + kbase_csf_firmware_log_discard_buffer(kbdev); + else + return; - kbase_csf_firmware_log_dump_buffer(kbdev); + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms))); } int kbase_csf_firmware_log_init(struct kbase_device *kbdev) { struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + int err = 0; +#if defined(CONFIG_DEBUG_FS) + struct dentry *dentry; +#endif /* CONFIG_DEBUG_FS */ /* Add one byte for null-termination */ fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); - if (fw_log->dump_buf == NULL) - return -ENOMEM; + if (fw_log->dump_buf == NULL) { + err = -ENOMEM; + goto out; + } /* Ensure null-termination for all strings */ fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; + /* Set default 
log polling period */ + atomic_set(&fw_log->poll_period_ms, KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT); + + INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); +#ifdef CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD; + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT)); +#elif defined(CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT) + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT; + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT)); +#else /* CONFIG_MALI_FW_TRACE_MODE_MANUAL */ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; +#endif atomic_set(&fw_log->busy, 0); - INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); -#if defined(CONFIG_DEBUG_FS) - debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_log_enable_mask_fops); - debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_firmware_log_debugfs_fops); - debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_log_mode_fops); -#endif /* CONFIG_DEBUG_FS */ +#if !defined(CONFIG_DEBUG_FS) + return 0; +#else /* !CONFIG_DEBUG_FS */ + dentry = debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_firmware_log_enable_mask_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_enable_mask\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_firmware_log_debugfs_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_traces\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, + 
&kbase_csf_firmware_log_mode_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_mode\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_trace_poll_period_ms", 0644, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_firmware_log_poll_period_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_poll_period_ms"); + err = -ENOENT; + goto free_out; + } return 0; + +free_out: + kfree(fw_log->dump_buf); + fw_log->dump_buf = NULL; +#endif /* CONFIG_DEBUG_FS */ +out: + return err; } void kbase_csf_firmware_log_term(struct kbase_device *kbdev) @@ -265,7 +361,7 @@ void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; unsigned int read_size, remaining_size; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); @@ -349,14 +445,14 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); /* Read callee address */ kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t), - &callee_address); + &callee_address); diff = callee_address - calling_address - 4; sign = !!(diff & 0x80000000); if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || - ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { + ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", - calling_address); + calling_address); continue; } @@ -377,9 +473,9 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo /* Patch logging func calls in their load location */ dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address, - bl_instruction); + 
bl_instruction); kbase_csf_update_firmware_memory_exe(kbdev, calling_address, - bl_instruction); + bl_instruction); } } else { for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { @@ -388,7 +484,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo /* Overwrite logging func calls with 2 NOP instructions */ kbase_csf_update_firmware_memory_exe(kbdev, calling_address, - ARMV7_DOUBLE_NOP_INSTR); + ARMV7_DOUBLE_NOP_INSTR); } } } @@ -418,17 +514,15 @@ int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 /* Wait for the MCU to get disabled */ dev_info(kbdev->dev, "Wait for the MCU to get disabled"); - ret = kbase_pm_wait_for_desired_state(kbdev); + ret = kbase_pm_killable_wait_for_desired_state(kbdev); if (ret) { - dev_err(kbdev->dev, - "wait for PM state failed when toggling FW logging calls"); + dev_err(kbdev->dev, "wait for PM state failed when toggling FW logging calls"); ret = -EAGAIN; goto out; } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - mcu_inactive = - kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); + mcu_inactive = kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (!mcu_inactive) { dev_err(kbdev->dev, diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index 833947facce3..346a28ee6772 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -55,8 +55,7 @@ */ bool fw_debug; /* Default value of 0/false */ module_param(fw_debug, bool, 0444); -MODULE_PARM_DESC(fw_debug, - "Enables effective use of a debugger for debugging firmware code."); +MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging firmware code."); #endif #define DUMMY_FW_PAGE_SIZE SZ_4K @@ -101,10 +100,12 @@ struct dummy_firmware_interface 
{ struct list_head node; } dummy_firmware_interface; -#define CSF_GLB_REQ_CFG_MASK \ - (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ +#define CSF_GLB_REQ_CFG_MASK \ + (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) +#define CHECK_ALIGN64_VIOLATION(offset) WARN_ON((offset) % sizeof(u64)) + static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -112,8 +113,7 @@ static inline u32 input_page_read(const u32 *const input, const u32 offset) return input[offset / sizeof(u32)]; } -static inline void input_page_write(u32 *const input, const u32 offset, - const u32 value) +static inline void input_page_write(u32 *const input, const u32 offset, const u32 value) { WARN_ON(offset % sizeof(u32)); @@ -127,14 +127,14 @@ static inline u32 output_page_read(const u32 *const output, const u32 offset) return output[offset / sizeof(u32)]; } -static inline void output_page_write(u32 *const output, const u32 offset, - const u32 value) +static inline void output_page_write(u32 *const output, const u32 offset, const u32 value) { WARN_ON(offset % sizeof(u32)); output[offset / sizeof(u32)] = value; } + /** * invent_memory_setup_entry() - Invent an "interface memory setup" section * @@ -179,8 +179,8 @@ static void free_global_iface(struct kbase_device *kbdev) } static int invent_cmd_stream_group_info(struct kbase_device *kbdev, - struct kbase_csf_cmd_stream_group_info *ginfo, - struct dummy_firmware_csg *csg) + struct kbase_csf_cmd_stream_group_info *ginfo, + struct dummy_firmware_csg *csg) { unsigned int sid; @@ -208,10 +208,8 @@ static int invent_cmd_stream_group_info(struct kbase_device *kbdev, stream->kbdev = kbdev; stream->features = STREAM_FEATURES_WORK_REGISTERS_SET(0, 80) | - STREAM_FEATURES_SCOREBOARDS_SET(0, 8) | - STREAM_FEATURES_COMPUTE_SET(0, 1) | - STREAM_FEATURES_FRAGMENT_SET(0, 1) | - STREAM_FEATURES_TILER_SET(0, 1); + 
STREAM_FEATURES_SCOREBOARDS_SET(0, 8) | STREAM_FEATURES_COMPUTE_SET(0, 1) | + STREAM_FEATURES_FRAGMENT_SET(0, 1) | STREAM_FEATURES_TILER_SET(0, 1); } return 0; @@ -250,7 +248,7 @@ static int invent_capabilities(struct kbase_device *kbdev) int err; err = invent_cmd_stream_group_info(kbdev, &iface->groups[gid], - &interface->csg[gid]); + &interface->csg[gid]); if (err < 0) { free_global_iface(kbdev); return err; @@ -260,36 +258,30 @@ static int invent_capabilities(struct kbase_device *kbdev) return 0; } -void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value) +void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value) { /* NO_MALI: Nothing to do here */ } - -void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, - u32 gpu_addr, u32 value) +void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value) { /* NO_MALI: Nothing to do here */ } -void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 *value) +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 *value) { /* NO_MALI: Nothing to do here */ } -void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, - u32 gpu_addr, u32 value) +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 value) { /* NO_MALI: Nothing to do here */ } -void kbase_csf_firmware_cs_input( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset, - const u32 value) +void kbase_csf_firmware_cs_input(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset, const u32 value) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); input_page_write(info->input, offset, value); @@ -300,48 +292,44 @@ void kbase_csf_firmware_cs_input( } } -u32 kbase_csf_firmware_cs_input_read( - 
const struct kbase_csf_cmd_stream_info *const info, - const u32 offset) +u32 kbase_csf_firmware_cs_input_read(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = input_page_read(info->input, offset); dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); return val; } -void kbase_csf_firmware_cs_input_mask( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset, - const u32 value, const u32 mask) +void kbase_csf_firmware_cs_input_mask(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset, const u32 value, const u32 mask) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; - dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", - offset, value, mask); + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", offset, value, mask); /* NO_MALI: Go through kbase_csf_firmware_cs_input to capture writes */ - kbase_csf_firmware_cs_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); + kbase_csf_firmware_cs_input( + info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); } -u32 kbase_csf_firmware_cs_output( - const struct kbase_csf_cmd_stream_info *const info, const u32 offset) +u32 kbase_csf_firmware_cs_output(const struct kbase_csf_cmd_stream_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = output_page_read(info->output, offset); dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); return val; } -void kbase_csf_firmware_csg_input( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset, const u32 value) +void kbase_csf_firmware_csg_input(const struct kbase_csf_cmd_stream_group_info 
*const info, + const u32 offset, const u32 value) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; - dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", - offset, value); + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", offset, value); input_page_write(info->input, offset, value); if (offset == CSG_REQ) { @@ -350,35 +338,32 @@ void kbase_csf_firmware_csg_input( } } -u32 kbase_csf_firmware_csg_input_read( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset) +u32 kbase_csf_firmware_csg_input_read(const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = input_page_read(info->input, offset); dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); return val; } -void kbase_csf_firmware_csg_input_mask( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset, const u32 value, const u32 mask) +void kbase_csf_firmware_csg_input_mask(const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value, const u32 mask) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; - dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", - offset, value, mask); + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", offset, value, mask); /* NO_MALI: Go through kbase_csf_firmware_csg_input to capture writes */ - kbase_csf_firmware_csg_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); + kbase_csf_firmware_csg_input( + info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); } -u32 kbase_csf_firmware_csg_output( - const struct kbase_csf_cmd_stream_group_info *const info, - const u32 offset) +u32 kbase_csf_firmware_csg_output(const 
struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) { - const struct kbase_device * const kbdev = info->kbdev; + const struct kbase_device *const kbdev = info->kbdev; u32 const val = output_page_read(info->output, offset); dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); @@ -386,11 +371,11 @@ u32 kbase_csf_firmware_csg_output( } KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); -void kbase_csf_firmware_global_input( - const struct kbase_csf_global_iface *const iface, const u32 offset, - const u32 value) + +void kbase_csf_firmware_global_input(const struct kbase_csf_global_iface *const iface, + const u32 offset, const u32 value) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); input_page_write(iface->input, offset, value); @@ -411,34 +396,33 @@ void kbase_csf_firmware_global_input( } KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); -void kbase_csf_firmware_global_input_mask( - const struct kbase_csf_global_iface *const iface, const u32 offset, - const u32 value, const u32 mask) +void kbase_csf_firmware_global_input_mask(const struct kbase_csf_global_iface *const iface, + const u32 offset, const u32 value, const u32 mask) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; - dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", - offset, value, mask); + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", offset, value, mask); /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ - kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); + kbase_csf_firmware_global_input( + iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); } 
KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); -u32 kbase_csf_firmware_global_input_read( - const struct kbase_csf_global_iface *const iface, const u32 offset) +u32 kbase_csf_firmware_global_input_read(const struct kbase_csf_global_iface *const iface, + const u32 offset) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; u32 const val = input_page_read(iface->input, offset); dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); return val; } -u32 kbase_csf_firmware_global_output( - const struct kbase_csf_global_iface *const iface, const u32 offset) +u32 kbase_csf_firmware_global_output(const struct kbase_csf_global_iface *const iface, + const u32 offset) { - const struct kbase_device * const kbdev = iface->kbdev; + const struct kbase_device *const kbdev = iface->kbdev; u32 const val = output_page_read(iface->output, offset); dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); @@ -574,7 +558,7 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) continue; } - fault = (struct kbase_fault) { + fault = (struct kbase_fault){ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, }; @@ -582,8 +566,7 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) kbase_ctx_sched_release_ctx_lock(kctx); } - if (kbase_prepare_to_reset_gpu(kbdev, - RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -602,20 +585,16 @@ static void firmware_error_worker(struct work_struct *const data) handle_internal_firmware_fatal(kbdev); } -static bool global_request_complete(struct kbase_device *const kbdev, - u32 const req_mask) +static bool global_request_complete(struct kbase_device *const kbdev, u32 const req_mask) { - struct kbase_csf_global_iface *global_iface = - &kbdev->csf.global_iface; + struct kbase_csf_global_iface *global_iface = 
&kbdev->csf.global_iface; bool complete = false; unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & - req_mask) == - (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & - req_mask)) + if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask)) complete = true; kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -623,32 +602,27 @@ static bool global_request_complete(struct kbase_device *const kbdev, return complete; } -static int wait_for_global_request(struct kbase_device *const kbdev, - u32 const req_mask) +static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask) { - const long wait_timeout = - kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); long remaining; int err = 0; remaining = wait_event_timeout(kbdev->csf.event_wait, - global_request_complete(kbdev, req_mask), - wait_timeout); + global_request_complete(kbdev, req_mask), wait_timeout); if (!remaining) { dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", req_mask); err = -ETIMEDOUT; - } return err; } -static void set_global_request( - const struct kbase_csf_global_iface *const global_iface, - u32 const req_mask) +static void set_global_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) { u32 glb_req; @@ -656,48 +630,41 @@ static void set_global_request( glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); glb_req ^= req_mask; - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, - req_mask); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, req_mask); } -static void enable_endpoints_global( - const struct kbase_csf_global_iface *const global_iface, - u64 const shader_core_mask) +static void 
enable_endpoints_global(const struct kbase_csf_global_iface *const global_iface, + u64 const shader_core_mask) { - kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, - shader_core_mask & U32_MAX); - kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, - shader_core_mask >> 32); + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, shader_core_mask & U32_MAX); + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, shader_core_mask >> 32); set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); } -static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, - const struct kbase_csf_global_iface *const global_iface) +static void set_shader_poweroff_timer(struct kbase_device *const kbdev, + const struct kbase_csf_global_iface *const global_iface) { u32 pwroff_reg; if (kbdev->csf.firmware_hctl_core_pwr) - pwroff_reg = - GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, - GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + pwroff_reg = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + DISABLE_GLB_PWROFF_TIMER, GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); else pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; - kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, - pwroff_reg); + kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, pwroff_reg); set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); /* Save the programed reg value in its shadow field */ kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; } -static void set_timeout_global( - const struct kbase_csf_global_iface *const global_iface, - u64 const timeout) +static void set_timeout_global(const struct kbase_csf_global_iface *const global_iface, + u64 const timeout) { kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, - timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); + timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); } @@ -746,8 +713,7 @@ static 
void set_global_debug_request(const struct kbase_csf_global_iface *const kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); } -static void request_fw_core_dump( - const struct kbase_csf_global_iface *const global_iface) +static void request_fw_core_dump(const struct kbase_csf_global_iface *const global_iface) { uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); @@ -758,8 +724,7 @@ static void request_fw_core_dump( int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; int ret; @@ -791,15 +756,14 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); /* Update shader core allocation enable mask */ enable_endpoints_global(global_iface, core_mask); - enable_shader_poweroff_timer(kbdev, global_iface); + set_shader_poweroff_timer(kbdev, global_iface); set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); @@ -810,8 +774,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) enable_gpu_idle_timer(kbdev); /* Unmask the interrupts */ - kbase_csf_firmware_global_input(global_iface, - GLB_ACK_IRQ_MASK, ack_irq_mask); + kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); @@ -834,8 +797,7 @@ static int global_init_on_boot(struct kbase_device *const kbdev) 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); core_mask = kbase_pm_ca_get_core_mask(kbdev); - kbdev->csf.firmware_hctl_core_pwr = - kbase_pm_no_mcu_core_pwroff(kbdev); + kbdev->csf.firmware_hctl_core_pwr = kbase_pm_no_mcu_core_pwroff(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); global_init(kbdev, core_mask); @@ -843,14 +805,12 @@ static int global_init_on_boot(struct kbase_device *const kbdev) return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); } -void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, - u64 core_mask) +void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, u64 core_mask) { lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->csf.glb_init_request_pending = true; - kbdev->csf.firmware_hctl_core_pwr = - kbase_pm_no_mcu_core_pwroff(kbdev); + kbdev->csf.firmware_hctl_core_pwr = kbase_pm_no_mcu_core_pwroff(kbdev); global_init(kbdev, core_mask); } @@ -865,8 +825,8 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) return !kbdev->csf.glb_init_request_pending; } -void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, - bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) +void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, bool update_core_pwroff_timer, + bool update_core_mask, u64 core_mask) { unsigned long flags; @@ -876,7 +836,7 @@ void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, if (update_core_mask) enable_endpoints_global(&kbdev->csf.global_iface, core_mask); if (update_core_pwroff_timer) - enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); + set_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -886,14 +846,14 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return global_request_complete(kbdev, 
GLB_REQ_CFG_ALLOC_EN_MASK | - GLB_REQ_CFG_PWROFF_TIMER_MASK); + return global_request_complete(kbdev, + GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PWROFF_TIMER_MASK); } static void kbase_csf_firmware_reload_worker(struct work_struct *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.firmware_reload_work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.firmware_reload_work); unsigned long flags; /* Reboot the firmware */ @@ -933,7 +893,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms, u32 *modifier) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ @@ -960,17 +920,19 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; dur_val = div_u64(dur_val, 1000); + *modifier = 0; + /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ if (src_system_timestamp) - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( + reg_val_u32, GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); else - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( + reg_val_u32, GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); return reg_val_u32; } @@ -981,7 +943,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_us; + dur = kbdev->csf.gpu_idle_hysteresis_ns; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -990,7 +952,9 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) { unsigned long flags; - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + u32 modifier = 0; + + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur, &modifier); /* The 'fw_load_lock' is taken to synchronize against the deferred * loading of FW, where the idle timer will be enabled. 
@@ -998,19 +962,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); goto end; } mutex_unlock(&kbdev->fw_load_lock); + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Failed to prevent GPU reset when updating idle_hysteresis_time"); + return kbdev->csf.gpu_idle_dur_count; + } + kbase_csf_scheduler_pm_active(kbdev); - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { dev_err(kbdev->dev, "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); kbase_csf_scheduler_pm_idle(kbdev); + kbase_reset_gpu_allow(kbdev); + return kbdev->csf.gpu_idle_dur_count; } @@ -1029,23 +1002,24 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); mutex_unlock(&kbdev->csf.reg_lock); kbase_csf_scheduler_pm_idle(kbdev); - + kbase_reset_gpu_allow(kbdev); end: - dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", - hysteresis_val); + dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); return hysteresis_val; } -static u32 
convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us, + u32 *modifier) { /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); @@ -1071,17 +1045,19 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; dur_val = div_u64(dur_val, 1000000); + *modifier = 0; + /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ if (src_system_timestamp) - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + reg_val_u32, GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); else - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + reg_val_u32, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); return reg_val_u32; } @@ -1092,7 +1068,7 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + pwroff = kbdev->csf.mcu_core_pwroff_dur_ns; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return pwroff; @@ -1101,11 +1077,14 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) { unsigned long flags; - const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + u32 modifier = 0; + + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur, &modifier); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->csf.mcu_core_pwroff_dur_us = 
dur; + kbdev->csf.mcu_core_pwroff_dur_ns = dur; kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); @@ -1113,42 +1092,50 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); +} + int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { init_waitqueue_head(&kbdev->csf.event_wait); - kbdev->csf.interrupt_received = false; - kbdev->csf.fw_timeout_ms = - kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); INIT_LIST_HEAD(&kbdev->csf.user_reg.list); - INIT_WORK(&kbdev->csf.firmware_reload_work, - kbase_csf_firmware_reload_worker); + INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); mutex_init(&kbdev->csf.reg_lock); + kbase_csf_pending_gpuq_kicks_init(kbdev); return 0; } void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { + kbase_csf_pending_gpuq_kicks_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; + u32 modifier = 0; + + kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + 
kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier); + kbdev->csf.gpu_idle_dur_count_modifier = modifier; return 0; } @@ -1163,8 +1150,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) return -EINVAL; kbdev->as_free &= ~MCU_AS_BITMASK; - ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, - BASE_MEM_GROUP_DEFAULT); + ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, BASE_MEM_GROUP_DEFAULT); if (ret != 0) { /* Release the address space */ @@ -1231,10 +1217,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) /* NO_MALI: Don't stop firmware or unload MMU tables */ - kbase_csf_scheduler_term(kbdev); - kbase_csf_free_dummy_user_reg_page(kbdev); + kbase_csf_scheduler_term(kbdev); + kbase_csf_doorbell_mapping_term(kbdev); free_global_iface(kbdev); @@ -1246,7 +1232,7 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) struct dummy_firmware_interface *interface; interface = list_first_entry(&kbdev->csf.firmware_interfaces, - struct dummy_firmware_interface, node); + struct dummy_firmware_interface, node); list_del(&interface->node); /* NO_MALI: No cleanup in dummy interface necessary */ @@ -1289,9 +1275,8 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) kbase_csf_scheduler_spin_lock_assert_held(kbdev); - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, - GLB_REQ_REQ_IDLE_DISABLE, - GLB_REQ_IDLE_DISABLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, + GLB_REQ_IDLE_DISABLE_MASK); dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); @@ -1300,8 +1285,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct 
kbase_device *kbdev) void kbase_csf_firmware_ping(struct kbase_device *const kbdev) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); @@ -1317,11 +1301,9 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } -int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, - u64 const timeout) +int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags; int err; @@ -1385,7 +1367,7 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) * otherwise on fast reset, to exit protected mode, MCU will * not reboot by itself to enter normal mode. 
*/ - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_AUTO); } #ifdef KBASE_PM_RUNTIME @@ -1410,6 +1392,12 @@ bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) } #endif +bool kbase_csf_firmware_mcu_halt_req_complete(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_mcu_halted(kbdev); +} + + int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -1427,8 +1415,7 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); - err = wait_for_global_request(kbdev, - GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); + err = wait_for_global_request(kbdev, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); mutex_unlock(&kbdev->csf.reg_lock); return err; } @@ -1449,12 +1436,10 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) * * Return: Total number of CSs, summed across all groups. 
*/ -static u32 copy_grp_and_stm( - const struct kbase_csf_global_iface * const iface, - struct basep_cs_group_control * const group_data, - u32 max_group_num, - struct basep_cs_stream_control * const stream_data, - u32 max_total_stream_num) +static u32 copy_grp_and_stm(const struct kbase_csf_global_iface *const iface, + struct basep_cs_group_control *const group_data, u32 max_group_num, + struct basep_cs_stream_control *const stream_data, + u32 max_total_stream_num) { u32 i, total_stream_num = 0; @@ -1470,8 +1455,7 @@ static u32 copy_grp_and_stm( if (i < max_group_num) { group_data[i].features = iface->groups[i].features; group_data[i].stream_num = iface->groups[i].stream_num; - group_data[i].suspend_size = - iface->groups[i].suspend_size; + group_data[i].suspend_size = iface->groups[i].suspend_size; } for (j = 0; j < iface->groups[i].stream_num; j++) { if (total_stream_num < max_total_stream_num) @@ -1484,21 +1468,18 @@ static u32 copy_grp_and_stm( return total_stream_num; } -u32 kbase_csf_firmware_get_glb_iface( - struct kbase_device *kbdev, - struct basep_cs_group_control *const group_data, - u32 const max_group_num, - struct basep_cs_stream_control *const stream_data, - u32 const max_total_stream_num, u32 *const glb_version, - u32 *const features, u32 *const group_num, u32 *const prfcnt_size, - u32 *const instr_features) +u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, + struct basep_cs_group_control *const group_data, + u32 const max_group_num, + struct basep_cs_stream_control *const stream_data, + u32 const max_total_stream_num, u32 *const glb_version, + u32 *const features, u32 *const group_num, + u32 *const prfcnt_size, u32 *const instr_features) { - const struct kbase_csf_global_iface * const iface = - &kbdev->csf.global_iface; + const struct kbase_csf_global_iface *const iface = &kbdev->csf.global_iface; - if (WARN_ON(!glb_version) || WARN_ON(!features) || - WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || - WARN_ON(!instr_features)) + if 
(WARN_ON(!glb_version) || WARN_ON(!features) || WARN_ON(!group_num) || + WARN_ON(!prfcnt_size) || WARN_ON(!instr_features)) return 0; *glb_version = iface->version; @@ -1507,16 +1488,14 @@ u32 kbase_csf_firmware_get_glb_iface( *prfcnt_size = iface->prfcnt_size; *instr_features = iface->instr_features; - return copy_grp_and_stm(iface, group_data, max_group_num, - stream_data, max_total_stream_num); + return copy_grp_and_stm(iface, group_data, max_group_num, stream_data, + max_total_stream_num); } -const char *kbase_csf_firmware_get_timeline_metadata( - struct kbase_device *kbdev, const char *name, size_t *size) +const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, const char *name, + size_t *size) { - if (WARN_ON(!kbdev) || - WARN_ON(!name) || - WARN_ON(!size)) { + if (WARN_ON(!kbdev) || WARN_ON(!name) || WARN_ON(!size)) { return NULL; } @@ -1526,7 +1505,7 @@ const char *kbase_csf_firmware_get_timeline_metadata( void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_DISABLE); } void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) @@ -1534,12 +1513,10 @@ void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) /* NO_MALI: Nothing to do here */ } -int kbase_csf_firmware_mcu_shared_mapping_init( - struct kbase_device *kbdev, - unsigned int num_pages, - unsigned long cpu_map_properties, - unsigned long gpu_map_properties, - struct kbase_csf_mapping *csf_mapping) +int kbase_csf_firmware_mcu_shared_mapping_init(struct kbase_device *kbdev, unsigned int num_pages, + unsigned long cpu_map_properties, + unsigned long gpu_map_properties, + struct kbase_csf_mapping *csf_mapping) { struct tagged_addr *phys; struct kbase_va_region *va_reg; @@ -1553,11 +1530,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( cpu_map_prot = PAGE_KERNEL_RO; if (kbdev->system_coherency 
== COHERENCY_ACE) { - gpu_map_prot = - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + gpu_map_prot = KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT_ACE); } else { - gpu_map_prot = - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + gpu_map_prot = KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); cpu_map_prot = pgprot_writecombine(cpu_map_prot); } @@ -1581,8 +1556,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, - KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages); if (!va_reg) goto va_region_alloc_error; @@ -1598,7 +1572,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret) goto mmu_insert_pages_error; @@ -1619,9 +1593,8 @@ va_region_add_error: va_region_alloc_error: vunmap(cpu_addr); vmap_error: - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false, false); + kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, + false, false); phys_mem_pool_alloc_error: kfree(page_list); @@ -1639,8 +1612,8 @@ out: return -ENOMEM; } -void kbase_csf_firmware_mcu_shared_mapping_term( - struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) +void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, + struct kbase_csf_mapping *csf_mapping) { if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); @@ -1650,10 +1623,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term( } if (csf_mapping->phys) { - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - csf_mapping->num_pages, csf_mapping->phys, false, - false); + 
kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + csf_mapping->num_pages, csf_mapping->phys, false, false); } vunmap(csf_mapping->cpu_addr); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c index 7c14b8eb554c..12a79b4852fb 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c @@ -43,23 +43,20 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) lockdep_assert_held(&ctx_alloc->lock); - heap_nr = find_first_zero_bit(ctx_alloc->in_use, - MAX_TILER_HEAPS); + heap_nr = find_first_zero_bit(ctx_alloc->in_use, MAX_TILER_HEAPS); if (unlikely(heap_nr >= MAX_TILER_HEAPS)) { - dev_dbg(kctx->kbdev->dev, - "No free tiler heap contexts in the pool"); + dev_dbg(kctx->kbdev->dev, "No free tiler heap contexts in the pool"); return 0; } ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; - ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, - ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); + ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, ctx_alloc->heap_context_size_aligned, + KBASE_REG_CPU_WR, &mapping); if (unlikely(!ctx_ptr)) { - dev_err(kctx->kbdev->dev, - "Failed to map tiler heap context %lu (0x%llX)\n", + dev_err(kctx->kbdev->dev, "Failed to map tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return 0; } @@ -69,8 +66,8 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) bitmap_set(ctx_alloc->in_use, heap_nr, 1); - dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", - heap_nr, heap_gpu_va); + dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", heap_nr, + heap_gpu_va); return heap_gpu_va; } @@ -88,14 +85,13 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) * for 
heap context is freed. */ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, - u64 const heap_gpu_va) + u64 const heap_gpu_va) { struct kbase_context *const kctx = ctx_alloc->kctx; u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; u32 page_index = offset_in_bytes >> PAGE_SHIFT; - struct tagged_addr page = - kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; + struct tagged_addr page = kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; lockdep_assert_held(&ctx_alloc->lock); @@ -105,9 +101,8 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct * disappear whilst this function is executing. Flush type is passed as FLUSH_PT * to CLN+INV L2 only. */ - kbase_mmu_flush_pa_range(kctx->kbdev, kctx, - heap_context_pa, ctx_alloc->heap_context_size_aligned, - KBASE_MMU_OP_FLUSH_PT); + kbase_mmu_flush_pa_range(kctx->kbdev, kctx, heap_context_pa, + ctx_alloc->heap_context_size_aligned, KBASE_MMU_OP_FLUSH_PT); } /** @@ -117,7 +112,7 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct * @heap_gpu_va: The GPU virtual address of a heap context structure to free. 
*/ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, - u64 const heap_gpu_va) + u64 const heap_gpu_va) { struct kbase_context *const kctx = ctx_alloc->kctx; u32 ctx_offset = 0; @@ -134,50 +129,41 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || - WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) + WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) return; evict_heap_context(ctx_alloc, heap_gpu_va); heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; - dev_dbg(kctx->kbdev->dev, - "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); + dev_dbg(kctx->kbdev->dev, "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); bitmap_clear(ctx_alloc->in_use, heap_nr, 1); } -int kbase_csf_heap_context_allocator_init( - struct kbase_csf_heap_context_allocator *const ctx_alloc, - struct kbase_context *const kctx) +int kbase_csf_heap_context_allocator_init(struct kbase_csf_heap_context_allocator *const ctx_alloc, + struct kbase_context *const kctx) { - const u32 gpu_cache_line_size = - (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + const u32 gpu_cache_line_size = (1U << kctx->kbdev->gpu_props.log2_line_size); /* We cannot pre-allocate GPU memory here because the * custom VA zone may not have been created yet. 
*/ ctx_alloc->kctx = kctx; - ctx_alloc->region = NULL; - ctx_alloc->gpu_va = 0; - ctx_alloc->heap_context_size_aligned = - (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); + ctx_alloc->heap_context_size_aligned = (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & + ~(gpu_cache_line_size - 1); mutex_init(&ctx_alloc->lock); - bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); - dev_dbg(kctx->kbdev->dev, - "Initialized a tiler heap context allocator\n"); + dev_dbg(kctx->kbdev->dev, "Initialized a tiler heap context allocator\n"); return 0; } -void kbase_csf_heap_context_allocator_term( - struct kbase_csf_heap_context_allocator *const ctx_alloc) +void kbase_csf_heap_context_allocator_term(struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - dev_dbg(kctx->kbdev->dev, - "Terminating tiler heap context allocator\n"); + dev_dbg(kctx->kbdev->dev, "Terminating tiler heap context allocator\n"); if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); @@ -191,8 +177,7 @@ void kbase_csf_heap_context_allocator_term( mutex_destroy(&ctx_alloc->lock); } -u64 kbase_csf_heap_context_allocator_alloc( - struct kbase_csf_heap_context_allocator *const ctx_alloc) +u64 kbase_csf_heap_context_allocator_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | @@ -226,9 +211,8 @@ u64 kbase_csf_heap_context_allocator_alloc( return heap_gpu_va; } -void kbase_csf_heap_context_allocator_free( - struct kbase_csf_heap_context_allocator *const ctx_alloc, - u64 const heap_gpu_va) +void kbase_csf_heap_context_allocator_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va) { mutex_lock(&ctx_alloc->lock); sub_free(ctx_alloc, heap_gpu_va); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h index 9aab7abfb649..27f58b811da1 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,19 +30,19 @@ * @ctx_alloc: Pointer to the heap context allocator to initialize. * @kctx: Pointer to the kbase context. * + * This function must be called only when a kbase context is instantiated. + * * Return: 0 if successful or a negative error code on failure. */ -int kbase_csf_heap_context_allocator_init( - struct kbase_csf_heap_context_allocator *const ctx_alloc, - struct kbase_context *const kctx); +int kbase_csf_heap_context_allocator_init(struct kbase_csf_heap_context_allocator *const ctx_alloc, + struct kbase_context *const kctx); /** * kbase_csf_heap_context_allocator_term - Terminate an allocator for heap * contexts * @ctx_alloc: Pointer to the heap context allocator to terminate. */ -void kbase_csf_heap_context_allocator_term( - struct kbase_csf_heap_context_allocator *const ctx_alloc); +void kbase_csf_heap_context_allocator_term(struct kbase_csf_heap_context_allocator *const ctx_alloc); /** * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure @@ -54,8 +54,7 @@ void kbase_csf_heap_context_allocator_term( * * Return: GPU virtual address of the allocated heap context or 0 on failure. 
*/ -u64 kbase_csf_heap_context_allocator_alloc( - struct kbase_csf_heap_context_allocator *const ctx_alloc); +u64 kbase_csf_heap_context_allocator_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc); /** * kbase_csf_heap_context_allocator_free - Free a heap context structure @@ -68,8 +67,7 @@ u64 kbase_csf_heap_context_allocator_alloc( * contexts for possible reuse by a future call to * @kbase_csf_heap_context_allocator_alloc. */ -void kbase_csf_heap_context_allocator_free( - struct kbase_csf_heap_context_allocator *const ctx_alloc, - u64 const heap_gpu_va); +void kbase_csf_heap_context_allocator_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va); #endif /* _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index da8dde239972..9da7f4dea64b 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -20,11 +20,17 @@ */ #include +#include #include #include #include "device/mali_kbase_device.h" #include "mali_kbase_csf.h" +#include "mali_kbase_csf_cpu_queue.h" +#include "mali_kbase_csf_csg.h" +#include "mali_kbase_csf_sync.h" +#include "mali_kbase_csf_util.h" #include +#include #if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_fence.h" @@ -37,22 +43,16 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock); #define FENCE_WAIT_TIMEOUT_MS 3000 #endif -static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, - bool drain_queue); +static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue); static void kcpu_queue_process_worker(struct work_struct *data); -static int kbase_kcpu_map_import_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_import_info *import_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_map_import_prepare(struct kbase_kcpu_command_queue 
*kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command) { struct kbase_context *const kctx = kcpu_queue->kctx; struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; - struct page **pages; - struct tagged_addr *pa; - long i; int ret = 0; lockdep_assert_held(&kcpu_queue->lock); @@ -61,43 +61,47 @@ static int kbase_kcpu_map_import_prepare( down_read(kbase_mem_get_process_mmap_lock()); kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - import_info->handle); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, import_info->handle); - if (kbase_is_region_invalid_or_free(reg) || - !kbase_mem_is_imported(reg->gpu_alloc->type)) { + if (kbase_is_region_invalid_or_free(reg) || !kbase_mem_is_imported(reg->gpu_alloc->type)) { ret = -EINVAL; goto out; } if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - /* Pin the physical pages backing the user buffer while - * we are in the process context and holding the mmap lock. - * The dma mapping & GPU mapping of the pages would be done - * when the MAP_IMPORT operation is executed. + /* The only step done during the preparation of the MAP_IMPORT + * command is pinning physical pages, if they're not already + * pinned (which is a possibility). This can be done now while + * the function is in the process context and holding the mmap lock. + * + * Successive steps like DMA mapping and GPU mapping of the pages + * shall be done when the MAP_IMPORT operation is executed. * * Though the pages would be pinned, no reference is taken * on the physical pages tracking object. When the last - * reference to the tracking object is dropped the pages + * reference to the tracking object is dropped, the pages * would be unpinned if they weren't unpinned before. - * - * Region should be CPU cached: abort if it isn't. 
*/ - if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { + switch (reg->gpu_alloc->imported.user_buf.state) { + case KBASE_USER_BUF_STATE_EMPTY: { + ret = kbase_user_buf_from_empty_to_pinned(kctx, reg); + if (ret) + goto out; + break; + } + case KBASE_USER_BUF_STATE_PINNED: + case KBASE_USER_BUF_STATE_DMA_MAPPED: + case KBASE_USER_BUF_STATE_GPU_MAPPED: { + /* Do nothing here. */ + break; + } + default: { + WARN(1, "Imported user buffer in unexpected state %d\n", + reg->gpu_alloc->imported.user_buf.state); ret = -EINVAL; goto out; } - - ret = kbase_jd_user_buf_pin_pages(kctx, reg); - if (ret) - goto out; - - alloc = reg->gpu_alloc; - pa = kbase_get_gpu_phy_pages(reg); - pages = alloc->imported.user_buf.pages; - - for (i = 0; i < alloc->nents; i++) - pa[i] = as_tagged(page_to_phys(pages[i])); + } } current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT; @@ -111,11 +115,11 @@ out: return ret; } -static int kbase_kcpu_unmap_import_prepare_internal( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_import_info *import_info, - struct kbase_kcpu_command *current_command, - enum base_kcpu_command_type type) +static int +kbase_kcpu_unmap_import_prepare_internal(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command, + enum base_kcpu_command_type type) { struct kbase_context *const kctx = kcpu_queue->kctx; struct kbase_va_region *reg; @@ -125,11 +129,9 @@ static int kbase_kcpu_unmap_import_prepare_internal( kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - import_info->handle); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, import_info->handle); - if (kbase_is_region_invalid_or_free(reg) || - !kbase_mem_is_imported(reg->gpu_alloc->type)) { + if (kbase_is_region_invalid_or_free(reg) || !kbase_mem_is_imported(reg->gpu_alloc->type)) { ret = -EINVAL; goto out; } @@ -138,8 +140,7 @@ static int 
kbase_kcpu_unmap_import_prepare_internal( /* The pages should have been pinned when MAP_IMPORT * was enqueued previously. */ - if (reg->gpu_alloc->nents != - reg->gpu_alloc->imported.user_buf.nr_pages) { + if (reg->gpu_alloc->nents != reg->gpu_alloc->imported.user_buf.nr_pages) { ret = -EINVAL; goto out; } @@ -154,24 +155,20 @@ out: return ret; } -static int kbase_kcpu_unmap_import_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_import_info *import_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_unmap_import_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command) { - return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, - import_info, current_command, - BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT); + return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, import_info, current_command, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT); } -static int kbase_kcpu_unmap_import_force_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_import_info *import_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_unmap_import_force_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command) { - return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, - import_info, current_command, - BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE); + return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, import_info, current_command, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE); } /** @@ -180,21 +177,17 @@ static int kbase_kcpu_unmap_import_force_prepare( * @queue: The queue containing this JIT allocation * @cmd: The JIT allocation that is blocking this queue */ -static void kbase_jit_add_to_pending_alloc_list( - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command *cmd) +static void 
kbase_jit_add_to_pending_alloc_list(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command *cmd) { struct kbase_context *const kctx = queue->kctx; - struct list_head *target_list_head = - &kctx->csf.kcpu_queues.jit_blocked_queues; + struct list_head *target_list_head = &kctx->csf.kcpu_queues.jit_blocked_queues; struct kbase_kcpu_command_queue *blocked_queue; lockdep_assert_held(&queue->lock); lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); - list_for_each_entry(blocked_queue, - &kctx->csf.kcpu_queues.jit_blocked_queues, - jit_blocked) { + list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) { struct kbase_kcpu_command const *const jit_alloc_cmd = &blocked_queue->commands[blocked_queue->start_offset]; @@ -220,13 +213,11 @@ static void kbase_jit_add_to_pending_alloc_list( * * -EAGAIN - Retry * * -ENOMEM - no memory. unable to allocate */ -static int kbase_kcpu_jit_allocate_process( - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command *cmd) +static int kbase_kcpu_jit_allocate_process(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command *cmd) { struct kbase_context *const kctx = queue->kctx; - struct kbase_kcpu_command_jit_alloc_info *alloc_info = - &cmd->info.jit_alloc; + struct kbase_kcpu_command_jit_alloc_info *alloc_info = &cmd->info.jit_alloc; struct base_jit_alloc_info *info = alloc_info->info; struct kbase_vmap_struct mapping; struct kbase_va_region *reg; @@ -265,7 +256,8 @@ static int kbase_kcpu_jit_allocate_process( bool can_block = false; struct kbase_kcpu_command const *jit_cmd; - list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) { + list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, + info.jit_alloc.node) { if (jit_cmd == cmd) break; @@ -289,7 +281,8 @@ static int kbase_kcpu_jit_allocate_process( * No prior JIT_FREE command is active. Roll * back previous allocations and fail. 
*/ - dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); + dev_warn_ratelimited(kctx->kbdev->dev, + "JIT alloc command failed: %pK\n", cmd); ret = -ENOMEM; goto fail_rollback; } @@ -322,8 +315,8 @@ static int kbase_kcpu_jit_allocate_process( * Write the address of the JIT allocation to the user provided * GPU allocation. */ - ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), - KBASE_REG_CPU_WR, &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), KBASE_REG_CPU_WR, + &mapping); if (!ptr) { ret = -ENOMEM; goto fail_rollback; @@ -358,10 +351,9 @@ fail: return ret; } -static int kbase_kcpu_jit_allocate_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_jit_alloc_info *alloc_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_jit_allocate_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_jit_alloc_info *alloc_info, + struct kbase_kcpu_command *current_command) { struct kbase_context *const kctx = kcpu_queue->kctx; void __user *data = u64_to_user_ptr(alloc_info->info); @@ -413,8 +405,7 @@ static int kbase_kcpu_jit_allocate_prepare( current_command->info.jit_alloc.count = count; current_command->info.jit_alloc.blocked = false; mutex_lock(&kctx->csf.kcpu_queues.jit_lock); - list_add_tail(&current_command->info.jit_alloc.node, - &kctx->csf.kcpu_queues.jit_cmds_head); + list_add_tail(&current_command->info.jit_alloc.node, &kctx->csf.kcpu_queues.jit_cmds_head); mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return 0; @@ -430,9 +421,8 @@ out: * @queue: The queue containing this JIT allocation * @cmd: The JIT allocation command */ -static void kbase_kcpu_jit_allocate_finish( - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command *cmd) +static void kbase_kcpu_jit_allocate_finish(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command *cmd) { lockdep_assert_held(&queue->lock); @@ -480,8 +470,7 @@ static void 
kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, struct kbase_kcpu_command *const cmd) { - struct kbase_kcpu_command_jit_free_info const *const free_info = - &cmd->info.jit_free; + struct kbase_kcpu_command_jit_free_info const *const free_info = &cmd->info.jit_free; u8 const *const ids = free_info->ids; u32 const count = free_info->count; u32 i; @@ -539,10 +528,9 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, return rc; } -static int kbase_kcpu_jit_free_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_jit_free_info *free_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_jit_free_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_jit_free_info *free_info, + struct kbase_kcpu_command *current_command) { struct kbase_context *const kctx = kcpu_queue->kctx; void __user *data = u64_to_user_ptr(free_info->ids); @@ -600,8 +588,7 @@ static int kbase_kcpu_jit_free_prepare( current_command->info.jit_free.ids = ids; current_command->info.jit_free.count = count; mutex_lock(&kctx->csf.kcpu_queues.jit_lock); - list_add_tail(&current_command->info.jit_free.node, - &kctx->csf.kcpu_queues.jit_cmds_head); + list_add_tail(&current_command->info.jit_free.node, &kctx->csf.kcpu_queues.jit_cmds_head); mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return 0; @@ -612,15 +599,14 @@ out: } #if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -static int kbase_csf_queue_group_suspend_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_group_suspend_info *suspend_buf, - struct kbase_kcpu_command *current_command) +static int +kbase_csf_queue_group_suspend_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_group_suspend_info *suspend_buf, + struct kbase_kcpu_command *current_command) { struct kbase_context *const kctx = 
kcpu_queue->kctx; struct kbase_suspend_copy_buffer *sus_buf = NULL; - const u32 csg_suspend_buf_size = - kctx->kbdev->csf.global_iface.groups[0].suspend_size; + const u32 csg_suspend_buf_size = kctx->kbdev->csf.global_iface.groups[0].suspend_size; u64 addr = suspend_buf->buffer; u64 page_addr = addr & PAGE_MASK; u64 end_addr = addr + csg_suspend_buf_size - 1; @@ -634,8 +620,7 @@ static int kbase_csf_queue_group_suspend_prepare( if (suspend_buf->size < csg_suspend_buf_size) return -EINVAL; - ret = kbase_csf_queue_group_handle_is_valid(kctx, - suspend_buf->group_handle); + ret = kbase_csf_queue_group_handle_is_valid(kctx, suspend_buf->group_handle); if (ret) return ret; @@ -658,13 +643,11 @@ static int kbase_csf_queue_group_suspend_prepare( * memory allocated by userspace. */ kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - page_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, page_addr); if (kbase_is_region_invalid_or_free(reg)) { kbase_gpu_vm_unlock(kctx); - pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1, - sus_buf->pages); + pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1, sus_buf->pages); kbase_gpu_vm_lock(kctx); if (pinned_pages < 0) { @@ -677,10 +660,11 @@ static int kbase_csf_queue_group_suspend_prepare( } } else { struct tagged_addr *page_array; - u64 start, end, i; + u64 start, end; + int i; - if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || - (kbase_reg_current_backed_size(reg) < nr_pages) || + if ((kbase_bits_to_zone(reg->flags) != SAME_VA_ZONE) || + (kbase_reg_current_backed_size(reg) < (size_t)nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) { @@ -708,8 +692,7 @@ static int kbase_csf_queue_group_suspend_prepare( kbase_gpu_vm_unlock(kctx); current_command->type = BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND; 
current_command->info.suspend_buf_copy.sus_buf = sus_buf; - current_command->info.suspend_buf_copy.group_handle = - suspend_buf->group_handle; + current_command->info.suspend_buf_copy.group_handle = suspend_buf->group_handle; return ret; out_clean_pages: @@ -722,8 +705,8 @@ out_clean_sus_buf: } static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, - struct kbase_suspend_copy_buffer *sus_buf, - u8 group_handle) + struct kbase_suspend_copy_buffer *sus_buf, + u8 group_handle) { return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle); } @@ -731,8 +714,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { - struct kbase_kcpu_command_queue *kcpu_queue = - (struct kbase_kcpu_command_queue *)param; + struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; queue_work(kcpu_queue->wq, &kcpu_queue->work); @@ -740,7 +722,7 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param) } static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command_cqs_wait_info *cqs_wait) + struct kbase_kcpu_command_cqs_wait_info *cqs_wait) { WARN_ON(!cqs_wait->nr_objs); WARN_ON(!cqs_wait->objs); @@ -748,8 +730,7 @@ static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue, WARN_ON(!queue->cqs_wait_count); if (--queue->cqs_wait_count == 0) { - kbase_csf_event_wait_remove(queue->kctx, - event_cqs_callback, queue); + kbase_csf_event_wait_remove(queue->kctx, event_cqs_callback, queue); } kfree(cqs_wait->signaled); @@ -759,8 +740,8 @@ static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue, } static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command_cqs_wait_info *cqs_wait) + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_info *cqs_wait) { u32 i; @@ 
-770,30 +751,31 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return -EINVAL; /* Skip the CQS waits that have already been signaled when processing */ - for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; i++) { + for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; + i++) { if (!test_bit(i, cqs_wait->signaled)) { struct kbase_vmap_struct *mapping; bool sig_set; - u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx, - cqs_wait->objs[i].addr, &mapping); + u32 *evt = (u32 *)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_wait->objs[i].addr, &mapping); if (!queue->command_started) { KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev, queue); queue->command_started = true; - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START, - queue, cqs_wait->nr_objs, 0); + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START, queue, + cqs_wait->nr_objs, 0); } if (!evt) { - dev_warn(kbdev->dev, - "Sync memory %llx already freed", cqs_wait->objs[i].addr); + dev_warn(kbdev->dev, "Sync memory %llx already freed", + cqs_wait->objs[i].addr); queue->has_error = true; return -EINVAL; } - sig_set = - evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val; + sig_set = evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > + cqs_wait->objs[i].val; if (sig_set) { bool error = false; @@ -804,9 +786,8 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, error = true; } - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END, - queue, cqs_wait->objs[i].addr, - error); + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END, queue, + cqs_wait->objs[i].addr, error); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); @@ -848,8 +829,8 @@ static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) } static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, - struct 
base_kcpu_command_cqs_wait_info *cqs_wait_info, - struct kbase_kcpu_command *current_command) + struct base_kcpu_command_cqs_wait_info *cqs_wait_info, + struct kbase_kcpu_command *current_command) { struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; @@ -867,8 +848,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, if (!objs) return -ENOMEM; - if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), - nr_objs * sizeof(*objs))) { + if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; } @@ -885,8 +865,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, } if (++queue->cqs_wait_count == 1) { - if (kbase_csf_event_wait_add(queue->kctx, - event_cqs_callback, queue)) { + if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { kfree(objs); queue->cqs_wait_count--; return -ENOMEM; @@ -896,15 +875,14 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT; current_command->info.cqs_wait.nr_objs = nr_objs; current_command->info.cqs_wait.objs = objs; - current_command->info.cqs_wait.inherit_err_flags = - cqs_wait_info->inherit_err_flags; + current_command->info.cqs_wait.inherit_err_flags = cqs_wait_info->inherit_err_flags; - current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs), - sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL); + current_command->info.cqs_wait.signaled = + kcalloc(BITS_TO_LONGS(nr_objs), sizeof(*current_command->info.cqs_wait.signaled), + GFP_KERNEL); if (!current_command->info.cqs_wait.signaled) { if (--queue->cqs_wait_count == 0) { - kbase_csf_event_wait_remove(queue->kctx, - event_cqs_callback, queue); + kbase_csf_event_wait_remove(queue->kctx, event_cqs_callback, queue); } kfree(objs); @@ -915,8 +893,8 @@ static int kbase_kcpu_cqs_wait_prepare(struct 
kbase_kcpu_command_queue *queue, } static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command_cqs_set_info *cqs_set) + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_set_info *cqs_set) { unsigned int i; @@ -929,14 +907,14 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, struct kbase_vmap_struct *mapping; u32 *evt; - evt = (u32 *)kbase_phy_alloc_mapping_get( - queue->kctx, cqs_set->objs[i].addr, &mapping); + evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx, cqs_set->objs[i].addr, + &mapping); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1); if (!evt) { - dev_warn(kbdev->dev, - "Sync memory %llx already freed", cqs_set->objs[i].addr); + dev_warn(kbdev->dev, "Sync memory %llx already freed", + cqs_set->objs[i].addr); queue->has_error = true; } else { evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; @@ -955,10 +933,9 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, cqs_set->objs = NULL; } -static int kbase_kcpu_cqs_set_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_cqs_set_info *cqs_set_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_cqs_set_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_cqs_set_info *cqs_set_info, + struct kbase_kcpu_command *current_command) { struct base_cqs_set *objs; unsigned int nr_objs = cqs_set_info->nr_objs; @@ -976,8 +953,7 @@ static int kbase_kcpu_cqs_set_prepare( if (!objs) return -ENOMEM; - if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), - nr_objs * sizeof(*objs))) { + if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; } @@ -1000,8 +976,9 @@ static int kbase_kcpu_cqs_set_prepare( return 0; } -static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, - 
struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +static void +cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) { WARN_ON(!cqs_wait_operation->nr_objs); WARN_ON(!cqs_wait_operation->objs); @@ -1009,8 +986,7 @@ static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, WARN_ON(!queue->cqs_wait_count); if (--queue->cqs_wait_count == 0) { - kbase_csf_event_wait_remove(queue->kctx, - event_cqs_callback, queue); + kbase_csf_event_wait_remove(queue->kctx, event_cqs_callback, queue); } kfree(cqs_wait_operation->signaled); @@ -1019,9 +995,9 @@ static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, cqs_wait_operation->objs = NULL; } -static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +static int kbase_kcpu_cqs_wait_operation_process( + struct kbase_device *kbdev, struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) { u32 i; @@ -1031,7 +1007,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; /* Skip the CQS waits that have already been signaled when processing */ - for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) { + for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); + i < cqs_wait_operation->nr_objs; i++) { if (!test_bit(i, cqs_wait_operation->signaled)) { struct kbase_vmap_struct *mapping; bool sig_set; @@ -1045,10 +1022,9 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, kbdev, queue); } - if (!evt) { - dev_warn(kbdev->dev, - "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr); + dev_warn(kbdev->dev, "Sync memory 
%llx already freed", + cqs_wait_operation->objs[i].addr); queue->has_error = true; return -EINVAL; } @@ -1078,8 +1054,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, sig_set = val > cqs_wait_operation->objs[i].val; break; default: - dev_dbg(kbdev->dev, - "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); + dev_dbg(kbdev->dev, "Unsupported CQS wait operation %d", + cqs_wait_operation->objs[i].operation); kbase_phy_alloc_mapping_put(queue->kctx, mapping); queue->has_error = true; @@ -1113,9 +1089,10 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); } -static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue, - struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_cqs_wait_operation_prepare( + struct kbase_kcpu_command_queue *queue, + struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, + struct kbase_kcpu_command *current_command) { struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; @@ -1134,7 +1111,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return -ENOMEM; if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), - nr_objs * sizeof(*objs))) { + nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; } @@ -1152,8 +1129,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue } if (++queue->cqs_wait_count == 1) { - if (kbase_csf_event_wait_add(queue->kctx, - event_cqs_callback, queue)) { + if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { kfree(objs); queue->cqs_wait_count--; return -ENOMEM; @@ -1164,14 +1140,14 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue 
current_command->info.cqs_wait_operation.nr_objs = nr_objs; current_command->info.cqs_wait_operation.objs = objs; current_command->info.cqs_wait_operation.inherit_err_flags = - cqs_wait_operation_info->inherit_err_flags; + cqs_wait_operation_info->inherit_err_flags; - current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs), - sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); + current_command->info.cqs_wait_operation.signaled = + kcalloc(BITS_TO_LONGS(nr_objs), + sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); if (!current_command->info.cqs_wait_operation.signaled) { if (--queue->cqs_wait_count == 0) { - kbase_csf_event_wait_remove(queue->kctx, - event_cqs_callback, queue); + kbase_csf_event_wait_remove(queue->kctx, event_cqs_callback, queue); } kfree(objs); @@ -1220,9 +1196,8 @@ static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue } static void kbase_kcpu_cqs_set_operation_process( - struct kbase_device *kbdev, - struct kbase_kcpu_command_queue *queue, - struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) + struct kbase_device *kbdev, struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) { unsigned int i; @@ -1239,8 +1214,8 @@ static void kbase_kcpu_cqs_set_operation_process( queue->kctx, cqs_set_operation->objs[i].addr, &mapping); if (!evt) { - dev_warn(kbdev->dev, - "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); + dev_warn(kbdev->dev, "Sync memory %llx already freed", + cqs_set_operation->objs[i].addr); queue->has_error = true; } else { struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; @@ -1280,9 +1255,9 @@ skip_err_propagation: } static int kbase_kcpu_cqs_set_operation_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, - struct kbase_kcpu_command *current_command) + 
struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, + struct kbase_kcpu_command *current_command) { struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; @@ -1301,7 +1276,7 @@ static int kbase_kcpu_cqs_set_operation_prepare( return -ENOMEM; if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), - nr_objs * sizeof(*objs))) { + nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; } @@ -1327,15 +1302,13 @@ static int kbase_kcpu_cqs_set_operation_prepare( #if IS_ENABLED(CONFIG_SYNC_FILE) #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -static void kbase_csf_fence_wait_callback(struct fence *fence, - struct fence_cb *cb) +static void kbase_csf_fence_wait_callback(struct fence *fence, struct fence_cb *cb) #else -static void kbase_csf_fence_wait_callback(struct dma_fence *fence, - struct dma_fence_cb *cb) +static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct dma_fence_cb *cb) #endif { - struct kbase_kcpu_command_fence_info *fence_info = container_of(cb, - struct kbase_kcpu_command_fence_info, fence_cb); + struct kbase_kcpu_command_fence_info *fence_info = + container_of(cb, struct kbase_kcpu_command_fence_info, fence_cb); struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; struct kbase_context *const kctx = kcpu_queue->kctx; @@ -1343,8 +1316,9 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, /* Fence gets signaled. Deactivate the timer for fence-wait timeout */ del_timer(&kcpu_queue->fence_timeout); #endif - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, - fence->context, fence->seqno); + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence->context, + fence->seqno); /* Resume kcpu command queue processing. 
*/ queue_work(kcpu_queue->wq, &kcpu_queue->work); @@ -1361,8 +1335,7 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_ return; if (kcpu_queue->fence_wait_processed) { - bool removed = dma_fence_remove_callback(fence_info->fence, - &fence_info->fence_cb); + bool removed = dma_fence_remove_callback(fence_info->fence, &fence_info->fence_cb); #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG /* Fence-wait cancelled or fence signaled. In the latter case @@ -1372,9 +1345,9 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_ del_timer_sync(&kcpu_queue->fence_timeout); #endif if (removed) - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, - kcpu_queue, fence_info->fence->context, - fence_info->fence->seqno); + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence_info->fence->context, + fence_info->fence->seqno); } /* Release the reference which is kept by the kcpu_queue */ @@ -1445,14 +1418,14 @@ static void fence_timeout_callback(struct timer_list *timer) } /** - * fence_timeout_start() - Start a timer to check fence-wait timeout + * fence_wait_timeout_start() - Start a timer to check fence-wait timeout * * @cmd: KCPU command queue * * Activate a timer to check whether a fence-wait command in the queue * gets completed within FENCE_WAIT_TIMEOUT_MS */ -static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) +static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd) { mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); } @@ -1467,9 +1440,8 @@ static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) * Return: 0 if fence wait is blocked, 1 if it is unblocked, negative error if * an error has occurred and fence should no longer be waited on. 
*/ -static int kbase_kcpu_fence_wait_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbase_kcpu_fence_wait_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { int fence_status = 0; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -1489,18 +1461,19 @@ static int kbase_kcpu_fence_wait_process( if (kcpu_queue->fence_wait_processed) { fence_status = dma_fence_get_status(fence); } else { - int cb_err = dma_fence_add_callback(fence, - &fence_info->fence_cb, - kbase_csf_fence_wait_callback); + int cb_err; - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, - KCPU_FENCE_WAIT_START, kcpu_queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_START, kcpu_queue, fence->context, fence->seqno); + + cb_err = dma_fence_add_callback(fence, &fence_info->fence_cb, + kbase_csf_fence_wait_callback); + fence_status = cb_err; if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG - fence_timeout_start(kcpu_queue); + fence_wait_timeout_start(kcpu_queue); #endif } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); @@ -1508,10 +1481,17 @@ static int kbase_kcpu_fence_wait_process( struct kbase_sync_fence_info info; kbase_sync_fence_info_get(fence, &info); - dev_warn(kctx->kbdev->dev, - "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", - info.name, kctx->tgid, kctx->id, kcpu_queue->id); + dev_warn( + kctx->kbdev->dev, + "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", + info.name, kctx->tgid, kctx->id, kcpu_queue->id); } + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); + } else { + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); } } @@ -1557,12 +1537,186 @@ static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_q return 0; } +/** + * 
fence_signal_timeout_start() - Start a timer to check enqueued fence-signal command is + * blocked for too long a duration + * + * @kcpu_queue: KCPU command queue + * + * Activate the queue's fence_signal_timeout timer to check whether a fence-signal command + * enqueued has been blocked for longer than a configured wait duration. + */ +static void fence_signal_timeout_start(struct kbase_kcpu_command_queue *kcpu_queue) +{ + struct kbase_device *kbdev = kcpu_queue->kctx->kbdev; + unsigned int wait_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT); + + if (atomic_read(&kbdev->fence_signal_timeout_enabled)) + mod_timer(&kcpu_queue->fence_signal_timeout, jiffies + msecs_to_jiffies(wait_ms)); +} + +static void +kbase_kcpu_command_fence_force_signaled_set(struct kbase_kcpu_command_fence_info *fence_info, + bool has_force_signaled) +{ + fence_info->fence_has_force_signaled = has_force_signaled; +} + +bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info) +{ + return fence_info->fence_has_force_signaled; +} + +static int kbase_kcpu_fence_force_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + int ret; + + /* already force signaled just return*/ + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return 0; + + if (WARN_ON(!fence_info->fence)) + return -EINVAL; + + ret = dma_fence_signal(fence_info->fence); + if (unlikely(ret < 0)) { + dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); + /* Treated as a success */ + ret = 0; + } + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, + fence_info->fence->context, fence_info->fence->seqno); + +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + dev_info(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%u\n", + kctx->tgid, kctx->id, kcpu_queue, 
kcpu_queue->id, fence_info->fence, + fence_info->fence->context, fence_info->fence->seqno); +#else + dev_info(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%llu\n", + kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence, + fence_info->fence->context, fence_info->fence->seqno); +#endif + + /* dma_fence refcount needs to be decreased to release it. */ + dma_fence_put(fence_info->fence); + fence_info->fence = NULL; + + return ret; +} + +static void kcpu_force_signal_fence(struct kbase_kcpu_command_queue *kcpu_queue) +{ + int status; + int i; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_context *const kctx = kcpu_queue->kctx; +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + int del; +#endif + + /* Force trigger all pending fence-signal commands */ + for (i = 0; i != kcpu_queue->num_pending_cmds; ++i) { + struct kbase_kcpu_command *cmd = + &kcpu_queue->commands[(u8)(kcpu_queue->start_offset + i)]; + + if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) { + /* If a fence had already force-signalled previously, + * just skip it in this round of force signalling. 
+ */ + if (kbase_kcpu_command_fence_has_force_signaled(&cmd->info.fence)) + continue; + + fence = kbase_fence_get(&cmd->info.fence); + + dev_info(kctx->kbdev->dev, + "kbase KCPU[%pK] cmd%d fence[%pK] force signaled\n", kcpu_queue, + i + 1, fence); + + /* set ETIMEDOUT error flag before signal the fence*/ + dma_fence_set_error_helper(fence, -ETIMEDOUT); + + /* force signal fence */ + status = + kbase_kcpu_fence_force_signal_process(kcpu_queue, &cmd->info.fence); + if (status < 0) + dev_err(kctx->kbdev->dev, "kbase signal failed\n"); + else + kbase_kcpu_command_fence_force_signaled_set(&cmd->info.fence, true); + + kcpu_queue->has_error = true; + } + } + + /* set fence_signal_pending_cnt to 0 + * and del_timer of the kcpu_queue + * because we signaled all the pending fence in the queue + */ + atomic_set(&kcpu_queue->fence_signal_pending_cnt, 0); +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + del = del_timer_sync(&kcpu_queue->fence_signal_timeout); + dev_info(kctx->kbdev->dev, "kbase KCPU [%pK] delete fence signal timeout timer ret: %d", + kcpu_queue, del); +#else + del_timer_sync(&kcpu_queue->fence_signal_timeout); +#endif +} + +static void kcpu_queue_force_fence_signal(struct kbase_kcpu_command_queue *kcpu_queue) +{ + mutex_lock(&kcpu_queue->lock); + kcpu_force_signal_fence(kcpu_queue); + mutex_unlock(&kcpu_queue->lock); +} + +/** + * fence_signal_timeout_cb() - Timeout callback function for fence-signal-wait + * + * @timer: Timer struct + * + * Callback function on an enqueued fence signal command has expired on its configured wait + * duration. At the moment it's just a simple place-holder for other tasks to expand on actual + * sync state dump via a bottom-half workqueue item. 
+ */ +static void fence_signal_timeout_cb(struct timer_list *timer) +{ + struct kbase_kcpu_command_queue *kcpu_queue = + container_of(timer, struct kbase_kcpu_command_queue, fence_signal_timeout); + struct kbase_context *const kctx = kcpu_queue->kctx; +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + dev_warn(kctx->kbdev->dev, "kbase KCPU fence signal timeout callback triggered"); +#endif + + /* If we have additional pending fence signal commands in the queue, re-arm for the + * remaining fence signal commands, and dump the work to dmesg, only if the + * global configuration option is set. + */ + if (atomic_read(&kctx->kbdev->fence_signal_timeout_enabled)) { + if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1) + fence_signal_timeout_start(kcpu_queue); + + queue_work(kcpu_queue->wq, &kcpu_queue->timeout_work); + } +} + static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; + /* already force signaled */ + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return 0; + if (WARN_ON(!fence_info->fence)) return -EINVAL; @@ -1575,8 +1729,26 @@ static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcp } KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, - fence_info->fence->context, - fence_info->fence->seqno); + fence_info->fence->context, fence_info->fence->seqno); + + /* If one has multiple enqueued fence signal commands, re-arm the timer */ + if (atomic_dec_return(&kcpu_queue->fence_signal_pending_cnt) > 0) { + fence_signal_timeout_start(kcpu_queue); +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + dev_dbg(kctx->kbdev->dev, + "kbase re-arm KCPU fence signal timeout timer for next signal command"); +#endif + } else { +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + int del = del_timer_sync(&kcpu_queue->fence_signal_timeout); + + dev_dbg(kctx->kbdev->dev, "kbase KCPU delete fence 
signal timeout timer ret: %d", + del); + CSTD_UNUSED(del); +#else + del_timer_sync(&kcpu_queue->fence_signal_timeout); +#endif + } /* dma_fence refcount needs to be decreased to release it. */ kbase_fence_put(fence_info->fence); @@ -1614,11 +1786,8 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q fence_out = (struct dma_fence *)kcpu_fence; #endif - dma_fence_init(fence_out, - &kbase_fence_ops, - &kbase_csf_fence_lock, - kcpu_queue->fence_context, - ++kcpu_queue->fence_seqno); + dma_fence_init(fence_out, &kbase_fence_ops, &kbase_csf_fence_lock, + kcpu_queue->fence_context, ++kcpu_queue->fence_seqno); #if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) /* Take an extra reference to the fence on behalf of the sync file. @@ -1646,7 +1815,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; - + kbase_kcpu_command_fence_force_signaled_set(&current_command->info.fence, false); return 0; fd_flags_fail: @@ -1681,8 +1850,7 @@ static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu if (ret) return ret; - if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, - sizeof(fence))) { + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, sizeof(fence))) { ret = -EFAULT; goto fail; } @@ -1692,6 +1860,10 @@ static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu * before returning success.
*/ fd_install(fd, sync_file->file); + + if (atomic_inc_return(&kcpu_queue->fence_signal_pending_cnt) == 1) + fence_signal_timeout_start(kcpu_queue); + return 0; fail: @@ -1724,10 +1896,102 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ +static void kcpu_fence_timeout_dump(struct kbase_kcpu_command_queue *queue, + struct kbasep_printer *kbpr) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_kcpu_command *cmd; + struct kbase_kcpu_command_fence_info *fence_info; + struct kbase_kcpu_dma_fence *kcpu_fence; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_sync_fence_info info; + u16 i; + + mutex_lock(&queue->lock); + + /* Find the next fence signal command in the queue */ + for (i = 0; i != queue->num_pending_cmds; ++i) { + cmd = &queue->commands[(u8)(queue->start_offset + i)]; + if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) { + fence_info = &cmd->info.fence; + /* find the first unforce signaled fence */ + if (!kbase_kcpu_command_fence_has_force_signaled(fence_info)) + break; + } + } + + if (i == queue->num_pending_cmds) { + dev_err(kctx->kbdev->dev, + "%s: No fence signal command found in ctx:%d_%d kcpu queue:%u", __func__, + kctx->tgid, kctx->id, queue->id); + mutex_unlock(&queue->lock); + return; + } + + fence = kbase_fence_get(fence_info); + if (!fence) { + dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, + kctx->id, queue->id); + mutex_unlock(&queue->lock); + return; + } + + kcpu_fence = kbase_kcpu_dma_fence_get(fence); + if (!kcpu_fence) { + dev_err(kctx->kbdev->dev, "no fence metadata found in ctx:%d_%d kcpu queue:%u", + kctx->tgid, kctx->id, queue->id); + kbase_fence_put(fence); + mutex_unlock(&queue->lock); + return; + } + + kbase_sync_fence_info_get(fence, &info); + + kbasep_print(kbpr, 
"------------------------------------------------\n"); + kbasep_print(kbpr, "KCPU Fence signal timeout detected for ctx:%d_%d\n", kctx->tgid, + kctx->id); + kbasep_print(kbpr, "------------------------------------------------\n"); + kbasep_print(kbpr, "Kcpu queue:%u still waiting for fence[%pK] context#seqno:%s\n", + queue->id, fence, info.name); + kbasep_print(kbpr, "Fence metadata timeline name: %s\n", + kcpu_fence->metadata->timeline_name); + + kbase_fence_put(fence); + mutex_unlock(&queue->lock); + + kbasep_csf_csg_active_dump_print(kctx->kbdev, kbpr); + kbasep_csf_csg_dump_print(kctx, kbpr); + kbasep_csf_sync_gpu_dump_print(kctx, kbpr); + kbasep_csf_sync_kcpu_dump_print(kctx, kbpr); + kbasep_csf_cpu_queue_dump_print(kctx, kbpr); + + kbasep_print(kbpr, "-----------------------------------------------\n"); +} + +static void kcpu_queue_timeout_worker(struct work_struct *data) +{ + struct kbase_kcpu_command_queue *queue = + container_of(data, struct kbase_kcpu_command_queue, timeout_work); + struct kbasep_printer *kbpr = NULL; + + kbpr = kbasep_printer_buffer_init(queue->kctx->kbdev, KBASEP_PRINT_TYPE_DEV_WARN); + if (kbpr) { + kcpu_fence_timeout_dump(queue, kbpr); + kbasep_printer_buffer_flush(kbpr); + kbasep_printer_term(kbpr); + } + + kcpu_queue_force_fence_signal(queue); +} + static void kcpu_queue_process_worker(struct work_struct *data) { - struct kbase_kcpu_command_queue *queue = container_of(data, - struct kbase_kcpu_command_queue, work); + struct kbase_kcpu_command_queue *queue = + container_of(data, struct kbase_kcpu_command_queue, work); mutex_lock(&queue->lock); kcpu_queue_process(queue, false); @@ -1741,11 +2005,10 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_lock(&kctx->csf.kcpu_queues.lock); if ((id < KBASEP_MAX_KCPU_QUEUES) && kctx->csf.kcpu_queues.array[id]) { - struct kbase_kcpu_command_queue *queue = - kctx->csf.kcpu_queues.array[id]; + struct kbase_kcpu_command_queue *queue = kctx->csf.kcpu_queues.array[id]; - 
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, - queue, queue->num_pending_cmds, queue->cqs_wait_count); + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, queue, + queue->num_pending_cmds, queue->cqs_wait_count); /* Disassociate the queue from the system to prevent further * submissions. Draining pending commands would be acceptable @@ -1779,15 +2042,16 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_unlock(&queue->lock); + cancel_work_sync(&queue->timeout_work); cancel_work_sync(&queue->work); + destroy_workqueue(queue->wq); mutex_destroy(&queue->lock); kfree(queue); } else { - dev_dbg(kctx->kbdev->dev, - "Attempt to delete a non-existent KCPU queue"); + dev_dbg(kctx->kbdev->dev, "Attempt to delete a non-existent KCPU queue"); mutex_unlock(&kctx->csf.kcpu_queues.lock); err = -EINVAL; } @@ -1795,10 +2059,8 @@ static int delete_queue(struct kbase_context *kctx, u32 id) } static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( - struct kbase_device *kbdev, - const struct kbase_kcpu_command_queue *queue, - const struct kbase_kcpu_command_jit_alloc_info *jit_alloc, - int alloc_status) + struct kbase_device *kbdev, const struct kbase_kcpu_command_queue *queue, + const struct kbase_kcpu_command_jit_alloc_info *jit_alloc, int alloc_status) { u8 i; @@ -1812,15 +2074,14 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( if ((alloc_status == 0) && !WARN_ON(!reg) && !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) { #ifdef CONFIG_MALI_VECTOR_DUMP - struct tagged_addr phy = {0}; + struct tagged_addr phy = { 0 }; #endif /* CONFIG_MALI_VECTOR_DUMP */ gpu_alloc_addr = reg->start_pfn << PAGE_SHIFT; #ifdef CONFIG_MALI_VECTOR_DUMP - mmu_flags = kbase_mmu_create_ate(kbdev, - phy, reg->flags, - MIDGARD_MMU_BOTTOMLEVEL, - queue->kctx->jit_group_id); + mmu_flags = kbase_mmu_create_ate(kbdev, phy, reg->flags, + MIDGARD_MMU_BOTTOMLEVEL, + queue->kctx->jit_group_id); #endif /* CONFIG_MALI_VECTOR_DUMP */ } 
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( @@ -1829,21 +2090,19 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( } static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( - struct kbase_device *kbdev, - const struct kbase_kcpu_command_queue *queue) + struct kbase_device *kbdev, const struct kbase_kcpu_command_queue *queue) { KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); } -static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( - struct kbase_device *kbdev, - const struct kbase_kcpu_command_queue *queue) +static void +KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(struct kbase_device *kbdev, + const struct kbase_kcpu_command_queue *queue) { KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); } -static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, - bool drain_queue) +static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) { struct kbase_device *kbdev = queue->kctx->kbdev; bool process_next = true; @@ -1852,8 +2111,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, lockdep_assert_held(&queue->lock); for (i = 0; i != queue->num_pending_cmds; ++i) { - struct kbase_kcpu_command *cmd = - &queue->commands[(u8)(queue->start_offset + i)]; + struct kbase_kcpu_command *cmd = &queue->commands[(u8)(queue->start_offset + i)]; int status; switch (cmd->type) { @@ -1869,8 +2127,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, if (drain_queue) { kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { - status = kbase_kcpu_fence_wait_process(queue, - &cmd->info.fence); + status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); if (status == 0) process_next = false; @@ -1878,8 +2135,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue->has_error = true; } #else - dev_warn(kbdev->dev, - 
"unexpected fence wait command found\n"); + dev_warn(kbdev->dev, "unexpected fence wait command found\n"); status = -EINVAL; queue->has_error = true; @@ -1902,8 +2158,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, if (status < 0) queue->has_error = true; #else - dev_warn(kbdev->dev, - "unexpected fence signal command found\n"); + dev_warn(kbdev->dev, "unexpected fence signal command found\n"); status = -EINVAL; queue->has_error = true; @@ -1913,8 +2168,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: - status = kbase_kcpu_cqs_wait_process(kbdev, queue, - &cmd->info.cqs_wait); + status = kbase_kcpu_cqs_wait_process(kbdev, queue, &cmd->info.cqs_wait); if (!status && !drain_queue) { process_next = false; @@ -1925,18 +2179,17 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, * In all cases can move to the next command. * TBD: handle the error */ - cleanup_cqs_wait(queue, &cmd->info.cqs_wait); + cleanup_cqs_wait(queue, &cmd->info.cqs_wait); } break; case BASE_KCPU_COMMAND_TYPE_CQS_SET: - kbase_kcpu_cqs_set_process(kbdev, queue, - &cmd->info.cqs_set); + kbase_kcpu_cqs_set_process(kbdev, queue, &cmd->info.cqs_set); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: - status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, - &cmd->info.cqs_wait_operation); + status = kbase_kcpu_cqs_wait_operation_process( + kbdev, queue, &cmd->info.cqs_wait_operation); if (!status && !drain_queue) { process_next = false; @@ -1947,13 +2200,13 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, * In all cases can move to the next command. 
* TBD: handle the error */ - cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); + cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); } break; case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: kbase_kcpu_cqs_set_operation_process(kbdev, queue, - &cmd->info.cqs_set_operation); + &cmd->info.cqs_set_operation); break; case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: @@ -1970,15 +2223,13 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue); kbase_gpu_vm_lock(queue->kctx); - meta = kbase_sticky_resource_acquire( - queue->kctx, cmd->info.import.gpu_va); + meta = kbase_sticky_resource_acquire(queue->kctx, + cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); if (meta == NULL) { queue->has_error = true; - dev_dbg( - kbdev->dev, - "failed to map an external resource"); + dev_dbg(kbdev->dev, "failed to map an external resource"); } KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( @@ -1992,14 +2243,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); kbase_gpu_vm_lock(queue->kctx); - ret = kbase_sticky_resource_release( - queue->kctx, NULL, cmd->info.import.gpu_va); + ret = kbase_sticky_resource_release(queue->kctx, NULL, + cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); if (!ret) { queue->has_error = true; dev_dbg(kbdev->dev, - "failed to release the reference. resource not found"); + "failed to release the reference. 
resource not found"); } KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue, @@ -2013,22 +2264,21 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue); kbase_gpu_vm_lock(queue->kctx); - ret = kbase_sticky_resource_release_force( - queue->kctx, NULL, cmd->info.import.gpu_va); + ret = kbase_sticky_resource_release_force(queue->kctx, NULL, + cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); if (!ret) { queue->has_error = true; dev_dbg(kbdev->dev, - "failed to release the reference. resource not found"); + "failed to release the reference. resource not found"); } KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( kbdev, queue, ret ? 0 : 1); break; } - case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: - { + case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: { if (drain_queue) { /* We still need to call this function to clean the JIT alloc info up */ kbase_kcpu_jit_allocate_finish(queue, cmd); @@ -2036,8 +2286,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev, queue); - status = kbase_kcpu_jit_allocate_process(queue, - cmd); + status = kbase_kcpu_jit_allocate_process(queue, cmd); if (status == -EAGAIN) { process_next = false; } else { @@ -2045,11 +2294,9 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue->has_error = true; KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( - kbdev, queue, - &cmd->info.jit_alloc, status); + kbdev, queue, &cmd->info.jit_alloc, status); - kbase_kcpu_jit_allocate_finish(queue, - cmd); + kbase_kcpu_jit_allocate_finish(queue, cmd); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( kbdev, queue); } @@ -2064,14 +2311,13 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, if (status) queue->has_error = true; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, 
queue); break; } #if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { struct kbase_suspend_copy_buffer *sus_buf = - cmd->info.suspend_buf_copy.sus_buf; + cmd->info.suspend_buf_copy.sus_buf; if (!drain_queue) { KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( @@ -2080,6 +2326,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = kbase_csf_queue_group_suspend_process( queue->kctx, sus_buf, cmd->info.suspend_buf_copy.group_handle); + if (status) queue->has_error = true; @@ -2088,15 +2335,13 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, } if (!sus_buf->cpu_alloc) { - int i; + uint i; for (i = 0; i < sus_buf->nr_pages; i++) put_page(sus_buf->pages[i]); } else { - kbase_mem_phy_alloc_kernel_unmapped( - sus_buf->cpu_alloc); - kbase_mem_phy_alloc_put( - sus_buf->cpu_alloc); + kbase_mem_phy_alloc_kernel_unmapped(sus_buf->cpu_alloc); + kbase_mem_phy_alloc_put(sus_buf->cpu_alloc); } kfree(sus_buf->pages); @@ -2105,8 +2350,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, } #endif default: - dev_dbg(kbdev->dev, - "Unrecognized command type"); + dev_dbg(kbdev->dev, "Unrecognized command type"); break; } /* switch */ @@ -2136,9 +2380,9 @@ static size_t kcpu_queue_get_space(struct kbase_kcpu_command_queue *queue) return KBASEP_KCPU_QUEUE_SIZE - queue->num_pending_cmds; } -static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( - const struct kbase_kcpu_command_queue *queue, - const struct kbase_kcpu_command *cmd) +static void +KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(const struct kbase_kcpu_command_queue *queue, + const struct kbase_kcpu_command *cmd) { struct kbase_device *kbdev = queue->kctx->kbdev; @@ -2151,10 +2395,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue, cmd->info.fence.fence); break; - case 
BASE_KCPU_COMMAND_TYPE_CQS_WAIT: - { - const struct base_cqs_wait_info *waits = - cmd->info.cqs_wait.objs; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: { + const struct base_cqs_wait_info *waits = cmd->info.cqs_wait.objs; u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags; unsigned int i; @@ -2165,8 +2407,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( } break; } - case BASE_KCPU_COMMAND_TYPE_CQS_SET: - { + case BASE_KCPU_COMMAND_TYPE_CQS_SET: { const struct base_cqs_set *sets = cmd->info.cqs_set.objs; unsigned int i; @@ -2176,8 +2417,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( } break; } - case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: - { + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: { const struct base_cqs_wait_operation_info *waits = cmd->info.cqs_wait_operation.objs; u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags; @@ -2185,21 +2425,20 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) { KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( - kbdev, queue, waits[i].addr, waits[i].val, - waits[i].operation, waits[i].data_type, + kbdev, queue, waits[i].addr, waits[i].val, waits[i].operation, + waits[i].data_type, (inherit_err_flags & ((uint32_t)1 << i)) ? 
1 : 0); } break; } - case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: - { + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: { const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs; unsigned int i; for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( - kbdev, queue, sets[i].addr, sets[i].val, - sets[i].operation, sets[i].data_type); + kbdev, queue, sets[i].addr, sets[i].val, sets[i].operation, + sets[i].data_type); } break; } @@ -2218,14 +2457,12 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( kbdev, queue, cmd->info.import.gpu_va); break; - case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: - { + case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: { u8 i; KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); for (i = 0; i < cmd->info.jit_alloc.count; i++) { - const struct base_jit_alloc_info *info = - &cmd->info.jit_alloc.info[i]; + const struct base_jit_alloc_info *info = &cmd->info.jit_alloc.info[i]; KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( kbdev, queue, info->gpu_alloc_addr, info->va_pages, @@ -2235,8 +2472,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); break; } - case BASE_KCPU_COMMAND_TYPE_JIT_FREE: - { + case BASE_KCPU_COMMAND_TYPE_JIT_FREE: { u8 i; KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); @@ -2261,7 +2497,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( } int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_enqueue *enq) + struct kbase_ioctl_kcpu_queue_enqueue *enq) { struct kbase_kcpu_command_queue *queue = NULL; void __user *user_cmds = u64_to_user_ptr(enq->addr); @@ -2284,8 +2520,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, * in the set. 
*/ if (enq->nr_commands != 1) { - dev_dbg(kctx->kbdev->dev, - "More than one commands enqueued"); + dev_dbg(kctx->kbdev->dev, "More than one commands enqueued"); return -EINVAL; } @@ -2338,13 +2573,12 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, goto out; } - user_cmds = (void __user *)((uintptr_t)user_cmds + - sizeof(struct base_kcpu_command)); + user_cmds = + (void __user *)((uintptr_t)user_cmds + sizeof(struct base_kcpu_command)); for (j = 0; j < sizeof(command.padding); j++) { if (command.padding[j] != 0) { - dev_dbg(kctx->kbdev->dev, - "base_kcpu_command padding not 0\n"); + dev_dbg(kctx->kbdev->dev, "base_kcpu_command padding not 0\n"); ret = -EINVAL; goto out; } @@ -2354,8 +2588,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, switch (command.type) { case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: #if IS_ENABLED(CONFIG_SYNC_FILE) - ret = kbase_kcpu_fence_wait_prepare(queue, - &command.info.fence, kcpu_cmd); + ret = kbase_kcpu_fence_wait_prepare(queue, &command.info.fence, kcpu_cmd); #else ret = -EINVAL; dev_warn(kctx->kbdev->dev, "fence wait command unsupported\n"); @@ -2363,63 +2596,56 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, break; case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: #if IS_ENABLED(CONFIG_SYNC_FILE) - ret = kbase_kcpu_fence_signal_prepare(queue, - &command.info.fence, kcpu_cmd); + ret = kbase_kcpu_fence_signal_prepare(queue, &command.info.fence, kcpu_cmd); #else ret = -EINVAL; dev_warn(kctx->kbdev->dev, "fence signal command unsupported\n"); #endif break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: - ret = kbase_kcpu_cqs_wait_prepare(queue, - &command.info.cqs_wait, kcpu_cmd); + ret = kbase_kcpu_cqs_wait_prepare(queue, &command.info.cqs_wait, kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_SET: - ret = kbase_kcpu_cqs_set_prepare(queue, - &command.info.cqs_set, kcpu_cmd); + ret = kbase_kcpu_cqs_set_prepare(queue, &command.info.cqs_set, kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: - 
ret = kbase_kcpu_cqs_wait_operation_prepare(queue, - &command.info.cqs_wait_operation, kcpu_cmd); + ret = kbase_kcpu_cqs_wait_operation_prepare( + queue, &command.info.cqs_wait_operation, kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: - ret = kbase_kcpu_cqs_set_operation_prepare(queue, - &command.info.cqs_set_operation, kcpu_cmd); + ret = kbase_kcpu_cqs_set_operation_prepare( + queue, &command.info.cqs_set_operation, kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; ret = 0; break; case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: - ret = kbase_kcpu_map_import_prepare(queue, - &command.info.import, kcpu_cmd); + ret = kbase_kcpu_map_import_prepare(queue, &command.info.import, kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: - ret = kbase_kcpu_unmap_import_prepare(queue, - &command.info.import, kcpu_cmd); + ret = kbase_kcpu_unmap_import_prepare(queue, &command.info.import, + kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: - ret = kbase_kcpu_unmap_import_force_prepare(queue, - &command.info.import, kcpu_cmd); + ret = kbase_kcpu_unmap_import_force_prepare(queue, &command.info.import, + kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: - ret = kbase_kcpu_jit_allocate_prepare(queue, - &command.info.jit_alloc, kcpu_cmd); + ret = kbase_kcpu_jit_allocate_prepare(queue, &command.info.jit_alloc, + kcpu_cmd); break; case BASE_KCPU_COMMAND_TYPE_JIT_FREE: - ret = kbase_kcpu_jit_free_prepare(queue, - &command.info.jit_free, kcpu_cmd); + ret = kbase_kcpu_jit_free_prepare(queue, &command.info.jit_free, kcpu_cmd); break; #if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: - ret = kbase_csf_queue_group_suspend_prepare(queue, - &command.info.suspend_buf_copy, - kcpu_cmd); + ret = kbase_csf_queue_group_suspend_prepare( + queue, &command.info.suspend_buf_copy, kcpu_cmd); break; #endif default: - dev_dbg(queue->kctx->kbdev->dev, - 
"Unknown command type %u", command.type); + dev_dbg(queue->kctx->kbdev->dev, "Unknown command type %u", command.type); ret = -EINVAL; break; } @@ -2450,26 +2676,15 @@ out: int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) { - int idx; - - bitmap_zero(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); - - for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) - kctx->csf.kcpu_queues.array[idx] = NULL; - mutex_init(&kctx->csf.kcpu_queues.lock); - atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0); - return 0; } void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) { - while (!bitmap_empty(kctx->csf.kcpu_queues.in_use, - KBASEP_MAX_KCPU_QUEUES)) { - int id = find_first_bit(kctx->csf.kcpu_queues.in_use, - KBASEP_MAX_KCPU_QUEUES); + while (!bitmap_empty(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES)) { + int id = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); if (WARN_ON(!kctx->csf.kcpu_queues.array[id])) clear_bit(id, kctx->csf.kcpu_queues.in_use); @@ -2482,13 +2697,12 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_delete *del) + struct kbase_ioctl_kcpu_queue_delete *del) { return delete_queue(kctx, (u32)del->id); } -int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_new *newq) +int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq) { struct kbase_kcpu_command_queue *queue; int idx; @@ -2505,8 +2719,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, mutex_lock(&kctx->csf.kcpu_queues.lock); - idx = find_first_zero_bit(kctx->csf.kcpu_queues.in_use, - KBASEP_MAX_KCPU_QUEUES); + idx = find_first_zero_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); if (idx >= (int)KBASEP_MAX_KCPU_QUEUES) { ret = -ENOMEM; goto out; @@ -2572,6 +2785,7 @@ int 
kbase_csf_kcpu_queue_new(struct kbase_context *kctx, INIT_LIST_HEAD(&queue->jit_blocked); queue->has_error = false; INIT_WORK(&queue->work, kcpu_queue_process_worker); + INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); queue->id = idx; newq->id = idx; @@ -2582,14 +2796,100 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id, queue->num_pending_cmds); - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, - queue->fence_context, 0); + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0); #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); #endif + +#if IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&queue->fence_signal_pending_cnt, 0); + kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); +#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); return ret; } KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); + +int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + unsigned long queue_idx; + struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues; + + mutex_lock(&kcpu_ctx->lock); + + for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx]; + + if (unlikely(!kcpu_queue)) + continue; + + mutex_lock(&kcpu_queue->lock); + + if (atomic_read(&kcpu_queue->fence_signal_pending_cnt)) { + int ret = del_timer_sync(&kcpu_queue->fence_signal_timeout); + + dev_dbg(kbdev->dev, + "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend", + queue_idx, kctx->tgid, kctx->id, + ret ? 
"pending" : "not pending"); + } + +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + if (kcpu_queue->fence_wait_processed) { + int ret = del_timer_sync(&kcpu_queue->fence_timeout); + + dev_dbg(kbdev->dev, + "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend", + queue_idx, kctx->tgid, kctx->id, + ret ? "pending" : "not pending"); + } +#endif + mutex_unlock(&kcpu_queue->lock); + } + mutex_unlock(&kcpu_ctx->lock); + } + return 0; +} + +void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + unsigned long queue_idx; + struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues; + + mutex_lock(&kcpu_ctx->lock); + + for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx]; + + if (unlikely(!kcpu_queue)) + continue; + + mutex_lock(&kcpu_queue->lock); +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + if (kcpu_queue->fence_wait_processed) { + fence_wait_timeout_start(kcpu_queue); + dev_dbg(kbdev->dev, + "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume", + queue_idx, kctx->tgid, kctx->id); + } +#endif + if (atomic_read(&kbdev->fence_signal_timeout_enabled) && + atomic_read(&kcpu_queue->fence_signal_pending_cnt)) { + fence_signal_timeout_start(kcpu_queue); + dev_dbg(kbdev->dev, + "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume", + queue_idx, kctx->tgid, kctx->id); + } + mutex_unlock(&kcpu_queue->lock); + } + mutex_unlock(&kcpu_ctx->lock); + } +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index 5cad8b200c93..9ca33773941e 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -53,6 +53,7 @@ struct kbase_kcpu_command_import_info { * @fence_cb: Fence 
callback * @fence: Fence * @kcpu_queue: kcpu command queue + * @fence_has_force_signaled: fence has been force-signaled after a fence signal timeout */ struct kbase_kcpu_command_fence_info { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -63,6 +64,7 @@ struct kbase_kcpu_command_fence_info { struct dma_fence *fence; #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ struct kbase_kcpu_command_queue *kcpu_queue; + bool fence_has_force_signaled; }; /** @@ -181,7 +183,7 @@ struct kbase_kcpu_command_jit_free_info { struct kbase_suspend_copy_buffer { size_t size; struct page **pages; - int nr_pages; + unsigned int nr_pages; size_t offset; struct kbase_mem_phy_alloc *cpu_alloc; }; @@ -252,6 +254,9 @@ struct kbase_kcpu_command { * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; * part of kernel API for processing workqueues + * @timeout_work: struct work_struct which contains a pointer to the + * function which handles post-timeout actions + * for the queue when a fence signal timeout occurs. * @start_offset: Index of the command to be executed next * @id: KCPU command queue ID. * @num_pending_cmds: The number of commands enqueued but not yet @@ -283,6 +288,9 @@ struct kbase_kcpu_command { * @fence_timeout: Timer used to detect the fence wait timeout. * @metadata: Metadata structure containing basic information about * this queue for any fence objects associated with this queue. + * @fence_signal_timeout: Timer used to detect that a fence signal command + * has been blocked for too long. + * @fence_signal_pending_cnt: Number of enqueued fence signal commands in the queue.
*/ struct kbase_kcpu_command_queue { struct mutex lock; @@ -290,6 +298,7 @@ struct kbase_kcpu_command_queue { struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; struct workqueue_struct *wq; struct work_struct work; + struct work_struct timeout_work; u8 start_offset; u8 id; u16 num_pending_cmds; @@ -307,6 +316,8 @@ struct kbase_kcpu_command_queue { #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; #endif /* CONFIG_SYNC_FILE */ + struct timer_list fence_signal_timeout; + atomic_t fence_signal_pending_cnt; }; /** @@ -319,8 +330,7 @@ struct kbase_kcpu_command_queue { * * Return: 0 if successful or a negative error code on failure. */ -int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_new *newq); +int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq); /** * kbase_csf_kcpu_queue_delete - Delete KCPU command queue. @@ -333,7 +343,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, * Return: 0 if successful or a negative error code on failure. */ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_delete *del); + struct kbase_ioctl_kcpu_queue_delete *del); /** * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command @@ -356,6 +366,8 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, * * @kctx: Pointer to the kbase context being initialized. * + * This function must be called only when a kbase context is instantiated. + * * Return: 0 if successful or a negative error code on failure. */ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); @@ -381,4 +393,32 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, struct base_fence *fence, struct sync_file **sync_file, int *fd); #endif /* CONFIG_SYNC_FILE */ +/* + * kbase_csf_kcpu_queue_halt_timers - Halt the KCPU fence timers associated with + * the kbase device. 
+ * + * @kbdev: Kbase device + * + * Note that this function assumes that the caller has ensured that the + * kbase_device::kctx_list does not get updated during this function's runtime. + * At the moment, the function is only safe to call during system suspend, when + * the device PM active count has reached zero. + * + * Return: 0 on success, negative value otherwise. + */ +int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev); + +/* + * kbase_csf_kcpu_queue_resume_timers - Resume the KCPU fence timers associated + * with the kbase device. + * + * @kbdev: Kbase device + * + * Note that this function assumes that the caller has ensured that the + * kbase_device::kctx_list does not get updated during this function's runtime. + * At the moment, the function is only safe to call during system resume. + */ +void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev); + +bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info); #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c index fa877778ca79..ec839b252bec 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,8 +38,8 @@ * @waits: Pointer to the KCPU CQS wait command info */ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, - struct kbase_context *kctx, - struct kbase_kcpu_command_cqs_wait_info *waits) + struct kbase_context *kctx, + struct kbase_kcpu_command_cqs_wait_info *waits) { unsigned int i; @@ -47,8 +47,8 @@ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, struct kbase_vmap_struct *mapping; u32 val; char const *msg; - u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, - waits->objs[i].addr, &mapping); + u32 *const cpu_ptr = + (u32 *)kbase_phy_alloc_mapping_get(kctx, waits->objs[i].addr, &mapping); if (!cpu_ptr) return; @@ -57,10 +57,9 @@ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, kbase_phy_alloc_mapping_put(kctx, mapping); - msg = (waits->inherit_err_flags && (1U << i)) ? "true" : - "false"; - seq_printf(file, " %llx(%u > %u, inherit_err: %s), ", - waits->objs[i].addr, val, waits->objs[i].val, msg); + msg = (waits->inherit_err_flags && (1U << i)) ? 
"true" : "false"; + seq_printf(file, " %llx(%u > %u, inherit_err: %s), ", waits->objs[i].addr, val, + waits->objs[i].val, msg); } } @@ -71,40 +70,34 @@ static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, * @kctx: The context of the KCPU queue * @queue: Pointer to the KCPU queue */ -static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, - struct kbase_context *kctx, - struct kbase_kcpu_command_queue *queue) +static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, struct kbase_context *kctx, + struct kbase_kcpu_command_queue *queue) { if (WARN_ON(!queue)) return; lockdep_assert_held(&kctx->csf.kcpu_queues.lock); - seq_printf(file, "%16u, %11u, %7u, %13llu %8u", - queue->num_pending_cmds, queue->enqueue_failed, - queue->command_started ? 1 : 0, - queue->fence_context, queue->fence_seqno); + seq_printf(file, "%16u, %11u, %7u, %13llu %8u", queue->num_pending_cmds, + queue->enqueue_failed, queue->command_started ? 1 : 0, queue->fence_context, + queue->fence_seqno); if (queue->command_started) { - struct kbase_kcpu_command *cmd = - &queue->commands[queue->start_offset]; + struct kbase_kcpu_command *cmd = &queue->commands[queue->start_offset]; switch (cmd->type) { #if IS_ENABLED(CONFIG_SYNC_FILE) - case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: - { + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: { struct kbase_sync_fence_info info; kbase_sync_fence_info_get(cmd->info.fence.fence, &info); - seq_printf(file, ", Fence %pK %s %s", - info.fence, info.name, + seq_printf(file, ", Fence %pK %s %s", info.fence, info.name, kbase_sync_status_string(info.status)); break; } #endif case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: seq_puts(file, ", CQS "); - kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx, - &cmd->info.cqs_wait); + kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx, &cmd->info.cqs_wait); break; default: seq_puts(file, ", U, Unknown blocking command"); @@ -128,24 +121,23 @@ static int kbasep_csf_kcpu_debugfs_show(struct seq_file *file, 
void *data) struct kbase_context *kctx = file->private; unsigned long idx; + CSTD_UNUSED(data); + seq_printf(file, "MALI_CSF_KCPU_DEBUGFS_VERSION: v%u\n", MALI_CSF_KCPU_DEBUGFS_VERSION); - seq_puts(file, "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n"); + seq_puts( + file, + "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n"); mutex_lock(&kctx->csf.kcpu_queues.lock); - idx = find_first_bit(kctx->csf.kcpu_queues.in_use, - KBASEP_MAX_KCPU_QUEUES); + idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); while (idx < KBASEP_MAX_KCPU_QUEUES) { - struct kbase_kcpu_command_queue *queue = - kctx->csf.kcpu_queues.array[idx]; + struct kbase_kcpu_command_queue *queue = kctx->csf.kcpu_queues.array[idx]; - seq_printf(file, "%9lu( %s ), ", idx, - queue->has_error ? "InErr" : "NoErr"); - kbasep_csf_kcpu_debugfs_print_queue(file, kctx, - kctx->csf.kcpu_queues.array[idx]); + seq_printf(file, "%9lu( %s ), ", idx, queue->has_error ? 
"InErr" : "NoErr"); + kbasep_csf_kcpu_debugfs_print_queue(file, kctx, kctx->csf.kcpu_queues.array[idx]); - idx = find_next_bit(kctx->csf.kcpu_queues.in_use, - KBASEP_MAX_KCPU_QUEUES, idx + 1); + idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, idx + 1); } mutex_unlock(&kctx->csf.kcpu_queues.lock); @@ -172,16 +164,14 @@ void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; - file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, - kctx, &kbasep_csf_kcpu_debugfs_fops); + file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, kctx, + &kbasep_csf_kcpu_debugfs_fops); if (IS_ERR_OR_NULL(file)) { - dev_warn(kctx->kbdev->dev, - "Unable to create KCPU debugfs entry"); + dev_warn(kctx->kbdev->dev, "Unable to create KCPU debugfs entry"); } } - #else /* * Stub functions for when debugfs is disabled diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.c new file mode 100644 index 000000000000..9cbf7bab5caa --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#include +#include +#include +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include +#endif + +#include +#include +#include + +#define BUF_SIZE 10 + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_get(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + struct kbase_device *kbdev = file->private_data; + + if (atomic_read(&kbdev->fence_signal_timeout_enabled)) + ret = simple_read_from_buffer(buf, count, ppos, "1\n", 2); + else + ret = simple_read_from_buffer(buf, count, ppos, "0\n", 2); + + return ret; +}; + +static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_set(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + int ret; + unsigned int enabled; + struct kbase_device *kbdev = file->private_data; + + CSTD_UNUSED(ppos); + + ret = kstrtouint_from_user(buf, count, 10, &enabled); + if (ret < 0) + return ret; + + atomic_set(&kbdev->fence_signal_timeout_enabled, enabled); + + return count; +} + +static const struct file_operations kbase_csf_kcpu_queue_fence_signal_fops = { + .owner = THIS_MODULE, + .read = kbase_csf_kcpu_queue_fence_signal_enabled_get, + .write = kbase_csf_kcpu_queue_fence_signal_enabled_set, + .open = simple_open, + .llseek = default_llseek, +}; + +static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_get(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int size; + char buffer[BUF_SIZE]; + struct kbase_device *kbdev = file->private_data; + unsigned int timeout_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT); + + size = scnprintf(buffer, sizeof(buffer), "%u\n", timeout_ms); + return simple_read_from_buffer(buf, count, ppos, buffer, size); +} + +static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_set(struct file *file, + const 
char __user *buf, size_t count, + loff_t *ppos) +{ + int ret; + unsigned int timeout_ms; + struct kbase_device *kbdev = file->private_data; + + CSTD_UNUSED(ppos); + + ret = kstrtouint_from_user(buf, count, 10, &timeout_ms); + if (ret < 0) + return ret; + + /* The timeout passed by the user is bounded when trying to insert it into + * the precomputed timeout table, so we don't need to do any more validation + * before-hand. + */ + kbase_device_set_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT, timeout_ms); + + return count; +} + +static const struct file_operations kbase_csf_kcpu_queue_fence_signal_timeout_fops = { + .owner = THIS_MODULE, + .read = kbase_csf_kcpu_queue_fence_signal_timeout_get, + .write = kbase_csf_kcpu_queue_fence_signal_timeout_set, + .open = simple_open, + .llseek = default_llseek, +}; + +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *file; + const mode_t mode = 0644; + + if (WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return -1; + + file = debugfs_create_file("fence_signal_timeout_enable", mode, + kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_kcpu_queue_fence_signal_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, "Unable to create fence signal timer toggle entry"); + return -1; + } + + file = debugfs_create_file("fence_signal_timeout_ms", mode, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_kcpu_queue_fence_signal_timeout_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, "Unable to create fence signal timeout entry"); + return -1; + } + return 0; +} + +#else +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return 0; +} + +#endif +void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_bits.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.h similarity index 50% rename from 
drivers/gpu/arm/bifrost/mali_kbase_bits.h rename to drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.h index a085fd86c488..e3799fb8d1e0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_bits.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_fence_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,14 +18,25 @@ * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#ifndef _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ +#define _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ -#ifndef _KBASE_BITS_H_ -#define _KBASE_BITS_H_ +struct kbase_device; -#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) -#include -#else -#include -#endif +/* + * kbase_csf_fence_timer_debugfs_init - Initialize fence signal timeout debugfs + * entries. + * @kbdev: Kbase device. + * + * Return: 0 on success, -1 on failure. + */ +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev); -#endif /* _KBASE_BITS_H_ */ +/* + * kbase_csf_fence_timer_debugfs_term - Terminate fence signal timeout debugfs + * entries. + * @kbdev: Kbase device. 
+ */ +void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c index 4056a9d933d7..11c0ba499596 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c @@ -21,6 +21,7 @@ #include #include +#include #include "mali_kbase_csf.h" #include "mali_kbase_csf_mcu_shared_reg.h" #include @@ -40,12 +41,13 @@ #define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi)) /* MCU shared segment dummy page mapping flags */ -#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) +#define DUMMY_PAGE_MAP_FLAGS \ + (KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) /* MCU shared segment suspend buffer mapping flags */ -#define SUSP_PAGE_MAP_FLAGS \ - (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT)) +#define SUSP_PAGE_MAP_FLAGS \ + (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT)) /** * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime @@ -72,18 +74,18 @@ static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) unsigned long userio_map_flags; if (kbdev->system_coherency == COHERENCY_NONE) - userio_map_flags = - KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + userio_map_flags = KBASE_REG_GPU_RD | + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); else userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_SHARED); return (userio_map_flags | KBASE_REG_GPU_NX); } static void 
set_page_meta_status_not_movable(struct tagged_addr phy) { - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); if (page_md) { @@ -117,7 +119,7 @@ static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 n return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info, NULL, false); + mmu_sync_info, NULL); } /* Reset consecutive retry count to zero */ @@ -607,14 +609,14 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + u32 i; const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis); struct kbase_va_region *reg; u64 vpfn; - int err, i; + int err; INIT_LIST_HEAD(&csg_reg->link); - reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, - KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, nr_csg_reg_pages); if (!reg) { dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", @@ -667,18 +669,19 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, fail_userio_pages_map_fail: while (i-- > 0) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, - MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, + shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, 
shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); fail_pmod_map_fail: vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); fail_susp_map_fail: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -697,21 +700,22 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; u64 vpfn; - int i; + u32 i; for (i = 0; i < nr_csis; i++) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, - MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, + shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); 
mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c index bf1835b5be25..e78144ac4bf3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,27 +31,28 @@ int kbase_csf_protected_memory_init(struct kbase_device *const kbdev) int err = 0; #if IS_ENABLED(CONFIG_OF) - struct device_node *pma_node = of_parse_phandle(kbdev->dev->of_node, - "protected-memory-allocator", 0); + struct device_node *pma_node = + of_parse_phandle(kbdev->dev->of_node, "protected-memory-allocator", 0); if (!pma_node) { dev_info(kbdev->dev, "Protected memory allocator not available\n"); } else { - struct platform_device *const pdev = - of_find_device_by_node(pma_node); + struct platform_device *const pdev = of_find_device_by_node(pma_node); - kbdev->csf.pma_dev = NULL; if (!pdev) { - dev_err(kbdev->dev, "Platform device for Protected memory allocator not found\n"); + dev_err(kbdev->dev, + "Platform device for Protected memory allocator not found\n"); } else { kbdev->csf.pma_dev = platform_get_drvdata(pdev); if (!kbdev->csf.pma_dev) { dev_info(kbdev->dev, "Protected memory allocator is not ready\n"); err = -EPROBE_DEFER; } else if (!try_module_get(kbdev->csf.pma_dev->owner)) { - dev_err(kbdev->dev, "Failed to get Protected memory allocator module\n"); + dev_err(kbdev->dev, + "Failed to get Protected memory allocator module\n"); err = -ENODEV; } else { - dev_info(kbdev->dev, "Protected memory 
allocator successfully loaded\n"); + dev_info(kbdev->dev, + "Protected memory allocator successfully loaded\n"); } } of_node_put(pma_node); @@ -68,15 +69,11 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev) } struct protected_memory_allocation ** - kbase_csf_protected_memory_alloc( - struct kbase_device *const kbdev, - struct tagged_addr *phys, - size_t num_pages, - bool is_small_page) +kbase_csf_protected_memory_alloc(struct kbase_device *const kbdev, struct tagged_addr *phys, + size_t num_pages, bool is_small_page) { size_t i; - struct protected_memory_allocator_device *pma_dev = - kbdev->csf.pma_dev; + struct protected_memory_allocator_device *pma_dev = kbdev->csf.pma_dev; struct protected_memory_allocation **pma = NULL; unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER; unsigned int num_pages_order; @@ -114,9 +111,7 @@ struct protected_memory_allocation ** *phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE); for (j = 1; j < num_pages_order; j++) { - *phys++ = as_tagged_tag(phys_addr + - PAGE_SIZE * j, - HUGE_PAGE); + *phys++ = as_tagged_tag(phys_addr + PAGE_SIZE * j, HUGE_PAGE); } } else { phys[i] = as_tagged(phys_addr); @@ -131,15 +126,12 @@ struct protected_memory_allocation ** return pma; } -void kbase_csf_protected_memory_free( - struct kbase_device *const kbdev, - struct protected_memory_allocation **pma, - size_t num_pages, - bool is_small_page) +void kbase_csf_protected_memory_free(struct kbase_device *const kbdev, + struct protected_memory_allocation **pma, size_t num_pages, + bool is_small_page) { size_t i; - struct protected_memory_allocator_device *pma_dev = - kbdev->csf.pma_dev; + struct protected_memory_allocator_device *pma_dev = kbdev->csf.pma_dev; unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER); if (is_small_page) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h index 8c1aa919fbb0..cf40987680e4 
100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,10 @@ #include "mali_kbase.h" /** * kbase_csf_protected_memory_init - Initilaise protected memory allocator. - * * @kbdev: Device pointer. * + * This function must be called only when a kbase device is initialized. + * * Return: 0 if success, or an error code on failure. */ int kbase_csf_protected_memory_init(struct kbase_device *const kbdev); @@ -52,11 +53,8 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev); * or NULL on failure. */ struct protected_memory_allocation ** - kbase_csf_protected_memory_alloc( - struct kbase_device *const kbdev, - struct tagged_addr *phys, - size_t num_pages, - bool is_small_page); +kbase_csf_protected_memory_alloc(struct kbase_device *const kbdev, struct tagged_addr *phys, + size_t num_pages, bool is_small_page); /** * kbase_csf_protected_memory_free - Free the allocated @@ -67,9 +65,7 @@ struct protected_memory_allocation ** * @num_pages: Number of pages to be freed. * @is_small_page: Flag used to select the order of protected memory page. 
*/ -void kbase_csf_protected_memory_free( - struct kbase_device *const kbdev, - struct protected_memory_allocation **pma, - size_t num_pages, - bool is_small_page); +void kbase_csf_protected_memory_free(struct kbase_device *const kbdev, + struct protected_memory_allocation **pma, size_t num_pages, + bool is_small_page); #endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index b5bf7bbbc056..c4e6e4ac0df7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -101,8 +101,10 @@ #define CS_INSTR_BUFFER_SIZE 0x0044 /* () Instrumentation buffer size */ #define CS_INSTR_BUFFER_BASE_LO 0x0048 /* () Instrumentation buffer base pointer, low word */ #define CS_INSTR_BUFFER_BASE_HI 0x004C /* () Instrumentation buffer base pointer, high word */ -#define CS_INSTR_BUFFER_OFFSET_POINTER_LO 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */ -#define CS_INSTR_BUFFER_OFFSET_POINTER_HI 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */ +#define CS_INSTR_BUFFER_OFFSET_POINTER_LO \ + 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */ +#define CS_INSTR_BUFFER_OFFSET_POINTER_HI \ + 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */ /* CS_KERNEL_OUTPUT_BLOCK register offsets */ #define CS_ACK 0x0000 /* () CS acknowledge flags */ @@ -119,8 +121,10 @@ #define CS_FATAL 0x0084 /* () Unrecoverable fault information */ #define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */ #define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */ -#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */ -#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */ +#define CS_FATAL_INFO_LO 
\ + 0x0090 /* () Additional information about a non-recoverable fault, low word */ +#define CS_FATAL_INFO_HI \ + 0x0094 /* () Additional information about a non-recoverable fault, high word */ #define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */ #define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */ #define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment completed */ @@ -143,12 +147,15 @@ #define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ #define CSG_DB_REQ 0x0008 /* () Global doorbell request */ #define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ + + #define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ #define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ #define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ #define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ #define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ -#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ +#define CSG_EP_REQ_LO 0x0034 /* () Maximum number of endpoints allowed, low word */ +#define CSG_EP_REQ_HI 0x0038 /* () Maximum number of endpoints allowed, high word */ #define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ #define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ #define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ @@ -174,8 +181,7 @@ #define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ #define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ #define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ -#define GLB_INSTR_FEATURES \ - 0x001C /* () TRACE_POINT instrumentation. (csf >= 1.1.0) */ +#define GLB_INSTR_FEATURES 0x001C /* () TRACE_POINT instrumentation. 
(csf >= 1.1.0) */ #define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ #define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) #define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) @@ -221,6 +227,7 @@ #define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ #define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ + #define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ #define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ #define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ @@ -265,19 +272,25 @@ /* GLB_VERSION register */ #define GLB_VERSION_PATCH_SHIFT (0) #define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT) -#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT) -#define GLB_VERSION_PATCH_SET(reg_val, value) \ - (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK)) +#define GLB_VERSION_PATCH_GET(reg_val) \ + (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT) +#define GLB_VERSION_PATCH_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_PATCH_MASK) | \ + (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK)) #define GLB_VERSION_MINOR_SHIFT (16) #define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT) -#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT) -#define GLB_VERSION_MINOR_SET(reg_val, value) \ - (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK)) +#define GLB_VERSION_MINOR_GET(reg_val) \ + (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT) +#define GLB_VERSION_MINOR_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_MINOR_MASK) | \ + (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK)) #define GLB_VERSION_MAJOR_SHIFT 
(24) #define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT) -#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT) -#define GLB_VERSION_MAJOR_SET(reg_val, value) \ - (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK)) +#define GLB_VERSION_MAJOR_GET(reg_val) \ + (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT) +#define GLB_VERSION_MAJOR_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | \ + (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK)) /* CS_REQ register */ #define CS_REQ_STATE_SHIFT 0 @@ -291,51 +304,60 @@ /* End of CS_REQ_STATE values */ #define CS_REQ_EXTRACT_EVENT_SHIFT 4 #define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) -#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) -#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) +#define CS_REQ_EXTRACT_EVENT_GET(reg_val) \ + (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) +#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | \ + (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) #define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 #define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) -#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) -#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ +#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) \ + (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) +#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & 
CS_REQ_IDLE_SYNC_WAIT_MASK)) #define CS_REQ_IDLE_PROTM_PEND_SHIFT 9 #define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT) -#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) -#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ +#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) \ + (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) +#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK)) #define CS_REQ_IDLE_EMPTY_SHIFT 10 #define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT) -#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) -#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) +#define CS_REQ_IDLE_EMPTY_GET(reg_val) \ + (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) +#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | \ + (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) #define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 #define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ - (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) -#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ + (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) +#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) #define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12 #define 
CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) #define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \ - (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) + (((reg_val)&CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) #define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \ (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \ (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK)) #define CS_REQ_TILER_OOM_SHIFT 26 #define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) #define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) -#define CS_REQ_TILER_OOM_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) +#define CS_REQ_TILER_OOM_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | \ + (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) #define CS_REQ_PROTM_PEND_SHIFT 27 #define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT) -#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) -#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) +#define CS_REQ_PROTM_PEND_GET(reg_val) \ + (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) +#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | \ + (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) #define CS_REQ_FATAL_SHIFT 30 #define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT) #define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT) @@ -350,22 +372,26 @@ /* CS_CONFIG register */ #define CS_CONFIG_PRIORITY_SHIFT 0 #define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT) -#define CS_CONFIG_PRIORITY_GET(reg_val) 
(((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) -#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ - (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) +#define CS_CONFIG_PRIORITY_GET(reg_val) \ + (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) +#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | \ + (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) #define CS_CONFIG_USER_DOORBELL_SHIFT 8 #define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT) -#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) -#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ +#define CS_CONFIG_USER_DOORBELL_GET(reg_val) \ + (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) +#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK)) /* CS_ACK_IRQ_MASK register */ #define CS_ACK_IRQ_MASK_STATE_SHIFT 0 #define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT) -#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT) -#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ +#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) \ + (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT) +#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK)) /* CS_ACK_IRQ_MASK_STATE values */ #define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0 @@ -375,42 +401,45 @@ #define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) 
#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \ (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ +#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK)) #define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26 #define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) #define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \ (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) -#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ +#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK)) #define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27 #define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) #define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \ (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) -#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ +#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK)) #define CS_ACK_IRQ_MASK_FATAL_SHIFT 30 #define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT) -#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT) -#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ +#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) \ + 
(((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT) +#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK)) #define CS_ACK_IRQ_MASK_FAULT_SHIFT 31 #define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT) -#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) -#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ +#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) \ + (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) +#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK)) /* CS_BASE register */ #define CS_BASE_POINTER_SHIFT 0 #define CS_BASE_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_BASE_POINTER_SHIFT) #define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) -#define CS_BASE_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) +#define CS_BASE_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_BASE_POINTER_MASK) | \ + (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) /* CS_SIZE register */ #define CS_SIZE_SIZE_SHIFT 0 @@ -421,24 +450,24 @@ /* CS_TILER_HEAP_START register */ #define CS_TILER_HEAP_START_POINTER_SHIFT 0 -#define CS_TILER_HEAP_START_POINTER_MASK \ +#define CS_TILER_HEAP_START_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_START_POINTER_SHIFT) #define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) -#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ +#define 
CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK)) /* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ /* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ /* CS_TILER_HEAP_END register */ #define CS_TILER_HEAP_END_POINTER_SHIFT 0 -#define CS_TILER_HEAP_END_POINTER_MASK \ +#define CS_TILER_HEAP_END_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_END_POINTER_SHIFT) #define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) -#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ +#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK)) /* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ /* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ @@ -446,39 +475,42 @@ /* CS_USER_INPUT register */ #define CS_USER_INPUT_POINTER_SHIFT 0 #define CS_USER_INPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_INPUT_POINTER_SHIFT) -#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) -#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ +#define CS_USER_INPUT_POINTER_GET(reg_val) \ + (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) +#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK)) /* CS_USER_OUTPUT register */ #define CS_USER_OUTPUT_POINTER_SHIFT 0 #define CS_USER_OUTPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_OUTPUT_POINTER_SHIFT) -#define 
CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) -#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ +#define CS_USER_OUTPUT_POINTER_GET(reg_val) \ + (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) +#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK)) /* CS_INSTR_CONFIG register */ #define CS_INSTR_CONFIG_JASID_SHIFT (0) #define CS_INSTR_CONFIG_JASID_MASK ((u32)0xF << CS_INSTR_CONFIG_JASID_SHIFT) -#define CS_INSTR_CONFIG_JASID_GET(reg_val) (((reg_val)&CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT) -#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \ - (((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \ +#define CS_INSTR_CONFIG_JASID_GET(reg_val) \ + (((reg_val)&CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT) +#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \ + (((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \ (((value) << CS_INSTR_CONFIG_JASID_SHIFT) & CS_INSTR_CONFIG_JASID_MASK)) #define CS_INSTR_CONFIG_EVENT_SIZE_SHIFT (4) #define CS_INSTR_CONFIG_EVENT_SIZE_MASK ((u32)0xF << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) #define CS_INSTR_CONFIG_EVENT_SIZE_GET(reg_val) \ (((reg_val)&CS_INSTR_CONFIG_EVENT_SIZE_MASK) >> CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) -#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \ - (((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) | \ +#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) | \ (((value) << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) & CS_INSTR_CONFIG_EVENT_SIZE_MASK)) #define CS_INSTR_CONFIG_EVENT_STATE_SHIFT (16) #define CS_INSTR_CONFIG_EVENT_STATE_MASK ((u32)0xFF << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) #define CS_INSTR_CONFIG_EVENT_STATE_GET(reg_val) \ (((reg_val)&CS_INSTR_CONFIG_EVENT_STATE_MASK) >> 
CS_INSTR_CONFIG_EVENT_STATE_SHIFT) -#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \ - (((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \ +#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \ + (((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \ (((value) << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) & CS_INSTR_CONFIG_EVENT_STATE_MASK)) /* CS_INSTR_BUFFER_SIZE register */ @@ -486,29 +518,31 @@ #define CS_INSTR_BUFFER_SIZE_SIZE_MASK ((u32)0xFFFFFFFF << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) #define CS_INSTR_BUFFER_SIZE_SIZE_GET(reg_val) \ (((reg_val)&CS_INSTR_BUFFER_SIZE_SIZE_MASK) >> CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) -#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \ - (((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \ +#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \ (((value) << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) & CS_INSTR_BUFFER_SIZE_SIZE_MASK)) /* CS_INSTR_BUFFER_BASE register */ #define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0) -#define CS_INSTR_BUFFER_BASE_POINTER_MASK \ +#define CS_INSTR_BUFFER_BASE_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) #define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \ (((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT) -#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \ +#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \ (((value) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) & CS_INSTR_BUFFER_BASE_POINTER_MASK)) /* CS_INSTR_BUFFER_OFFSET_POINTER register */ #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0) -#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ +#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ ((GPU_ULL(0xFFFFFFFFFFFFFFFF)) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) -#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \ - 
(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) -#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \ - (((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK)) +#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \ + (((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> \ + CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) +#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \ + (((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & \ + CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK)) /* End of CS_KERNEL_INPUT_BLOCK register set definitions */ @@ -526,19 +560,24 @@ /* End of CS_ACK_STATE values */ #define CS_ACK_EXTRACT_EVENT_SHIFT 4 #define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) -#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) +#define CS_ACK_EXTRACT_EVENT_GET(reg_val) \ + (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) +#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | \ + (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) #define CS_ACK_TILER_OOM_SHIFT 26 #define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT) #define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT) -#define CS_ACK_TILER_OOM_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) +#define CS_ACK_TILER_OOM_SET(reg_val, value) \ + (((reg_val) & 
~CS_ACK_TILER_OOM_MASK) | \ + (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) #define CS_ACK_PROTM_PEND_SHIFT 27 #define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT) -#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) -#define CS_ACK_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) +#define CS_ACK_PROTM_PEND_GET(reg_val) \ + (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) +#define CS_ACK_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | \ + (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) #define CS_ACK_FATAL_SHIFT 30 #define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT) #define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT) @@ -552,144 +591,160 @@ /* CS_STATUS_CMD_PTR register */ #define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 -#define CS_STATUS_CMD_PTR_POINTER_MASK \ +#define CS_STATUS_CMD_PTR_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_CMD_PTR_POINTER_SHIFT) #define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) -#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ +#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK)) /* CS_STATUS_WAIT register */ #define CS_STATUS_WAIT_SB_MASK_SHIFT 0 #define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT) -#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) -#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ +#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) \ + 
(((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) +#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) #define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16 #define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT) -#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \ +#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT) -#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \ +#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \ (((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> \ + CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & \ + CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) /* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5 /* End of 
CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ +/* PROGRESS_WAIT is only for before v14.x.4 */ #define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 #define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) #define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) -#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ +#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK)) #define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29 #define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT) #define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT) -#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ +#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30 #define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 #define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << 
CS_STATUS_WAIT_SYNC_WAIT_SHIFT) #define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT) -#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ +#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK)) /* CS_STATUS_REQ_RESOURCE register */ #define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0 -#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK \ + (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> \ + CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & \ + CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) #define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1 -#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) 
-#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK \ + (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> \ + CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & \ + CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) #define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2 -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK \ + (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> \ + CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & \ + CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) #define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3 -#define 
CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ - (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) -#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ - (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK \ + (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> \ + CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & \ + CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) + /* CS_STATUS_WAIT_SYNC_POINTER register */ #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ - (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> \ + CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ + 
(((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & \ + CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) /* CS_STATUS_WAIT_SYNC_VALUE register */ #define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0 #define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) #define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) -#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ - (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) +#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & \ + CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) /* CS_STATUS_SCOREBOARDS register */ #define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0) -#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \ - ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) -#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ - (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \ - CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) -#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ - (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \ - CS_STATUS_SCOREBOARDS_NONZERO_MASK)) +#define CS_STATUS_SCOREBOARDS_NONZERO_MASK ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) +#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ + (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) +#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ + (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & CS_STATUS_SCOREBOARDS_NONZERO_MASK)) /* CS_STATUS_BLOCKED_REASON register */ #define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0) -#define 
CS_STATUS_BLOCKED_REASON_REASON_MASK \ - ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) -#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ - (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \ - CS_STATUS_BLOCKED_REASON_REASON_SHIFT) -#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ - (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ - (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ +#define CS_STATUS_BLOCKED_REASON_REASON_MASK ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) +#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ + (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> CS_STATUS_BLOCKED_REASON_REASON_SHIFT) +#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ + (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ CS_STATUS_BLOCKED_REASON_REASON_MASK)) /* CS_STATUS_BLOCKED_REASON_reason values */ #define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0 #define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1 +/* PROGRESS_WAIT is only for before v14.x.4 */ #define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2 #define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3 #define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4 @@ -700,9 +755,10 @@ /* CS_FAULT register */ #define CS_FAULT_EXCEPTION_TYPE_SHIFT 0 #define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT) -#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) -#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ - (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ +#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) +#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ + (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) /* CS_FAULT_EXCEPTION_TYPE values */ #define 
CS_FAULT_EXCEPTION_TYPE_KABOOM 0x05 @@ -741,17 +797,19 @@ /* End of CS_FAULT_EXCEPTION_TYPE values */ #define CS_FAULT_EXCEPTION_DATA_SHIFT 8 #define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ +#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) \ + (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) +#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK)) /* CS_FATAL register */ #define CS_FATAL_EXCEPTION_TYPE_SHIFT 0 #define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT) -#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) -#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ - (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ +#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) +#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ + (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) /* CS_FATAL_EXCEPTION_TYPE values */ #define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 @@ -764,60 +822,66 @@ /* End of CS_FATAL_EXCEPTION_TYPE values */ #define CS_FATAL_EXCEPTION_DATA_SHIFT 8 #define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \ +#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) \ + 
(((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT) +#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \ (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK)) /* CS_FAULT_INFO register */ #define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0 -#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \ +#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) #define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \ (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) -#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \ +#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \ (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK)) /* CS_FATAL_INFO register */ #define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0 -#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \ +#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) #define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \ (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) -#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ - (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \ +#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \ (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK)) /* CS_HEAP_VT_START register */ #define CS_HEAP_VT_START_VALUE_SHIFT 0 #define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT) -#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT) -#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \ - (((reg_val) & 
~CS_HEAP_VT_START_VALUE_MASK) | \ +#define CS_HEAP_VT_START_VALUE_GET(reg_val) \ + (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT) +#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \ (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK)) /* CS_HEAP_VT_END register */ #define CS_HEAP_VT_END_VALUE_SHIFT 0 #define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT) -#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT) -#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK)) +#define CS_HEAP_VT_END_VALUE_GET(reg_val) \ + (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT) +#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | \ + (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK)) /* CS_HEAP_FRAG_END register */ #define CS_HEAP_FRAG_END_VALUE_SHIFT 0 #define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT) -#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT) -#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \ +#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) \ + (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT) +#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \ (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK)) /* CS_HEAP_ADDRESS register */ #define CS_HEAP_ADDRESS_POINTER_SHIFT 0 #define CS_HEAP_ADDRESS_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_HEAP_ADDRESS_POINTER_SHIFT) -#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> 
CS_HEAP_ADDRESS_POINTER_SHIFT) -#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ +#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) \ + (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT) +#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK)) /* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */ @@ -827,15 +891,17 @@ #define CS_INSERT_VALUE_SHIFT 0 #define CS_INSERT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSERT_VALUE_SHIFT) #define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT) -#define CS_INSERT_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) +#define CS_INSERT_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_INSERT_VALUE_MASK) | \ + (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) /* CS_EXTRACT_INIT register */ #define CS_EXTRACT_INIT_VALUE_SHIFT 0 #define CS_EXTRACT_INIT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_INIT_VALUE_SHIFT) -#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) -#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ +#define CS_EXTRACT_INIT_VALUE_GET(reg_val) \ + (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) +#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK)) /* End of CS_USER_INPUT_BLOCK register set definitions */ @@ -845,15 +911,18 @@ #define CS_EXTRACT_VALUE_SHIFT 0 #define CS_EXTRACT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_VALUE_SHIFT) #define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> 
CS_EXTRACT_VALUE_SHIFT) -#define CS_EXTRACT_VALUE_SET(reg_val, value) \ - (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) +#define CS_EXTRACT_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | \ + (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) /* CS_ACTIVE register */ #define CS_ACTIVE_HW_ACTIVE_SHIFT 0 #define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT) -#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT) -#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \ - (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK)) +#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) \ + (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT) +#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \ + (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | \ + (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK)) /* End of CS_USER_OUTPUT_BLOCK register set definitions */ /* CSG_INPUT_BLOCK register set definitions */ @@ -862,8 +931,9 @@ #define CSG_REQ_STATE_SHIFT 0 #define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT) #define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT) -#define CSG_REQ_STATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK)) +#define CSG_REQ_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_STATE_MASK) | \ + (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK)) /* CSG_REQ_STATE values */ #define CSG_REQ_STATE_TERMINATE 0x0 #define CSG_REQ_STATE_START 0x1 @@ -873,19 +943,23 @@ #define CSG_REQ_EP_CFG_SHIFT 4 #define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT) #define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT) -#define CSG_REQ_EP_CFG_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | 
(((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK)) +#define CSG_REQ_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | \ + (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK)) #define CSG_REQ_STATUS_UPDATE_SHIFT 5 #define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT) -#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT) -#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \ +#define CSG_REQ_STATUS_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT) +#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \ (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK)) #define CSG_REQ_SYNC_UPDATE_SHIFT 28 #define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT) -#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT) -#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK)) +#define CSG_REQ_SYNC_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT) +#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | \ + (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK)) #define CSG_REQ_IDLE_SHIFT 29 #define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT) #define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT) @@ -895,16 +969,17 @@ #define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) #define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \ (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ - (((reg_val) & 
~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \ +#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \ (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK)) /* CSG_ACK_IRQ_MASK register */ #define CSG_ACK_IRQ_MASK_STATE_SHIFT 0 #define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT) -#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT) -#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \ +#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \ (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK)) /* CSG_ACK_IRQ_MASK_STATE values */ #define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0 @@ -912,115 +987,132 @@ /* End of CSG_ACK_IRQ_MASK_STATE values */ #define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4 #define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) -#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) -#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ +#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) +#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK)) #define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5 #define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) #define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \ (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) 
-#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & \ + CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) #define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28 #define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) #define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \ (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK)) #define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29 #define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT) -#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT) -#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ +#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT) +#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK)) #define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30 #define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) #define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \ (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) -#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ - (((reg_val) & 
~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ +#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK)) #define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31 -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ - (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ - (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK \ + (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> \ + CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & \ + CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) /* CSG_EP_REQ register */ #define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 -#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ - (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_EP_REQ_COMPUTE_EP_MASK ((u64)0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, 
value) \ + (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) #define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 -#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ - (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_EP_REQ_FRAGMENT_EP_MASK ((u64)0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) #define CSG_EP_REQ_TILER_EP_SHIFT 16 -#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) -#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) -#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) +#define CSG_EP_REQ_TILER_EP_MASK ((u64)0xF << CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) #define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) #define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ 
(((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & \ + CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) #define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) #define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & \ + CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) #define CSG_EP_REQ_PRIORITY_SHIFT 28 -#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) -#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) -#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) +#define CSG_EP_REQ_PRIORITY_MASK ((u64)0xF << CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | \ + ((((u64)value) << 
CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) + /* CSG_SUSPEND_BUF register */ #define CSG_SUSPEND_BUF_POINTER_SHIFT 0 #define CSG_SUSPEND_BUF_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ +#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) \ + (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK)) /* CSG_PROTM_SUSPEND_BUF register */ #define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 -#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK \ +#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) #define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) -#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ +#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) /* CSG_DVS_BUF_BUFFER register */ #define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0) #define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) -#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT) -#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \ - (((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \ +#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) \ + (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT) 
+#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \ (((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK)) #define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12) -#define CSG_DVS_BUF_BUFFER_POINTER_MASK \ +#define CSG_DVS_BUF_BUFFER_POINTER_MASK \ (GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) #define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \ (((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT) -#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \ - (((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \ +#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \ (((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK)) /* End of CSG_INPUT_BLOCK register set definitions */ @@ -1031,8 +1123,9 @@ #define CSG_ACK_STATE_SHIFT 0 #define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT) #define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT) -#define CSG_ACK_STATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) +#define CSG_ACK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_STATE_MASK) | \ + (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) /* CSG_ACK_STATE values */ #define CSG_ACK_STATE_TERMINATE 0x0 #define CSG_ACK_STATE_START 0x1 @@ -1042,19 +1135,23 @@ #define CSG_ACK_EP_CFG_SHIFT 4 #define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) #define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) -#define CSG_ACK_EP_CFG_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) +#define CSG_ACK_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | \ + (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) #define CSG_ACK_STATUS_UPDATE_SHIFT 5 
#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) -#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) -#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ +#define CSG_ACK_STATUS_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK)) #define CSG_ACK_SYNC_UPDATE_SHIFT 28 #define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) -#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) +#define CSG_ACK_SYNC_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | \ + (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) #define CSG_ACK_IDLE_SHIFT 29 #define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) #define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) @@ -1063,75 +1160,88 @@ #define CSG_ACK_DOORBELL_SHIFT 30 #define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) #define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) -#define CSG_ACK_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) +#define CSG_ACK_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | \ + (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) #define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 
#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) #define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) -#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ +#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) /* CSG_STATUS_EP_CURRENT register */ #define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 #define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ - (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> \ + CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & \ + CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) #define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 #define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ - (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & 
CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> \ + CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & \ + CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) #define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 #define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) #define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) -#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ - (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & \ + CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) + /* CSG_STATUS_EP_REQ register */ #define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 #define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) #define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ +#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) #define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 #define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) #define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ 
(((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) #define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 #define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) #define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) -#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ +#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) #define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 #define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> \ + CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & \ + CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) #define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -#define 
CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ - (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ - (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK \ + (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> \ + CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & \ + CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) + /* End of CSG_OUTPUT_BLOCK register set definitions */ @@ -1142,50 +1252,55 @@ #define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) #define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) -#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ - (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) +#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ + (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & \ + STREAM_FEATURES_WORK_REGISTERS_MASK)) #define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 #define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) #define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> 
STREAM_FEATURES_SCOREBOARDS_SHIFT) -#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ +#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) #define STREAM_FEATURES_COMPUTE_SHIFT 16 #define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) -#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) -#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ +#define STREAM_FEATURES_COMPUTE_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) +#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) #define STREAM_FEATURES_FRAGMENT_SHIFT 17 #define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) #define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) -#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ +#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) #define STREAM_FEATURES_TILER_SHIFT 18 #define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) -#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) -#define STREAM_FEATURES_TILER_SET(reg_val, value) \ - (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ +#define STREAM_FEATURES_TILER_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) 
+#define STREAM_FEATURES_TILER_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) /* STREAM_INPUT_VA register */ #define STREAM_INPUT_VA_VALUE_SHIFT 0 #define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) -#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) -#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ - (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ +#define STREAM_INPUT_VA_VALUE_GET(reg_val) \ + (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) +#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) /* STREAM_OUTPUT_VA register */ #define STREAM_OUTPUT_VA_VALUE_SHIFT 0 #define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) -#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) -#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ - (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ +#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) \ + (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) +#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) /* End of STREAM_CONTROL_BLOCK register set definitions */ @@ -1201,43 +1316,50 @@ #define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) #define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ +#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & 
~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK)) #define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 #define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) -#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) -#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) +#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) \ + (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) +#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | \ + (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) #define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 #define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) #define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) -#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ +#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) #define GLB_REQ_PROTM_ENTER_SHIFT 4 #define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) -#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) -#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK)) +#define GLB_REQ_PROTM_ENTER_GET(reg_val) \ + (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) +#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | \ + (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & 
GLB_REQ_PROTM_ENTER_MASK)) #define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 #define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) -#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) -#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ +#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) \ + (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) +#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) #define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 #define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) -#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) -#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ +#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) \ + (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) +#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) #define GLB_REQ_COUNTER_ENABLE_SHIFT 7 #define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) -#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) -#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ +#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) \ + (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) +#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) #define GLB_REQ_PING_SHIFT 8 #define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) @@ 
-1245,195 +1367,205 @@ #define GLB_REQ_PING_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) #define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ - (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ - (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ - GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ - (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ + (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ + (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) #define GLB_REQ_SLEEP_SHIFT 12 #define GLB_REQ_SLEEP_MASK (0x1 << GLB_REQ_SLEEP_SHIFT) -#define GLB_REQ_SLEEP_GET(reg_val) \ - (((reg_val) & GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT) -#define GLB_REQ_SLEEP_SET(reg_val, value) \ +#define GLB_REQ_SLEEP_GET(reg_val) (((reg_val)&GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT) +#define GLB_REQ_SLEEP_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \ (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK)) #define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 #define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) #define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) -#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ +#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & 
~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) #define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 #define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) #define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) -#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ +#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK)) #define GLB_REQ_INACTIVE_TILER_SHIFT 22 #define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT) -#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) -#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ +#define GLB_REQ_INACTIVE_TILER_GET(reg_val) \ + (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) +#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK)) #define GLB_REQ_PROTM_EXIT_SHIFT 23 #define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT) -#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) -#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK)) +#define GLB_REQ_PROTM_EXIT_GET(reg_val) \ + (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) +#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | \ + (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK)) #define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 
#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) #define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ - (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \ - GLB_REQ_PRFCNT_THRESHOLD_SHIFT) -#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ + (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> GLB_REQ_PRFCNT_THRESHOLD_SHIFT) +#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ - (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \ - GLB_REQ_PRFCNT_THRESHOLD_MASK)) + (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & GLB_REQ_PRFCNT_THRESHOLD_MASK)) #define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 #define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) #define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \ - (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \ - GLB_REQ_PRFCNT_OVERFLOW_SHIFT) -#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ + (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> GLB_REQ_PRFCNT_OVERFLOW_SHIFT) +#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ - (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \ - GLB_REQ_PRFCNT_OVERFLOW_MASK)) + (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK)) #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) -#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) -#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ +#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \ + (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) +#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK)) #define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31 #define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << 
GLB_REQ_DEBUG_HOST_REQ_SHIFT) -#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) -#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ +#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) \ + (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) +#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK)) /* GLB_ACK_IRQ_MASK register */ #define GLB_ACK_IRQ_MASK_HALT_SHIFT 0 #define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT) -#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT) -#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ +#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT) +#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK)) #define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1 #define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) +#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> \ + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + 
(((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & \ + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) #define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2 #define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) #define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ +#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK)) #define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3 #define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) -#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) +#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> \ + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & \ + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) #define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4 #define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) #define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) -#define 
GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ +#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK)) #define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5 #define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) #define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) +#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & \ + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) #define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6 #define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) #define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) +#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & \ + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) #define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7 #define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) #define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> 
GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) -#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) +#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & \ + GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) #define GLB_ACK_IRQ_MASK_PING_SHIFT 8 #define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT) -#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) -#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ +#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) +#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK)) #define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9 -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) -#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ + (((value) << 
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) +#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> \ + GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & \ + GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) #define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21 #define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) +#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> \ + GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & \ + GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) #define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22 
#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) #define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) -#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) +#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & \ + GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) #define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23 #define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) #define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) -#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ +#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) #define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \ - (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \ 
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) #define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \ - (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) @@ -1441,24 +1573,26 @@ #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) -#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) +#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & \ + GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) #define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31 #define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) #define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) -#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) +#define 
GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & \ + GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) /* GLB_PROGRESS_TIMER register */ #define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0 #define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) #define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \ (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) -#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ +#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK)) /* GLB_PWROFF_TIMER register */ @@ -1466,27 +1600,43 @@ #define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) #define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT) -#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ +#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK)) #define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31 #define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) #define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) -#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ - (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ +#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK)) /* 
GLB_PWROFF_TIMER_TIMER_SOURCE values */ #define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 #define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 /* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ +/* GLB_PWROFF_TIMER_CONFIG register */ +#ifndef GLB_PWROFF_TIMER_CONFIG +#define GLB_PWROFF_TIMER_CONFIG 0x0088 /* () Configuration fields for GLB_PWROFF_TIMER */ +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_PWROFF_TIMER_CONFIG values */ + /* GLB_ALLOC_EN register */ #define GLB_ALLOC_EN_MASK_SHIFT 0 #define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) -#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) -#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ - (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) +#define GLB_ALLOC_EN_MASK_GET(reg_val) \ + (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) +#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ + (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | \ + (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) /* GLB_OUTPUT_BLOCK register set definitions */ @@ -1495,14 +1645,16 @@ #define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) #define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) -#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) 
\ - (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ +#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) #define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 #define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) -#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) +#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) \ + (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | \ + (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) /* End of GLB_OUTPUT_BLOCK register set definitions */ /* The following register and fields are for headers before 10.x.7/11.x.4 */ @@ -1530,37 +1682,57 @@ /* GLB_IDLE_TIMER register */ #define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) #define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) -#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) -#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ - (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ +#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) +#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) #define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) #define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) #define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> 
GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) -#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ - (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ +#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) /* GLB_IDLE_TIMER_TIMER_SOURCE values */ #define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 #define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 /* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ +/* GLB_IDLE_TIMER_CONFIG values */ +#ifndef GLB_IDLE_TIMER_CONFIG +#define GLB_IDLE_TIMER_CONFIG 0x0084 /* () Configuration fields for GLB_IDLE_TIMER */ +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_IDLE_TIMER_CONFIG values */ + /* GLB_INSTR_FEATURES register */ #define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) -#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) -#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \ - (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) -#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \ - (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \ - (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK)) +#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK \ + ((u32)0xF << 
GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) +#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \ + (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> \ + GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) +#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \ + (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & \ + GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK)) #define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT (4) #define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK ((u32)0xF << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) -#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \ - (((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) -#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \ - (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \ - (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK)) +#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \ + (((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> \ + GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) +#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \ + (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \ + (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & \ + GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK)) #define CSG_STATUS_STATE (0x0018) /* CSG state status register */ /* CSG_STATUS_STATE register */ @@ -1568,48 +1740,45 @@ #define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) #define CSG_STATUS_STATE_IDLE_GET(reg_val) \ (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) -#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ - (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ - (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) +#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ + (((value) << 
CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) /* GLB_FEATURES_ITER_TRACE_SUPPORTED register */ #define GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT GPU_U(4) -#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \ +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \ (GPU_U(0x1) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) -#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \ - (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \ +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \ + (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \ GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) -#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \ - (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \ - (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \ +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \ + (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \ + (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK)) /* GLB_REQ_ITER_TRACE_ENABLE register */ #define GLB_REQ_ITER_TRACE_ENABLE_SHIFT GPU_U(11) -#define GLB_REQ_ITER_TRACE_ENABLE_MASK \ - (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) -#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \ - (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> \ - GLB_REQ_ITER_TRACE_ENABLE_SHIFT) -#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \ - (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \ - GLB_REQ_ITER_TRACE_ENABLE_MASK)) +#define GLB_REQ_ITER_TRACE_ENABLE_MASK (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) +#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \ + (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> GLB_REQ_ITER_TRACE_ENABLE_SHIFT) +#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \ + (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & GLB_REQ_ITER_TRACE_ENABLE_MASK)) /* GLB_PRFCNT_CONFIG register */ #define 
GLB_PRFCNT_CONFIG_SIZE_SHIFT (0) #define GLB_PRFCNT_CONFIG_SIZE_MASK (0xFF << GLB_PRFCNT_CONFIG_SIZE_SHIFT) -#define GLB_PRFCNT_CONFIG_SIZE_GET(reg_val) \ +#define GLB_PRFCNT_CONFIG_SIZE_GET(reg_val) \ (((reg_val)&GLB_PRFCNT_CONFIG_SIZE_MASK) >> GLB_PRFCNT_CONFIG_SIZE_SHIFT) -#define GLB_PRFCNT_CONFIG_SIZE_SET(reg_val, value) \ - (((reg_val) & ~GLB_PRFCNT_CONFIG_SIZE_MASK) | \ +#define GLB_PRFCNT_CONFIG_SIZE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_CONFIG_SIZE_MASK) | \ (((value) << GLB_PRFCNT_CONFIG_SIZE_SHIFT) & GLB_PRFCNT_CONFIG_SIZE_MASK)) #define GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT GPU_U(8) #define GLB_PRFCNT_CONFIG_SET_SELECT_MASK (GPU_U(0x3) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) -#define GLB_PRFCNT_CONFIG_SET_SELECT_GET(reg_val) \ +#define GLB_PRFCNT_CONFIG_SET_SELECT_GET(reg_val) \ (((reg_val)&GLB_PRFCNT_CONFIG_SET_SELECT_MASK) >> GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) -#define GLB_PRFCNT_CONFIG_SET_SELECT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PRFCNT_CONFIG_SET_SELECT_MASK) | \ +#define GLB_PRFCNT_CONFIG_SET_SELECT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_CONFIG_SET_SELECT_MASK) | \ (((value) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) & GLB_PRFCNT_CONFIG_SET_SELECT_MASK)) /* GLB_PRFCNT_SIZE register */ @@ -1617,57 +1786,57 @@ #define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(value) ((value) << 8) #define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT GPU_U(0) #define GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) -#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(reg_val) \ - (GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) >> \ +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(reg_val) \ + (GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) >> \ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT)) -#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(reg_val, value) \ - (((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \ - ((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << 
GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \ +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \ + ((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK)) #define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) ((value) >> 8) #define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(value) ((value) << 8) #define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT GPU_U(16) #define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) -#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val) \ - (GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >> \ +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val) \ + (GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >> \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT)) -#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value) \ - (((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) | \ - ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) | \ + ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) /* GLB_DEBUG_REQ register */ #define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) #define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) -#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ +#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) -#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ - (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ +#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) #define 
GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) #define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) -#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ +#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) -#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ - (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ +#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) /* GLB_DEBUG_ACK register */ #define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) #define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) -#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ +#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) -#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ - (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ +#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) #define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) #define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) -#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ +#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) -#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ - (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ +#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) /* RUN_MODE values */ @@ -1675,4 +1844,5 @@ #define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 /* End of RUN_MODE values */ + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c index fe3b91a4845d..c18ed5b9f6cc 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,21 +38,18 @@ enum kbasep_soft_reset_status { MCU_REINIT_FAILED }; -static inline bool -kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state) +static inline bool kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state) { return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); } -static inline bool -kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state) +static inline bool kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state) { return (state == KBASE_CSF_RESET_GPU_COMMITTED || state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); } -static inline bool -kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state) +static inline bool kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state) { return (state == KBASE_CSF_RESET_GPU_HAPPENING); } @@ -100,8 +97,7 @@ int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) { down_read(&kbdev->csf.reset.sem); - if (atomic_read(&kbdev->csf.reset.state) == - KBASE_CSF_RESET_GPU_FAILED) { + if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) { up_read(&kbdev->csf.reset.sem); return -ENOMEM; } @@ -120,8 +116,7 @@ int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) if (!down_read_trylock(&kbdev->csf.reset.sem)) return -EAGAIN; - if (atomic_read(&kbdev->csf.reset.state) == - KBASE_CSF_RESET_GPU_FAILED) { + if 
(atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) { up_read(&kbdev->csf.reset.sem); return -ENOMEM; } @@ -166,9 +161,8 @@ void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) /* Mark the reset as now happening, and synchronize with other threads that * might be trying to access the GPU */ -static void kbase_csf_reset_begin_hw_access_sync( - struct kbase_device *kbdev, - enum kbase_csf_reset_gpu_state initial_reset_state) +static void kbase_csf_reset_begin_hw_access_sync(struct kbase_device *kbdev, + enum kbase_csf_reset_gpu_state initial_reset_state) { unsigned long hwaccess_lock_flags; unsigned long scheduler_spin_lock_flags; @@ -185,7 +179,7 @@ static void kbase_csf_reset_begin_hw_access_sync( */ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); - atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING); + atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING); kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); } @@ -193,15 +187,13 @@ static void kbase_csf_reset_begin_hw_access_sync( /* Mark the reset as finished and allow others threads to once more access the * GPU */ -static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, - int err_during_reset, +static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, int err_during_reset, bool firmware_inited) { unsigned long hwaccess_lock_flags; unsigned long scheduler_spin_lock_flags; - WARN_ON(!kbase_csf_reset_state_is_active( - atomic_read(&kbdev->csf.reset.state))); + WARN_ON(!kbase_csf_reset_state_is_active(atomic_read(&kbdev->csf.reset.state))); /* Once again, we synchronize with atomic context threads accessing the * HW, as otherwise any actions they defer could get lost @@ -210,8 +202,7 @@ static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, 
kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); if (!err_during_reset) { - atomic_set(&kbdev->csf.reset.state, - KBASE_CSF_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING); } else { dev_err(kbdev->dev, "Reset failed to complete"); atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED); @@ -236,25 +227,26 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_io_history_dump(kbdev); dev_err(kbdev->dev, "Register state:"); - dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); - dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); - dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); - dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); - dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_STATUS))); + 
dev_err(kbdev->dev, + " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_FAULTSTATUS))); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)), + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK)), + kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK))); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE0)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE1))); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x", + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))); } /** @@ -293,8 +285,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic spin_lock(&kbdev->mmu_mask_change); kbase_pm_reset_start_locked(kbdev); - dev_dbg(kbdev->dev, - "We're about to flush out the IRQs and their bottom halves\n"); + dev_dbg(kbdev->dev, "We're about to flush out the IRQs and their bottom halves\n"); kbdev->irq_reset_flush = true; /* Disable IRQ to avoid IRQ handlers to kick in after releasing the @@ -312,13 +303,11 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic dev_dbg(kbdev->dev, "Flush out any in-flight work items\n"); kbase_flush_mmu_wqs(kbdev); - dev_dbg(kbdev->dev, - "The flush has completed so reset the active indicator\n"); + dev_dbg(kbdev->dev, "The flush has completed so reset the active indicator\n"); kbdev->irq_reset_flush = false; if (!silent) - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", - RESET_TIMEOUT); + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", 
RESET_TIMEOUT); /* Output the state of some interesting registers to help in the * debugging of GPU resets, and dump the firmware trace buffer @@ -329,9 +318,11 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic kbase_csf_firmware_log_dump_buffer(kbdev); } - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_ipa_control_handle_gpu_reset_pre(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ipa_control_handle_gpu_reset_pre(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } /* Tell hardware counters a reset is about to occur. * If the backend is in an unrecoverable error state (e.g. due to @@ -352,7 +343,9 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_ctx_sched_restore_all_as(kbdev); - kbase_ipa_control_handle_gpu_reset_post(kbdev); + { + kbase_ipa_control_handle_gpu_reset_post(kbdev); + } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); @@ -365,10 +358,12 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic mutex_unlock(&kbdev->pm.lock); if (err) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!kbase_pm_l2_is_in_desired_state(kbdev)) ret = L2_ON_FAILED; else if (!kbase_pm_mcu_is_in_desired_state(kbdev)) ret = MCU_REINIT_FAILED; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } return ret; @@ -440,16 +435,14 @@ err: static void kbase_csf_reset_gpu_worker(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - csf.reset.work); + struct kbase_device *kbdev = container_of(data, struct kbase_device, csf.reset.work); bool gpu_sleep_mode_active = false; bool firmware_inited; unsigned long flags; int err = 0; const enum kbase_csf_reset_gpu_state initial_reset_state = 
atomic_read(&kbdev->csf.reset.state); - const bool silent = - kbase_csf_reset_state_is_silent(initial_reset_state); + const bool silent = kbase_csf_reset_state_is_silent(initial_reset_state); /* Ensure any threads (e.g. executing the CSF scheduler) have finished * using the HW @@ -479,8 +472,8 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); #endif - } else if (!kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + } else if (!kbase_pm_context_active_handle_suspend( + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); kbase_pm_context_idle(kbdev); } @@ -494,22 +487,22 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); + kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - if (atomic_cmpxchg(&kbdev->csf.reset.state, - KBASE_CSF_RESET_GPU_NOT_PENDING, - KBASE_CSF_RESET_GPU_PREPARED) != - KBASE_CSF_RESET_GPU_NOT_PENDING) + if (atomic_cmpxchg(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING, + KBASE_CSF_RESET_GPU_PREPARED) != KBASE_CSF_RESET_GPU_NOT_PENDING) /* Some other thread is already resetting the GPU */ return false; + /* Issue the wake up of threads waiting for PM state transition. + * They might want to exit the wait since GPU reset has been triggered. 
+ */ + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); return true; } KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, - unsigned int flags) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int flags) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -521,8 +514,7 @@ void kbase_reset_gpu(struct kbase_device *kbdev) /* Note this is a WARN/atomic_set because it is a software issue for * a race to be occurring here */ - if (WARN_ON(atomic_read(&kbdev->csf.reset.state) != - KBASE_RESET_GPU_PREPARED)) + if (WARN_ON(atomic_read(&kbdev->csf.reset.state) != KBASE_RESET_GPU_PREPARED)) return; atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED); @@ -543,10 +535,9 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) int kbase_reset_gpu_silent(struct kbase_device *kbdev) { - if (atomic_cmpxchg(&kbdev->csf.reset.state, - KBASE_CSF_RESET_GPU_NOT_PENDING, - KBASE_CSF_RESET_GPU_COMMITTED_SILENT) != - KBASE_CSF_RESET_GPU_NOT_PENDING) { + if (atomic_cmpxchg(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING, + KBASE_CSF_RESET_GPU_COMMITTED_SILENT) != + KBASE_CSF_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ return -EAGAIN; } @@ -561,8 +552,7 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent); bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) { - enum kbase_csf_reset_gpu_state reset_state = - atomic_read(&kbdev->csf.reset.state); + enum kbase_csf_reset_gpu_state reset_state = atomic_read(&kbdev->csf.reset.state); /* For CSF, the reset is considered active only when the reset worker * is actually executing and other threads would have to wait for it to @@ -597,10 +587,8 @@ int kbase_reset_gpu_wait(struct kbase_device *kbdev) remaining = wait_event_timeout( kbdev->csf.reset.wait, - (atomic_read(&kbdev->csf.reset.state) == - KBASE_CSF_RESET_GPU_NOT_PENDING) || - (atomic_read(&kbdev->csf.reset.state) == - 
KBASE_CSF_RESET_GPU_FAILED), + (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING) || + (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED), wait_timeout); if (!remaining) { @@ -608,8 +596,7 @@ int kbase_reset_gpu_wait(struct kbase_device *kbdev) return -ETIMEDOUT; - } else if (atomic_read(&kbdev->csf.reset.state) == - KBASE_CSF_RESET_GPU_FAILED) { + } else if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) { return -ENOMEM; } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index edaa6d17e304..a477ee666838 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -19,6 +19,8 @@ * */ +#include + #include #include "mali_kbase_config_defaults.h" #include @@ -33,6 +35,11 @@ #include #include "mali_kbase_csf_tiler_heap_reclaim.h" #include "mali_kbase_csf_mcu_shared_reg.h" +#include +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include +#include +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -58,30 +65,338 @@ /* Time to wait for completion of PING req before considering MCU as hung */ #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) +/* Time to wait for completion of PING request before considering MCU as hung + * when GPU reset is triggered during protected mode. 
+ */ +#define FW_PING_ON_GPU_RESET_IN_PMODE_TIMEOUT_MS (500) + + /* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ #define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); -static -void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, - struct kbase_queue_group *const group, - enum kbase_csf_group_state run_state); -static struct kbase_queue_group *scheduler_get_protm_enter_async_group( - struct kbase_device *const kbdev, - struct kbase_queue_group *const group); -static struct kbase_queue_group *get_tock_top_group( - struct kbase_csf_scheduler *const scheduler); +static void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, + struct kbase_queue_group *const group, + enum kbase_csf_group_state run_state); +static struct kbase_queue_group * +scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev, + struct kbase_queue_group *const group); +static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler); static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); -static int suspend_active_queue_groups(struct kbase_device *kbdev, - unsigned long *slot_mask); -static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, - bool system_suspend); +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask); +static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend); static void schedule_in_cycle(struct kbase_queue_group *group, bool force); static bool queue_group_scheduled_locked(struct kbase_queue_group *group); #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_ctx_init() - Take a reference 
on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. + */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); + gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid); + spin_unlock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); + + if (!gpu_metrics_ctx) { + gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL); + + if (gpu_metrics_ctx) { + spin_lock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); + kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid); + spin_unlock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); + } else { + dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed"); + ret = -ENOMEM; + } + } + + kctx->gpu_metrics_ctx = gpu_metrics_ctx; + mutex_unlock(&kbdev->kctx_list_lock); + + return ret; +} + +/** + * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it + * if the refcount becomes 0. + * + * @kctx: Pointer to the Kbase context. 
+ */ +static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) +{ + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kctx->kbdev->kctx_list_lock); + spin_lock_bh(&kctx->kbdev->csf.scheduler.gpu_metrics_lock); + kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx); + spin_unlock_bh(&kctx->kbdev->csf.scheduler.gpu_metrics_lock); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} + +/** + * struct gpu_metrics_event - A GPU metrics event recorded in trace buffer. + * + * @csg_slot_act: The 32bit data consisting of a GPU metrics event. + * 5 bits[4:0] represents CSG slot number. + * 1 bit [5] represents the transition of the CSG group on the slot. + * '1' means idle->active whilst '0' does active->idle. + * @timestamp: 64bit timestamp consisting of a GPU metrics event. + * + * Note: It's packed and word-aligned as agreed layout with firmware. + */ +struct gpu_metrics_event { + u32 csg_slot_act; + u64 timestamp; +} __packed __aligned(4); +#define GPU_METRICS_EVENT_SIZE sizeof(struct gpu_metrics_event) + +#define GPU_METRICS_ACT_SHIFT 5 +#define GPU_METRICS_ACT_MASK (0x1 << GPU_METRICS_ACT_SHIFT) +#define GPU_METRICS_ACT_GET(val) (((val)&GPU_METRICS_ACT_MASK) >> GPU_METRICS_ACT_SHIFT) + +#define GPU_METRICS_CSG_MASK 0x1f +#define GPU_METRICS_CSG_GET(val) ((val)&GPU_METRICS_CSG_MASK) + +/** + * gpu_metrics_read_event() - Read a GPU metrics trace from trace buffer + * + * @kbdev: Pointer to the device + * @kctx: Kcontext that is derived from CSG slot field of a GPU metrics. + * @prev_act: Previous CSG activity transition in a GPU metrics. + * @cur_act: Current CSG activity transition in a GPU metrics. + * @ts: CSG activity transition timestamp in a GPU metrics. + * + * This function reads the firmware trace buffer named 'gpu_metrics' and + * parses one 12-byte data packet into the following information.
+ * - The number of CSG slot on which CSG was transitioned to active or idle. + * - Activity transition (1: idle->active, 0: active->idle). + * - Timestamp in nanoseconds when the transition occurred. + * + * Return: true on success. + */ +static bool gpu_metrics_read_event(struct kbase_device *kbdev, struct kbase_context **kctx, + bool *prev_act, bool *cur_act, uint64_t *ts) +{ + struct firmware_trace_buffer *tb = kbdev->csf.scheduler.gpu_metrics_tb; + struct gpu_metrics_event e; + + if (kbase_csf_firmware_trace_buffer_read_data(tb, (u8 *)&e, GPU_METRICS_EVENT_SIZE) == + GPU_METRICS_EVENT_SIZE) { + const u8 slot = GPU_METRICS_CSG_GET(e.csg_slot_act); + struct kbase_queue_group *group; + + if (WARN_ON_ONCE(slot >= kbdev->csf.global_iface.group_num)) { + dev_err(kbdev->dev, "invalid CSG slot (%u)", slot); + return false; + } + + group = kbdev->csf.scheduler.csg_slots[slot].resident_group; + + if (unlikely(!group)) { + dev_err(kbdev->dev, "failed to find CSG group from CSG slot (%u)", slot); + return false; + } + + *cur_act = GPU_METRICS_ACT_GET(e.csg_slot_act); + *ts = kbase_backend_time_convert_gpu_to_cpu(kbdev, e.timestamp); + *kctx = group->kctx; + + *prev_act = group->prev_act; + group->prev_act = *cur_act; + + return true; + } + + dev_err(kbdev->dev, "failed to read a GPU metrics from trace buffer"); + + return false; +} + +/** + * drain_gpu_metrics_trace_buffer() - Drain the "gpu_metrics" trace buffer + * + * @kbdev: Pointer to the device + * + * This function is called to drain the "gpu_metrics" trace buffer. As per the events + * read from trace buffer, the start and end of GPU activity for different GPU metrics + * context is reported to the frontend. + * + * Return: Timestamp in nanoseconds to be provided to the frontend for emitting the + * tracepoint. 
+ */ +static u64 drain_gpu_metrics_trace_buffer(struct kbase_device *kbdev) +{ + u64 system_time = 0; + u64 ts_before_drain; + u64 ts = 0; + + lockdep_assert_held(&kbdev->csf.scheduler.gpu_metrics_lock); + + kbase_backend_get_gpu_time_norequest(kbdev, NULL, &system_time, NULL); + /* CPU time value that was used to derive the parameters for time conversion, + * was retrieved from ktime_get_raw_ts64(). But the tracing subsystem would use + * ktime_get_raw_fast_ns() to assign timestamp to the gpu metrics tracepoints + * if MONOTONIC_RAW clock source is used. This can potentially cause 'end_time_ns' + * value emitted for a tracepoint to be greater than the tracepoint emission time. + * As per the kernel doc ktime_get_raw_fast_ns() isn't supposed to be called by + * the drivers. So it would be used only if really needed. + */ + ts_before_drain = kbase_backend_time_convert_gpu_to_cpu(kbdev, system_time); + + while (!kbase_csf_firmware_trace_buffer_is_empty(kbdev->csf.scheduler.gpu_metrics_tb)) { + struct kbase_context *kctx; + bool prev_act; + bool cur_act; + + if (gpu_metrics_read_event(kbdev, &kctx, &prev_act, &cur_act, &ts)) { + if (prev_act == cur_act) { + /* Error handling + * + * In case of active CSG, Kbase will try to recover the + * lost event by ending previously active event and + * starting a new one. + * + * In case of inactive CSG, the event is dropped as Kbase + * cannot recover. + */ + dev_err(kbdev->dev, + "Invalid activity state transition. (prev_act = %u, cur_act = %u)", + prev_act, cur_act); + if (cur_act) { + kbase_gpu_metrics_ctx_end_activity(kctx, ts); + kbase_gpu_metrics_ctx_start_activity(kctx, ts); + } + } else { + /* Normal handling */ + if (cur_act) + kbase_gpu_metrics_ctx_start_activity(kctx, ts); + else + kbase_gpu_metrics_ctx_end_activity(kctx, ts); + } + } else + break; + } + + return (ts >= ts_before_drain ? ts + 1 : ts_before_drain); +} + +/** + * emit_gpu_metrics_to_frontend() - Emit GPU metrics events to the frontend.
+ * + * @kbdev: Pointer to the device + * + * This function must be called only from the timer callback function to emit GPU metrics + * data to the frontend. + */ +static void emit_gpu_metrics_to_frontend(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u64 ts; + +#ifdef CONFIG_MALI_BIFROST_DEBUG + WARN_ON_ONCE(!in_serving_softirq()); +#endif + +#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + return; +#endif + + spin_lock(&scheduler->gpu_metrics_lock); + ts = drain_gpu_metrics_trace_buffer(kbdev); + kbase_gpu_metrics_emit_tracepoint(kbdev, ts); + spin_unlock(&scheduler->gpu_metrics_lock); +} + +/** + * emit_gpu_metrics_to_frontend_for_off_slot_group() - Emit GPU metrics events to the frontend + * after a group went off the CSG slot. + * + * @group: Pointer to the queue group that went off slot. + * + * This function must be called after the CSG suspend/terminate request has completed + * and before the mapping between the queue group and CSG slot is removed. + */ +static void emit_gpu_metrics_to_frontend_for_off_slot_group(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u64 ts; + + lockdep_assert_held(&scheduler->lock); + lockdep_assert_held(&scheduler->gpu_metrics_lock); + +#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + return; +#endif + + ts = drain_gpu_metrics_trace_buffer(kbdev); + /* If the group is marked as active even after going off the slot, then it implies + * that the CSG suspend/terminate request didn't complete. 
+ */ + if (unlikely(group->prev_act)) { + kbase_gpu_metrics_ctx_end_activity(group->kctx, ts); + group->prev_act = 0; + } + kbase_gpu_metrics_emit_tracepoint(kbdev, ts); +} + +/** + * gpu_metrics_timer_callback() - Callback function for the GPU metrics hrtimer + * + * @timer: Pointer to the GPU metrics hrtimer + * + * This function will emit power/gpu_work_period tracepoint for all the active + * GPU metrics contexts. The timer will be restarted if needed. + * + * Return: enum value to indicate that timer should not be restarted. + */ +static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer) +{ + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, csf.scheduler.gpu_metrics_timer); + + emit_gpu_metrics_to_frontend(kbdev); + hrtimer_start(&kbdev->csf.scheduler.gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), + HRTIMER_MODE_REL_SOFT); + return HRTIMER_NORESTART; +} +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + /** * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and * scheduling tick/tock to complete before the group deschedule. @@ -106,8 +421,7 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group); * descheduled. To avoid the freeing in such a case, a sort of refcount is used * for the group which is incremented & decremented across the wait. 
*/ -static -void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) +static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) { #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_device *kbdev = group->kctx->kbdev; @@ -120,8 +434,7 @@ void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) return; - while ((!kbase_debug_csf_fault_dump_complete(kbdev) || - (scheduler->state == SCHED_BUSY)) && + while ((!kbase_debug_csf_fault_dump_complete(kbdev) || (scheduler->state == SCHED_BUSY)) && queue_group_scheduled_locked(group)) { group->deschedule_deferred_cnt++; mutex_unlock(&scheduler->lock); @@ -154,8 +467,8 @@ void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) * won't change meanwhile, so no group can enter/exit the Scheduler, become * runnable or go off slot. */ -static void schedule_actions_trigger_df(struct kbase_device *kbdev, - struct kbase_context *kctx, enum dumpfault_error_type error) +static void schedule_actions_trigger_df(struct kbase_device *kbdev, struct kbase_context *kctx, + enum dumpfault_error_type error) { #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -221,10 +534,8 @@ static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev) while ((scheduler->state == SCHED_SLEEPING) && !ret) { mutex_unlock(&scheduler->lock); - remaining = wait_event_timeout( - kbdev->csf.event_wait, - (scheduler->state != SCHED_SLEEPING), - remaining); + remaining = wait_event_timeout(kbdev->csf.event_wait, + (scheduler->state != SCHED_SLEEPING), remaining); mutex_lock(&scheduler->lock); if (!remaining && (scheduler->state == SCHED_SLEEPING)) ret = -ETIMEDOUT; @@ -284,6 +595,9 @@ static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) } scheduler->state = SCHED_SUSPENDED; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + 
hrtimer_cancel(&scheduler->gpu_metrics_timer); +#endif KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); return 0; @@ -304,80 +618,20 @@ out: * * @timer: Pointer to the scheduling tick hrtimer * - * This function will enqueue the scheduling tick work item for immediate - * execution, if it has not been queued already. + * This function will wake up kbase_csf_scheduler_kthread() to process a + * pending scheduling tick. It will be restarted manually once a tick has been + * processed if appropriate. * * Return: enum value to indicate that timer should not be restarted. */ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) { - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - csf.scheduler.tick_timer); - - kbase_csf_scheduler_tick_advance(kbdev); - return HRTIMER_NORESTART; -} - -/** - * start_tick_timer() - Start the scheduling tick hrtimer. - * - * @kbdev: Pointer to the device - * - * This function will start the scheduling tick hrtimer and is supposed to - * be called only from the tick work item function. The tick hrtimer should - * not be active already. 
- */ -static void start_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - lockdep_assert_held(&scheduler->lock); - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - WARN_ON(scheduler->tick_timer_active); - if (likely(!work_pending(&scheduler->tick_work))) { - scheduler->tick_timer_active = true; - - hrtimer_start(&scheduler->tick_timer, - HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - -/** - * cancel_tick_timer() - Cancel the scheduling tick hrtimer - * - * @kbdev: Pointer to the device - */ -static void cancel_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - scheduler->tick_timer_active = false; - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - hrtimer_cancel(&scheduler->tick_timer); -} - -/** - * enqueue_tick_work() - Enqueue the scheduling tick work item - * - * @kbdev: Pointer to the device - * - * This function will queue the scheduling tick work item for immediate - * execution. This shall only be called when both the tick hrtimer and tick - * work item are not active/pending. 
- */ -static void enqueue_tick_work(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - - lockdep_assert_held(&scheduler->lock); + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, csf.scheduler.tick_timer); kbase_csf_scheduler_invoke_tick(kbdev); + + return HRTIMER_NORESTART; } static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) @@ -394,9 +648,8 @@ static int acquire_doorbell(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - doorbell_nr = find_first_zero_bit( - kbdev->csf.scheduler.doorbell_inuse_bitmap, - CSF_NUM_DOORBELL); + doorbell_nr = + find_first_zero_bit(kbdev->csf.scheduler.doorbell_inuse_bitmap, CSF_NUM_DOORBELL); if (doorbell_nr >= CSF_NUM_DOORBELL) return KBASEP_USER_DB_NR_INVALID; @@ -407,7 +660,7 @@ static int acquire_doorbell(struct kbase_device *kbdev) } static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, - struct kbase_queue_group *group) + struct kbase_queue_group *group) { lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -417,8 +670,7 @@ static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, } } -static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, - struct kbase_queue *queue) +static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, struct kbase_queue *queue) { lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -428,14 +680,14 @@ static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; /* After this the dummy page would be mapped in */ unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, - queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); + queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); } mutex_unlock(&kbdev->csf.reg_lock); } static void assign_user_doorbell_to_group(struct kbase_device *kbdev, - struct kbase_queue_group *group) + struct kbase_queue_group *group) 
{ lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -444,7 +696,7 @@ static void assign_user_doorbell_to_group(struct kbase_device *kbdev, } static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, - struct kbase_queue *const queue) + struct kbase_queue *const queue) { lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -461,10 +713,8 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, queue->doorbell_nr = queue->group->doorbell_nr; /* After this the real Hw doorbell page would be mapped in */ - unmap_mapping_range( - kbdev->csf.db_filp->f_inode->i_mapping, - queue->db_file_offset << PAGE_SHIFT, - PAGE_SIZE, 1); + unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, + queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); } mutex_unlock(&kbdev->csf.reg_lock); @@ -474,8 +724,7 @@ static void scheduler_doorbell_init(struct kbase_device *kbdev) { int doorbell_nr; - bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, - CSF_NUM_DOORBELL); + bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, CSF_NUM_DOORBELL); mutex_lock(&kbdev->csf.scheduler.lock); /* Reserve doorbell 0 for use by kernel driver */ @@ -520,19 +769,17 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) for (j = 0; j < max_streams; ++j) { struct kbase_queue *const queue = group->bound_queues[j]; - if (queue) { - if (queue->user_io_addr) { - u64 const *const output_addr = - (u64 const *)(queue->user_io_addr + PAGE_SIZE); + if (queue && queue->user_io_addr) { + u64 const *const output_addr = + (u64 const *)(queue->user_io_addr + + PAGE_SIZE / sizeof(u64)); - queue->extract_ofs = - output_addr[CS_EXTRACT_LO / sizeof(u64)]; - } else { - dev_warn(kbdev->dev, - "%s(): queue->user_io_addr is NULL, queue: %p", - __func__, - queue); - } + /* + * This 64-bit read will be atomic on a 64-bit kernel but may not + * be atomic on 32-bit kernels. Support for 32-bit kernels is + * limited to build-only. 
+ */ + queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; } } } @@ -565,7 +812,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. */ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_us == 0) || + (kbdev->csf.gpu_idle_hysteresis_ns == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -579,8 +826,8 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) update_on_slot_queues_offsets(kbdev); } } else { - /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ - kbase_csf_scheduler_tick_advance_nolock(kbdev); + /* Invoke the scheduling tick to get the non-idle suspended groups loaded soon */ + kbase_csf_scheduler_invoke_tick(kbdev); } } @@ -591,7 +838,7 @@ u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, - kbdev->csf.global_iface.group_num); + kbdev->csf.global_iface.group_num); return nr_active_csgs; } @@ -670,6 +917,14 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) return queue_group_scheduled(group); } +static void update_idle_protm_group_state_to_runnable(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, group->run_state); +} + /** * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. 
* @@ -692,15 +947,14 @@ static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt)); remaining = wait_event_timeout(kbdev->csf.event_wait, - !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); + !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); if (unlikely(!remaining)) { struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp; struct kbase_context *kctx = group ? group->kctx : NULL; dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", - kbase_backend_get_cycle_cnt(kbdev), - kbdev->csf.fw_timeout_ms); + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms); schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT); success = false; } @@ -752,24 +1006,6 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) kbase_reset_gpu(kbdev); } -/** - * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up - * automatically for periodic tasks. - * - * @kbdev: Pointer to the device - * - * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the - * CSF scheduler lock to already have been held. - * - * Return: true if the scheduler is configured to wake up periodically - */ -static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - return kbdev->csf.scheduler.timer_enabled; -} - /** * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for * Scheduler @@ -784,7 +1020,7 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) * Return: 0 if successful or a negative error code on failure. 
*/ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) + enum kbase_pm_suspend_handler suspend_handler) { unsigned long flags; u32 prev_count; @@ -800,8 +1036,7 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, /* On 0 => 1, make a pm_ctx_active request */ if (!prev_count) { - ret = kbase_pm_context_active_handle_suspend(kbdev, - suspend_handler); + ret = kbase_pm_context_active_handle_suspend(kbdev, suspend_handler); /* Invoke the PM state machines again as the change in MCU * desired status, due to the update of scheduler.pm_active_count, * may be missed by the thread that called pm_wait_for_desired_state() @@ -837,8 +1072,7 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * Return: zero when the PM reference was taken and non-zero when the * system is being suspending/suspended. */ -static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, - unsigned long *flags) +static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, unsigned long *flags) { u32 prev_count; int ret = 0; @@ -854,8 +1088,8 @@ static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, if (!prev_count) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags); - ret = kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + ret = kbase_pm_context_active_handle_suspend( + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); spin_lock_irqsave(&kbdev->hwaccess_lock, *flags); if (ret) @@ -949,21 +1183,23 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) lockdep_assert_held(&scheduler->lock); - if ((scheduler->state != SCHED_SUSPENDED) && - (scheduler->state != SCHED_SLEEPING)) + if ((scheduler->state != SCHED_SUSPENDED) && (scheduler->state != SCHED_SLEEPING)) return; if (scheduler->state == SCHED_SUSPENDED) { - dev_dbg(kbdev->dev, - "Re-activating the Scheduler after suspend"); + 
dev_dbg(kbdev->dev, "Re-activating the Scheduler after suspend"); ret = scheduler_pm_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + hrtimer_start(&scheduler->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), + HRTIMER_MODE_REL_SOFT); +#endif } else { #ifdef KBASE_PM_RUNTIME unsigned long flags; - dev_dbg(kbdev->dev, - "Re-activating the Scheduler out of sleep"); + dev_dbg(kbdev->dev, "Re-activating the Scheduler out of sleep"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ret = scheduler_pm_active_after_sleep(kbdev, &flags); @@ -975,8 +1211,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) /* GPUCORE-29850 would add the handling for the case where * Scheduler could not be activated due to system suspend. */ - dev_info(kbdev->dev, - "Couldn't wakeup Scheduler due to system suspend"); + dev_info(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend"); return; } @@ -997,6 +1232,9 @@ static void scheduler_suspend(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Suspending the Scheduler"); scheduler_pm_idle(kbdev); scheduler->state = SCHED_SUSPENDED; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + hrtimer_cancel(&scheduler->gpu_metrics_timer); +#endif KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); } } @@ -1016,16 +1254,14 @@ static void scheduler_suspend(struct kbase_device *kbdev) */ static void update_idle_suspended_group_state(struct kbase_queue_group *group) { - struct kbase_csf_scheduler *scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &group->kctx->kbdev->csf.scheduler; int new_val; lockdep_assert_held(&scheduler->lock); if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { remove_group_from_idle_wait(group); - insert_group_to_runnable(scheduler, group, - KBASE_CSF_GROUP_SUSPENDED); + 
insert_group_to_runnable(scheduler, group, KBASE_CSF_GROUP_SUSPENDED); } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { group->run_state = KBASE_CSF_GROUP_SUSPENDED; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, @@ -1038,23 +1274,17 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group) */ if (scheduler->state != SCHED_SUSPENDED) { unsigned long flags; - int n_idle; - int n_used; - int n_slots = - group->kctx->kbdev->csf.global_iface.group_num; + unsigned int n_idle; + unsigned int n_used; + unsigned int n_slots = group->kctx->kbdev->csf.global_iface.group_num; spin_lock_irqsave(&scheduler->interrupt_lock, flags); - n_idle = bitmap_weight(scheduler->csg_slots_idle_mask, - n_slots); - n_used = bitmap_weight(scheduler->csg_inuse_bitmap, - n_slots); - spin_unlock_irqrestore(&scheduler->interrupt_lock, - flags); + n_idle = bitmap_weight(scheduler->csg_slots_idle_mask, n_slots); + n_used = bitmap_weight(scheduler->csg_inuse_bitmap, n_slots); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - if (n_idle || - n_used < scheduler->num_csg_slots_for_tick || - group->scan_seq_num < - scheduler->num_csg_slots_for_tick) + if (n_idle || n_used < scheduler->num_csg_slots_for_tick || + group->scan_seq_num < scheduler->num_csg_slots_for_tick) schedule_in_cycle(group, true); } } else @@ -1067,15 +1297,13 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group) int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) { - struct kbase_csf_scheduler *scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &group->kctx->kbdev->csf.scheduler; int slot_num = group->csg_nr; lockdep_assert_held(&scheduler->interrupt_lock); if (slot_num >= 0) { - if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != - group)) + if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != group)) return -1; } @@ -1084,8 +1312,7 @@ int 
kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) { - struct kbase_csf_scheduler *scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &group->kctx->kbdev->csf.scheduler; unsigned long flags; int slot_num; @@ -1104,18 +1331,15 @@ int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) * * Return: true if @group is on slot. */ -static bool kbasep_csf_scheduler_group_is_on_slot_locked( - struct kbase_queue_group *group) +static bool kbasep_csf_scheduler_group_is_on_slot_locked(struct kbase_queue_group *group) { - struct kbase_csf_scheduler *scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &group->kctx->kbdev->csf.scheduler; int slot_num = group->csg_nr; lockdep_assert_held(&scheduler->lock); if (slot_num >= 0) { - if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != - group)) + if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != group)) return true; } @@ -1123,12 +1347,13 @@ static bool kbasep_csf_scheduler_group_is_on_slot_locked( } bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, - struct kbase_queue_group *group) + struct kbase_queue_group *group) { - struct kbase_csf_scheduler *scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &group->kctx->kbdev->csf.scheduler; int slot_num = group->csg_nr; + CSTD_UNUSED(kbdev); + lockdep_assert_held(&scheduler->interrupt_lock); if (WARN_ON(slot_num < 0)) @@ -1137,8 +1362,8 @@ bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, return test_bit(slot_num, scheduler->csgs_events_enable_mask); } -struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( - struct kbase_device *kbdev, int slot) +struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(struct kbase_device *kbdev, + int slot) { 
lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); @@ -1156,8 +1381,7 @@ static int halt_stream_sync(struct kbase_queue *queue) long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); unsigned long flags; - if (WARN_ON(!group) || - WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + if (WARN_ON(!group) || WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) return -EINVAL; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -1165,16 +1389,18 @@ static int halt_stream_sync(struct kbase_queue *queue) stream = &ginfo->streams[csi_index]; if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == - CS_REQ_STATE_START) { - + CS_REQ_STATE_START) { remaining = wait_event_timeout(kbdev->csf.event_wait, - (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) - == CS_ACK_STATE_START), remaining); + (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output( + stream, CS_ACK)) == CS_ACK_STATE_START), + remaining); if (!remaining) { - dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, - csi_index, group->handle, group->csg_nr); + dev_warn( + kbdev->dev, + "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d", + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, + csi_index, group->handle, group->csg_nr); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -1182,14 +1408,12 @@ static int halt_stream_sync(struct kbase_queue *queue) return -ETIMEDOUT; } - remaining = - kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); } spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); /* Set state to STOP */ - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, - CS_REQ_STATE_MASK); + kbase_csf_firmware_cs_input_mask(stream, 
CS_REQ, CS_REQ_STATE_STOP, CS_REQ_STATE_MASK); kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); @@ -1198,13 +1422,16 @@ static int halt_stream_sync(struct kbase_queue *queue) /* Timed wait */ remaining = wait_event_timeout(kbdev->csf.event_wait, - (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) - == CS_ACK_STATE_STOP), remaining); + (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output( + stream, CS_ACK)) == CS_ACK_STATE_STOP), + remaining); if (!remaining) { - dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, - queue->csi_index, group->handle, group->csg_nr); + dev_warn( + kbdev->dev, + "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d", + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, + queue->csi_index, group->handle, group->csg_nr); /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU * will be reset as a work-around. @@ -1212,16 +1439,13 @@ static int halt_stream_sync(struct kbase_queue *queue) if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); - } return (remaining) ? 
0 : -ETIMEDOUT; } -static bool can_halt_stream(struct kbase_device *kbdev, - struct kbase_queue_group *group) +static bool can_halt_stream(struct kbase_device *kbdev, struct kbase_queue_group *group) { - struct kbase_csf_csg_slot *const csg_slot = - kbdev->csf.scheduler.csg_slots; + struct kbase_csf_csg_slot *const csg_slot = kbdev->csf.scheduler.csg_slots; unsigned long flags; bool can_halt; int slot; @@ -1231,10 +1455,8 @@ static bool can_halt_stream(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); slot = kbase_csf_scheduler_group_get_slot_locked(group); - can_halt = (slot >= 0) && - (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); - spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, - flags); + can_halt = (slot >= 0) && (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); return can_halt; } @@ -1257,10 +1479,8 @@ static int sched_halt_stream(struct kbase_queue *queue) { struct kbase_queue_group *group = queue->group; struct kbase_device *kbdev = queue->kctx->kbdev; - struct kbase_csf_scheduler *const scheduler = - &kbdev->csf.scheduler; - struct kbase_csf_csg_slot *const csg_slot = - kbdev->csf.scheduler.csg_slots; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_csf_csg_slot *const csg_slot = kbdev->csf.scheduler.csg_slots; bool retry_needed = false; bool retried = false; long remaining; @@ -1280,7 +1500,8 @@ static int sched_halt_stream(struct kbase_queue *queue) WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { - dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is in under transition to running state", + dev_dbg(kbdev->dev, + "Stopping a queue on csi %d when Group-%d is in under transition to running state", queue->csi_index, group->handle); retry_needed = true; } @@ -1308,9 +1529,8 @@ retry: * 
CSF context is locked. Therefore, the scheduler would be * the only one to update the run_state of the group. */ - remaining = wait_event_timeout( - kbdev->csf.event_wait, can_halt_stream(kbdev, group), - kbase_csf_timeout_in_jiffies(group_schedule_timeout)); + remaining = wait_event_timeout(kbdev->csf.event_wait, can_halt_stream(kbdev, group), + kbase_csf_timeout_in_jiffies(group_schedule_timeout)); mutex_lock(&scheduler->lock); @@ -1347,39 +1567,33 @@ retry: * interface after resuming the group before it processes * another state change request of the group. */ - if ((slot >= 0) && - (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { + if ((slot >= 0) && (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { err = halt_stream_sync(queue); } else if (retry_needed && !retried) { retried = true; goto retry; } else if (slot >= 0) { - struct kbase_csf_global_iface *global_iface = - &kbdev->csf.global_iface; - struct kbase_csf_cmd_stream_group_info *ginfo = - &global_iface->groups[slot]; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + struct kbase_csf_cmd_stream_group_info *ginfo = &global_iface->groups[slot]; struct kbase_csf_cmd_stream_info *stream = - &ginfo->streams[queue->csi_index]; - u32 cs_req = - kbase_csf_firmware_cs_input_read(stream, CS_REQ); + &ginfo->streams[queue->csi_index]; + u32 cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ); - if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != - CS_REQ_STATE_STOP)) { + if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != CS_REQ_STATE_STOP)) { /* Timed wait */ remaining = wait_event_timeout( kbdev->csf.event_wait, - (CS_ACK_STATE_GET( - kbase_csf_firmware_cs_output( - stream, CS_ACK)) == - CS_ACK_STATE_STOP), + (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output( + stream, CS_ACK)) == CS_ACK_STATE_STOP), kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms)); if (!remaining) { - dev_warn(kbdev->dev, - "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d", 
- kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, - queue->csi_index, - group->handle, group->csg_nr); + dev_warn( + kbdev->dev, + "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d", + kbase_backend_get_cycle_cnt(kbdev), + kbdev->csf.fw_timeout_ms, queue->csi_index, + group->handle, group->csg_nr); err = -ETIMEDOUT; @@ -1387,10 +1601,11 @@ retry: } } } else if (!remaining) { - dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, queue->csi_index, - group_schedule_timeout); + dev_warn( + kbdev->dev, + "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)", + kbase_backend_get_cycle_cnt(kbdev), group->handle, queue->csi_index, + group_schedule_timeout); err = -ETIMEDOUT; @@ -1421,10 +1636,8 @@ static void scheduler_activate_on_queue_stop(struct kbase_queue *queue) dev_warn( kbdev->dev, "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), - queue->csi_index, queue->group->handle, - queue->kctx->tgid, queue->kctx->id, - queue->group->csg_nr); + kbase_backend_get_cycle_cnt(kbdev), queue->csi_index, queue->group->handle, + queue->kctx->tgid, queue->kctx->id, queue->group->csg_nr); } } @@ -1446,8 +1659,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); if (cs_enabled && queue_group_scheduled_locked(group)) { - struct kbase_csf_csg_slot *const csg_slot = - kbdev->csf.scheduler.csg_slots; + struct kbase_csf_csg_slot *const csg_slot = kbdev->csf.scheduler.csg_slots; int slot = kbase_csf_scheduler_group_get_slot(group); /* Since the group needs to be resumed in order to stop the queue, @@ -1455,8 +1667,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) */ 
scheduler_activate_on_queue_stop(queue); - if ((slot >= 0) && - (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) + if ((slot >= 0) && (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) err = halt_stream_sync(queue); else err = sched_halt_stream(queue); @@ -1474,9 +1685,9 @@ static void update_hw_active(struct kbase_queue *queue, bool active) { #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) if (queue && queue->enabled) { - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - output_addr[CS_ACTIVE / sizeof(u32)] = active; + output_addr[CS_ACTIVE / sizeof(*output_addr)] = active; } #else CSTD_UNUSED(queue); @@ -1486,11 +1697,16 @@ static void update_hw_active(struct kbase_queue *queue, bool active) static void program_cs_extract_init(struct kbase_queue *queue) { - u64 *input_addr = (u64 *)queue->user_io_addr; - u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = - output_addr[CS_EXTRACT_LO / sizeof(u64)]; + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. + */ + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = + output_addr[CS_EXTRACT_LO / sizeof(*output_addr)]; } static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, @@ -1512,24 +1728,20 @@ static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, * queue's register_ex call. 
*/ if (kbase_csf_scheduler_queue_has_trace(queue)) { - u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( - queue->trace_cfg, queue->kctx->as_nr); + u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET(queue->trace_cfg, queue->kctx->as_nr); kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); - kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, - queue->trace_buffer_size); + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, queue->trace_buffer_size); kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, - queue->trace_buffer_base & U32_MAX); + queue->trace_buffer_base & U32_MAX); kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, - queue->trace_buffer_base >> 32); + queue->trace_buffer_base >> 32); - kbase_csf_firmware_cs_input( - stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, - queue->trace_offset_ptr & U32_MAX); - kbase_csf_firmware_cs_input( - stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, - queue->trace_offset_ptr >> 32); + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, + queue->trace_offset_ptr & U32_MAX); + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, + queue->trace_offset_ptr >> 32); } else { /* Place the configuration to the disabled condition */ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); @@ -1537,8 +1749,8 @@ static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, } } -static void program_cs(struct kbase_device *kbdev, - struct kbase_queue *queue, bool ring_csg_doorbell) +static void program_cs(struct kbase_device *kbdev, struct kbase_queue *queue, + bool ring_csg_doorbell) { struct kbase_queue_group *group = queue->group; struct kbase_csf_cmd_stream_group_info *ginfo; @@ -1558,8 +1770,7 @@ static void program_cs(struct kbase_device *kbdev, ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; - if (WARN_ON(csi_index < 0) || - WARN_ON(csi_index >= ginfo->stream_num)) + if (WARN_ON(csi_index < 0) || WARN_ON((u32)csi_index >= ginfo->stream_num)) return; if 
(queue->enabled) { @@ -1575,12 +1786,9 @@ static void program_cs(struct kbase_device *kbdev, stream = &ginfo->streams[csi_index]; - kbase_csf_firmware_cs_input(stream, CS_BASE_LO, - queue->base_addr & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_BASE_HI, - queue->base_addr >> 32); - kbase_csf_firmware_cs_input(stream, CS_SIZE, - queue->size); + kbase_csf_firmware_cs_input(stream, CS_BASE_LO, queue->base_addr & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_BASE_HI, queue->base_addr >> 32); + kbase_csf_firmware_cs_input(stream, CS_SIZE, queue->size); user_input = queue->user_io_gpu_va; WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); @@ -1593,7 +1801,7 @@ static void program_cs(struct kbase_device *kbdev, kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); kbase_csf_firmware_cs_input(stream, CS_CONFIG, - (queue->doorbell_nr << 8) | (queue->priority & 0xF)); + (queue->doorbell_nr << 8) | (queue->priority & 0xF)); /* Program the queue's cs_trace configuration */ program_cs_trace_cfg(stream, queue); @@ -1608,11 +1816,9 @@ static void program_cs(struct kbase_device *kbdev, * removed from the CSG slot before the fault could be acknowledged. */ if (queue->enabled) { - u32 const cs_ack = - kbase_csf_firmware_cs_output(stream, CS_ACK); + u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, - CS_REQ_FAULT_MASK); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); } /* @@ -1628,10 +1834,9 @@ static void program_cs(struct kbase_device *kbdev, /* Set state to START/STOP */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, - queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP, - CS_REQ_STATE_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, - ring_csg_doorbell); + queue->enabled ? 
CS_REQ_STATE_START : CS_REQ_STATE_STOP, + CS_REQ_STATE_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, ring_csg_doorbell); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); @@ -1664,7 +1869,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) kbase_reset_gpu_assert_prevented(kbdev); lockdep_assert_held(&queue->kctx->csf.lock); - if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) + if (WARN_ON_ONCE(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) return -EINVAL; mutex_lock(&kbdev->csf.scheduler.lock); @@ -1676,16 +1881,15 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) } #endif - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, - group->run_state); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, group->run_state); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, queue->status_wait); if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { err = -EIO; evicted = true; - } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) - && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { + } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) && + CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", queue->csi_index, group->handle); } else { @@ -1745,26 +1949,21 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) return err; } -static enum kbase_csf_csg_slot_state update_csg_slot_status( - struct kbase_device *kbdev, s8 slot) +static enum kbase_csf_csg_slot_state update_csg_slot_status(struct kbase_device *kbdev, s8 slot) { - struct kbase_csf_csg_slot *csg_slot = - &kbdev->csf.scheduler.csg_slots[slot]; - struct kbase_csf_cmd_stream_group_info *ginfo = - &kbdev->csf.global_iface.groups[slot]; + struct 
kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; + struct kbase_csf_cmd_stream_group_info *ginfo = &kbdev->csf.global_iface.groups[slot]; u32 state; enum kbase_csf_csg_slot_state slot_state; lockdep_assert_held(&kbdev->csf.scheduler.lock); - state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, - CSG_ACK)); + state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); slot_state = atomic_read(&csg_slot->state); switch (slot_state) { case CSG_SLOT_READY2RUN: - if ((state == CSG_ACK_STATE_START) || - (state == CSG_ACK_STATE_RESUME)) { + if ((state == CSG_ACK_STATE_START) || (state == CSG_ACK_STATE_RESUME)) { slot_state = CSG_SLOT_RUNNING; atomic_set(&csg_slot->state, slot_state); csg_slot->trigger_jiffies = jiffies; @@ -1775,12 +1974,12 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status( } break; case CSG_SLOT_DOWN2STOP: - if ((state == CSG_ACK_STATE_SUSPEND) || - (state == CSG_ACK_STATE_TERMINATE)) { + if ((state == CSG_ACK_STATE_SUSPEND) || (state == CSG_ACK_STATE_TERMINATE)) { slot_state = CSG_SLOT_STOPPED; atomic_set(&csg_slot->state, slot_state); csg_slot->trigger_jiffies = jiffies; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, + state); dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", csg_slot->resident_group->handle, slot); } @@ -1814,21 +2013,20 @@ static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) slot_state = update_csg_slot_status(kbdev, slot); - return (slot_state == CSG_SLOT_STOPPED || - slot_state == CSG_SLOT_READY); + return (slot_state == CSG_SLOT_STOPPED || slot_state == CSG_SLOT_READY); } static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) { - struct kbase_csf_cmd_stream_group_info *ginfo = - &kbdev->csf.global_iface.groups[slot]; + struct kbase_csf_cmd_stream_group_info *ginfo = &kbdev->csf.global_iface.groups[slot]; u32 state; - 
state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, - CSG_ACK)); + state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, + kbdev->csf.scheduler.csg_slots[slot].resident_group, + state); dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); return true; } @@ -1840,8 +2038,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - struct kbase_csf_csg_slot *csg_slot = - kbdev->csf.scheduler.csg_slots; + struct kbase_csf_csg_slot *csg_slot = kbdev->csf.scheduler.csg_slots; s8 slot; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -1853,43 +2050,36 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) /* When in transition, wait for it to complete */ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { - long remaining = - kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); - remaining = wait_event_timeout(kbdev->csf.event_wait, - csg_slot_running(kbdev, slot), remaining); + remaining = wait_event_timeout(kbdev->csf.event_wait, csg_slot_running(kbdev, slot), + remaining); if (!remaining) - dev_warn(kbdev->dev, - "[%llu] slot %d timeout (%d ms) on up-running\n", - kbase_backend_get_cycle_cnt(kbdev), - slot, kbdev->csf.fw_timeout_ms); + dev_warn(kbdev->dev, "[%llu] slot %d timeout (%d ms) on up-running\n", + kbase_backend_get_cycle_cnt(kbdev), slot, + kbdev->csf.fw_timeout_ms); } if (csg_slot_running(kbdev, slot)) { unsigned long flags; - struct kbase_csf_cmd_stream_group_info *ginfo = 
- &global_iface->groups[slot]; + struct kbase_csf_cmd_stream_group_info *ginfo = &global_iface->groups[slot]; - u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : - CSG_REQ_STATE_TERMINATE; + u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : CSG_REQ_STATE_TERMINATE; dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); /* Set state to SUSPEND/TERMINATE */ - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, - CSG_REQ_STATE_MASK); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, CSG_REQ_STATE_MASK); kbase_csf_ring_csg_doorbell(kbdev, slot); - spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, - flags); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); csg_slot[slot].trigger_jiffies = jiffies; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); - KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend); + KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(kbdev, kbdev->id, slot, suspend); } } @@ -1966,8 +2156,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue) goto out; } - sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, - &mapping); + sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, &mapping); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue, queue->sync_ptr); @@ -1980,8 +2169,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue) goto out; } - sync_wait_cond = - CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); + sync_wait_cond = CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) || (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) || 
((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && @@ -2007,8 +2195,8 @@ static bool evaluate_sync_update(struct kbase_queue *queue) */ updated = true; } else { - dev_dbg(queue->kctx->kbdev->dev, - "sync memory not updated yet(%u)", sync_current_val); + dev_dbg(queue->kctx->kbdev->dev, "sync memory not updated yet(%u)", + sync_current_val); } kbase_phy_alloc_mapping_put(queue->kctx, mapping); @@ -2031,40 +2219,34 @@ out: * * Return: true if the queue is blocked on a sync wait operation. */ -static -bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, - struct kbase_queue *queue) +static bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, + struct kbase_queue *queue) { - struct kbase_csf_cmd_stream_info *const stream = - &ginfo->streams[queue->csi_index]; + struct kbase_csf_cmd_stream_info *const stream = &ginfo->streams[queue->csi_index]; u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); bool is_waiting = false; -#if IS_ENABLED(CONFIG_DEBUG_FS) u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO); cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32; queue->saved_cmd_ptr = cmd_ptr; -#endif KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, status); if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { queue->status_wait = status; - queue->sync_ptr = kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT_SYNC_POINTER_LO); - queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; - queue->sync_value = kbase_csf_firmware_cs_output(stream, - CS_STATUS_WAIT_SYNC_VALUE); + queue->sync_ptr = + kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO); + queue->sync_ptr |= + (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_HI) + << 32; + queue->sync_value = kbase_csf_firmware_cs_output(stream, 
CS_STATUS_WAIT_SYNC_VALUE); queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( - kbase_csf_firmware_cs_output(stream, - CS_STATUS_SCOREBOARDS)); + kbase_csf_firmware_cs_output(stream, CS_STATUS_SCOREBOARDS)); queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( - kbase_csf_firmware_cs_output(stream, - CS_STATUS_BLOCKED_REASON)); + kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON)); if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || !evaluate_sync_update(queue)) { @@ -2103,9 +2285,8 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force) * of work needs to be enforced in situation such as entering into * protected mode). */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { - dev_dbg(kbdev->dev, "Kicking async for group %d\n", - group->handle); + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) || force) { + dev_dbg(kbdev->dev, "Kicking async for group %d\n", group->handle); kbase_csf_scheduler_invoke_tock(kbdev); } } @@ -2115,43 +2296,42 @@ static void ktrace_log_group_state(struct kbase_queue_group *const group) switch (group->run_state) { case KBASE_CSF_GROUP_INACTIVE: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, - group->run_state); + group->run_state); break; case KBASE_CSF_GROUP_RUNNABLE: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, - group->run_state); + group->run_state); break; case KBASE_CSF_GROUP_IDLE: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group, - group->run_state); + group->run_state); break; case KBASE_CSF_GROUP_SUSPENDED: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, - group->run_state); + group->run_state); break; case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, - group->run_state); + group->run_state); break; case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, 
CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, - group, group->run_state); + group, group->run_state); break; case KBASE_CSF_GROUP_FAULT_EVICTED: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group, - group->run_state); + group->run_state); break; case KBASE_CSF_GROUP_TERMINATED: KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, - group->run_state); + group->run_state); break; } } -static -void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, - struct kbase_queue_group *const group, - enum kbase_csf_group_state run_state) +static void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, + struct kbase_queue_group *const group, + enum kbase_csf_group_state run_state) { struct kbase_context *const kctx = group->kctx; struct kbase_device *const kbdev = kctx->kbdev; @@ -2170,8 +2350,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, if (run_state == KBASE_CSF_GROUP_RUNNABLE) group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; - list_add_tail(&group->link, - &kctx->csf.sched.runnable_groups[group->priority]); + list_add_tail(&group->link, &kctx->csf.sched.runnable_groups[group->priority]); kctx->csf.sched.num_runnable_grps++; KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group, kctx->csf.sched.num_runnable_grps); @@ -2186,13 +2365,12 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps++; - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps == 1 || - scheduler->state == SCHED_SUSPENDED || + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps == 1 || scheduler->state == SCHED_SUSPENDED || scheduler->state == SCHED_SLEEPING)) { dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); /* Fire a scheduling to start the time-slice */ - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); } else 
schedule_in_cycle(group, false); @@ -2202,15 +2380,24 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler_wakeup(kbdev, false); } -static -void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, - struct kbase_queue_group *group, - enum kbase_csf_group_state run_state) +static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ + hrtimer_cancel(&scheduler->tick_timer); + atomic_set(&scheduler->pending_tick_work, false); +} + +static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) +{ + atomic_set(&scheduler->pending_tock_work, false); +} + +static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, + struct kbase_queue_group *group, + enum kbase_csf_group_state run_state) { struct kbase_context *kctx = group->kctx; struct kbase_queue_group *new_head_grp; - struct list_head *list = - &kctx->csf.sched.runnable_groups[group->priority]; + struct list_head *list = &kctx->csf.sched.runnable_groups[group->priority]; unsigned long flags; lockdep_assert_held(&scheduler->lock); @@ -2251,7 +2438,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, /* * Note: this disables explicit rotation in the next scheduling * cycle. However, removing the top_grp is the same as an - * implicit rotation (e.g. if we instead rotated the top_ctx + * implicit rotation (e.g. if we instead rotated the top_kctx * and then remove top_grp) * * This implicit rotation is assumed by the scheduler rotate @@ -2264,8 +2451,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, * content in case there has been any in order to minimise * latency. 
*/ - group = scheduler_get_protm_enter_async_group(kctx->kbdev, - NULL); + group = scheduler_get_protm_enter_async_group(kctx->kbdev, NULL); if (group) schedule_in_cycle(group, true); } @@ -2273,9 +2459,8 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, kctx->csf.sched.num_runnable_grps--; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group, kctx->csf.sched.num_runnable_grps); - new_head_grp = (!list_empty(list)) ? - list_first_entry(list, struct kbase_queue_group, link) : - NULL; + new_head_grp = + (!list_empty(list)) ? list_first_entry(list, struct kbase_queue_group, link) : NULL; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); if (kctx->csf.sched.num_runnable_grps == 0) { @@ -2283,12 +2468,13 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, struct list_head *kctx_list = &scheduler->runnable_kctxs; /* drop the kctx */ list_del_init(&kctx->csf.link); - if (scheduler->top_ctx == kctx) - scheduler->top_ctx = NULL; + if (scheduler->top_kctx == kctx) + scheduler->top_kctx = NULL; KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u); - new_head_kctx = (!list_empty(kctx_list)) ? - list_first_entry(kctx_list, struct kbase_context, csf.link) : - NULL; + new_head_kctx = + (!list_empty(kctx_list)) ? 
+ list_first_entry(kctx_list, struct kbase_context, csf.link) : + NULL; KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u); } @@ -2296,14 +2482,14 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps--; if (!scheduler->total_runnable_grps) { dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); - cancel_tick_timer(kctx->kbdev); + cancel_tick_work(scheduler); WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); if (scheduler->state != SCHED_SUSPENDED) enqueue_gpu_idle_work(scheduler); } KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, - scheduler->num_active_address_spaces | - (((u64)scheduler->total_runnable_grps) << 32)); + scheduler->num_active_address_spaces | + (((u64)scheduler->total_runnable_grps) << 32)); } static void insert_group_to_idle_wait(struct kbase_queue_group *const group) @@ -2321,8 +2507,7 @@ static void insert_group_to_idle_wait(struct kbase_queue_group *const group) group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group, group->run_state); - dev_dbg(kctx->kbdev->dev, - "Group-%d suspended on sync_wait, total wait_groups: %u\n", + dev_dbg(kctx->kbdev->dev, "Group-%d suspended on sync_wait, total wait_groups: %u\n", group->handle, kctx->csf.sched.num_idle_wait_grps); } @@ -2341,16 +2526,15 @@ static void remove_group_from_idle_wait(struct kbase_queue_group *const group) kctx->csf.sched.num_idle_wait_grps--; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group, kctx->csf.sched.num_idle_wait_grps); - new_head_grp = (!list_empty(list)) ? - list_first_entry(list, struct kbase_queue_group, link) : - NULL; + new_head_grp = + (!list_empty(list)) ? 
list_first_entry(list, struct kbase_queue_group, link) : NULL; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u); group->run_state = KBASE_CSF_GROUP_INACTIVE; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state); } static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, - struct kbase_queue_group *group) + struct kbase_queue_group *group) { lockdep_assert_held(&scheduler->lock); @@ -2369,8 +2553,7 @@ static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) lockdep_assert_held(&scheduler->lock); if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { - int new_val = - atomic_dec_return(&scheduler->non_idle_offslot_grps); + int new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); } } @@ -2385,14 +2568,12 @@ static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group WARN_ON(group->csg_nr < 0); if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { - int new_val = - atomic_dec_return(&scheduler->non_idle_offslot_grps); + int new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); } } -static void update_offslot_non_idle_cnt_on_grp_suspend( - struct kbase_queue_group *group) +static void update_offslot_non_idle_cnt_on_grp_suspend(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; @@ -2401,19 +2582,16 @@ static void update_offslot_non_idle_cnt_on_grp_suspend( if (scheduler->state == SCHED_BUSY) { /* active phase or, async entering the protected mode */ - if (group->prepared_seq_num >= - scheduler->non_idle_scanout_grps) { + if (group->prepared_seq_num >= scheduler->non_idle_scanout_grps) { /* At scanout, it was tagged as on-slot idle */ if 
(group->run_state == KBASE_CSF_GROUP_SUSPENDED) { - int new_val = atomic_inc_return( - &scheduler->non_idle_offslot_grps); + int new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); } } else { if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { - int new_val = atomic_dec_return( - &scheduler->non_idle_offslot_grps); + int new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); } @@ -2421,8 +2599,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend( } else { /* async phases */ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { - int new_val = atomic_inc_return( - &scheduler->non_idle_offslot_grps); + int new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); } @@ -2436,13 +2613,12 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) u32 sb_status = 0; struct kbase_device const *const kbdev = queue->group->kctx->kbdev; - struct kbase_csf_global_iface const *const iface = - &kbdev->csf.global_iface; + struct kbase_csf_global_iface const *const iface = &kbdev->csf.global_iface; u32 glb_version = iface->version; u64 const *input_addr = (u64 const *)queue->user_io_addr; - u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); + u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ @@ -2452,10 +2628,14 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) &ginfo->streams[queue->csi_index]; sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( - kbase_csf_firmware_cs_output(stream, - CS_STATUS_SCOREBOARDS)); + kbase_csf_firmware_cs_output(stream, CS_STATUS_SCOREBOARDS)); } + /* + * These 64-bit reads and 
writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. + */ cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == output_addr[CS_EXTRACT_LO / sizeof(u64)]); cs_idle = cs_empty && (!sb_status); @@ -2477,11 +2657,9 @@ static void save_csg_slot(struct kbase_queue_group *group) ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; - state = - CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); + state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); - if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && - (state != CSG_ACK_STATE_TERMINATE))) { + if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && (state != CSG_ACK_STATE_TERMINATE))) { u32 max_streams = ginfo->stream_num; u32 i; bool sync_wait = false; @@ -2492,8 +2670,7 @@ static void save_csg_slot(struct kbase_queue_group *group) update_hw_active(group->bound_queues[i], false); #endif /* CONFIG_MALI_BIFROST_NO_MALI */ for (i = 0; idle && i < max_streams; i++) { - struct kbase_queue *const queue = - group->bound_queues[i]; + struct kbase_queue *const queue = group->bound_queues[i]; if (!queue || !queue->enabled) continue; @@ -2502,8 +2679,7 @@ static void save_csg_slot(struct kbase_queue_group *group) /* sync_wait is only true if the queue is blocked on * a CQS and not a scoreboard. 
*/ - if (queue->blocked_reason != - CS_STATUS_BLOCKED_ON_SB_WAIT) + if (queue->blocked_reason != CS_STATUS_BLOCKED_ON_SB_WAIT) sync_wait = true; } else { /* Need to confirm if ringbuffer of the GPU @@ -2528,12 +2704,10 @@ static void save_csg_slot(struct kbase_queue_group *group) if (sync_wait) deschedule_idle_wait_group(scheduler, group); else { - group->run_state = - KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; + group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, group->run_state); - dev_dbg(kbdev->dev, "Group-%d suspended: idle", - group->handle); + dev_dbg(kbdev->dev, "Group-%d suspended: idle", group->handle); } } else { group->run_state = KBASE_CSF_GROUP_SUSPENDED; @@ -2559,7 +2733,7 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) s8 slot; struct kbase_csf_csg_slot *csg_slot; unsigned long flags; - u32 i; + u32 csg_req, csg_ack, i; bool as_fault = false; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -2577,13 +2751,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) if (group->bound_queues[i]) { if (group->bound_queues[i]->enabled) { - kbase_csf_firmware_cs_input_mask(stream, - CS_REQ, CS_REQ_STATE_STOP, - CS_REQ_STATE_MASK); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, + CS_REQ_STATE_MASK); } - unassign_user_doorbell_from_queue(kbdev, - group->bound_queues[i]); + unassign_user_doorbell_from_queue(kbdev, group->bound_queues[i]); } } @@ -2597,8 +2769,17 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) as_fault = true; spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + spin_lock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); + emit_gpu_metrics_to_frontend_for_off_slot_group(group); +#endif /* now marking the slot is vacant */ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + /* Process pending SYNC_UPDATE, if any */ + csg_req = 
kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); + csg_ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, csg_req, csg_ack); + kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, @@ -2608,16 +2789,17 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + spin_unlock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); +#endif csg_slot->trigger_jiffies = jiffies; atomic_set(&csg_slot->state, CSG_SLOT_READY); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); - dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", - group->handle, slot); + dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", group->handle, slot); - KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, - kbdev->gpu_props.props.raw_props.gpu_id, slot); + KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, kbdev->id, slot); /* Notify the group is off-slot and the csg_reg might be available for * resue with other groups in a 'lazy unbinding' style. @@ -2634,7 +2816,7 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) struct kbase_csf_cmd_stream_group_info *ginfo; s8 slot; u8 prev_prio; - u32 ep_cfg; + u64 ep_cfg; u32 csg_req; unsigned long flags; @@ -2651,7 +2833,7 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) * This also applies in protected mode. 
*/ WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || - (group->run_state == KBASE_CSF_GROUP_IDLE))); + (group->run_state == KBASE_CSF_GROUP_IDLE))); /* Update consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); @@ -2659,63 +2841,56 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) if (csg_slot->priority == prio) return; - /* Read the csg_ep_cfg back for updating the priority field */ - ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); + ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ_LO); + prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg); spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); csg_req ^= CSG_REQ_EP_CFG_MASK; - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, - CSG_REQ_EP_CFG_MASK); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, CSG_REQ_EP_CFG_MASK); kbase_csf_ring_csg_doorbell(kbdev, slot); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); csg_slot->priority = prio; - dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", - group->handle, group->kctx->tgid, group->kctx->id, slot, - prev_prio, prio); + dev_dbg(kbdev->dev, + "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", + group->handle, group->kctx->tgid, group->kctx->id, slot, prev_prio, prio); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prio); set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); } -static void program_csg_slot(struct kbase_queue_group *group, s8 slot, - u8 prio) +static void program_csg_slot(struct kbase_queue_group *group, s8 slot, 
u8 prio) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; - const u64 shader_core_mask = - kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); - const u64 tiler_core_mask = - kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); + const u64 shader_core_mask = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); + const u64 tiler_core_mask = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); const u64 compute_mask = shader_core_mask & group->compute_mask; const u64 fragment_mask = shader_core_mask & group->fragment_mask; const u64 tiler_mask = tiler_core_mask & group->tiler_mask; - const u8 num_cores = kbdev->gpu_props.num_cores; - const u8 compute_max = min(num_cores, group->compute_max); - const u8 fragment_max = min(num_cores, group->fragment_max); + const u8 compute_max = min(kbdev->gpu_props.num_cores, group->compute_max); + const u8 fragment_max = min(kbdev->gpu_props.num_cores, group->fragment_max); const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); struct kbase_csf_cmd_stream_group_info *ginfo; - u32 ep_cfg = 0; + u64 ep_cfg = 0; u32 csg_req; u32 state; int i; unsigned long flags; u64 normal_suspend_buf; u64 protm_suspend_buf; - struct kbase_csf_csg_slot *csg_slot = - &kbdev->csf.scheduler.csg_slots[slot]; + struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; lockdep_assert_held(&kbdev->csf.scheduler.lock); - if (WARN_ON(slot < 0) && - WARN_ON(slot >= global_iface->group_num)) + if (WARN_ON(slot < 0) && WARN_ON((u32)slot >= global_iface->group_num)) return; WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); @@ -2743,17 +2918,24 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, mutex_unlock(&kbdev->mmu_hw_mutex); if (kctx->as_nr == KBASEP_AS_NR_INVALID) { - dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", + dev_dbg(kbdev->dev, + 
"Could not get a valid AS for group %d of context %d_%d on slot %d\n", group->handle, kctx->tgid, kctx->id, slot); kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); return; } +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + spin_lock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); +#endif spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); kbdev->csf.scheduler.csg_slots[slot].resident_group = group; group->csg_nr = slot; spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + spin_unlock_bh(&kbdev->csf.scheduler.gpu_metrics_lock); +#endif assign_user_doorbell_to_group(kbdev, group); @@ -2765,36 +2947,28 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, program_cs(kbdev, queue, false); } - /* Endpoint programming for CSG */ - kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, - compute_mask & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, - compute_mask >> 32); - kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, - fragment_mask & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, - fragment_mask >> 32); - kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, - tiler_mask & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, compute_mask & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, compute_mask >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, fragment_mask & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, fragment_mask >> 32); + + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, tiler_mask & U32_MAX); /* Register group UID with firmware */ - kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG, - group->group_uid); + kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG, group->group_uid); ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, 
compute_max); ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg & U32_MAX); /* Program the address space number assigned to the context */ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); - kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, - normal_suspend_buf & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, - normal_suspend_buf >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, normal_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, normal_suspend_buf >> 32); /* Note, we program the P-mode buffer pointer here, but actual runtime * enter into pmode execution is controlled by the P-mode phy pages are @@ -2807,10 +2981,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); if (group->dvs_buf) { - kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, - group->dvs_buf & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI, - group->dvs_buf >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, group->dvs_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI, group->dvs_buf >> 32); } /* Enable all interrupts for now */ @@ -2819,8 +2991,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); csg_req ^= CSG_REQ_EP_CFG_MASK; - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, - CSG_REQ_EP_CFG_MASK); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, CSG_REQ_EP_CFG_MASK); /* Set state to START/RESUME */ if (queue_group_suspended_locked(group)) { @@ -2830,8 +3001,7 @@ 
static void program_csg_slot(struct kbase_queue_group *group, s8 slot, state = CSG_REQ_STATE_START; } - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, - state, CSG_REQ_STATE_MASK); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, state, CSG_REQ_STATE_MASK); kbase_csf_ring_csg_doorbell(kbdev, slot); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); @@ -2841,9 +3011,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, csg_slot->priority = prio; /* Trace the programming of the CSG on the slot */ - KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id, - group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0); + KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev, kbdev->id, group->kctx->id, group->handle, + slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0); dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", group->handle, kctx->tgid, kctx->id, slot, prio); @@ -2862,15 +3031,13 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); } -static void remove_scheduled_group(struct kbase_device *kbdev, - struct kbase_queue_group *group) +static void remove_scheduled_group(struct kbase_device *kbdev, struct kbase_queue_group *group) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&scheduler->lock); - WARN_ON(group->prepared_seq_num == - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); + WARN_ON(group->prepared_seq_num == KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); WARN_ON(list_empty(&group->link_to_schedule)); list_del_init(&group->link_to_schedule); @@ -2894,8 +3061,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { - int new_val = 
atomic_dec_return( - &scheduler->non_idle_offslot_grps); + int new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); } @@ -2905,8 +3071,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, group->bound_queues[i]->enabled = false; } - if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (group->prepared_seq_num != KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { if (!update_non_idle_offslot_grps_cnt_from_run_state) update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); @@ -2915,8 +3080,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); else { - remove_group_from_runnable(scheduler, group, - KBASE_CSF_GROUP_INACTIVE); + remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_INACTIVE); } WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); @@ -2951,16 +3115,18 @@ static int term_group_sync(struct kbase_queue_group *group) term_csg_slot(group); remaining = wait_event_timeout(kbdev->csf.event_wait, - group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), - remaining); + group->cs_unrecoverable || + csg_slot_stopped_locked(kbdev, group->csg_nr), + remaining); if (unlikely(!remaining)) { enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT; - dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, - group->handle, group->kctx->tgid, - group->kctx->id, group->csg_nr); + dev_warn( + kbdev->dev, + "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, group->handle, + group->kctx->tgid, group->kctx->id, group->csg_nr); if (kbase_csf_firmware_ping_wait(kbdev, 
FW_PING_AFTER_ERROR_TIMEOUT_MS)) error_type = DF_PING_REQUEST_TIMEOUT; kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); @@ -3008,9 +3174,8 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) dev_warn( kbdev->dev, "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, group->kctx->tgid, - group->kctx->id, group->csg_nr); + kbase_backend_get_cycle_cnt(kbdev), group->handle, + group->kctx->tgid, group->kctx->id, group->csg_nr); /* No point in waiting for CSG termination if MCU didn't * become active. */ @@ -3072,8 +3237,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) struct kbase_queue_group *protm_grp; unsigned long flags; - WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( - group)); + WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)); group->run_state = KBASE_CSF_GROUP_RUNNABLE; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, @@ -3091,10 +3255,9 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) /* Request the update to confirm the condition inferred. */ group->reevaluate_idle_status = true; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, - scheduler->csg_slots_idle_mask[0]); + scheduler->csg_slots_idle_mask[0]); } - spin_unlock_irqrestore(&scheduler->interrupt_lock, - flags); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); /* If GPU is in protected mode then any doorbells rang * would have no effect. Check if GPU is in protected @@ -3102,8 +3265,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) * active protected mode group. If so prompt the FW * to exit protected mode. 
*/ - if (protm_grp && - group->scan_seq_num < protm_grp->scan_seq_num) { + if (protm_grp && group->scan_seq_num < protm_grp->scan_seq_num) { /* Prompt the FW to exit protected mode */ scheduler_force_protm_exit(kbdev); } @@ -3111,11 +3273,9 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) } else if (!queue_group_scheduled_locked(group)) { int new_val; - insert_group_to_runnable(&kbdev->csf.scheduler, group, - KBASE_CSF_GROUP_RUNNABLE); + insert_group_to_runnable(&kbdev->csf.scheduler, group, KBASE_CSF_GROUP_RUNNABLE); /* A new group into the scheduler */ - new_val = atomic_inc_return( - &kbdev->csf.scheduler.non_idle_offslot_grps); + new_val = atomic_inc_return(&kbdev->csf.scheduler.non_idle_offslot_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); } @@ -3142,13 +3302,11 @@ static inline void set_max_csg_slots(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; - unsigned int max_address_space_slots = - kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; + unsigned int max_address_space_slots = kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; WARN_ON(scheduler->num_active_address_spaces > total_csg_slots); - if (likely(scheduler->num_active_address_spaces <= - max_address_space_slots)) + if (likely(scheduler->num_active_address_spaces <= max_address_space_slots)) scheduler->num_csg_slots_for_tick = total_csg_slots; } @@ -3163,19 +3321,17 @@ static inline void set_max_csg_slots(struct kbase_device *kbdev) * group slots from the groups at the head of groups_to_schedule list. 
*/ static inline void count_active_address_space(struct kbase_device *kbdev, - struct kbase_context *kctx) + struct kbase_context *kctx) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; - unsigned int max_address_space_slots = - kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; + unsigned int max_address_space_slots = kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; if (scheduler->ngrp_to_schedule <= total_csg_slots) { if (kctx->csf.sched.ngrp_to_schedule == 1) scheduler->num_active_address_spaces++; - if (scheduler->num_active_address_spaces <= - max_address_space_slots) + if (scheduler->num_active_address_spaces <= max_address_space_slots) scheduler->num_csg_slots_for_tick++; } } @@ -3217,8 +3373,7 @@ static inline void count_active_address_space(struct kbase_device *kbdev, */ static u8 get_slot_priority(struct kbase_queue_group *group) { - struct kbase_csf_scheduler *scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler *scheduler = &group->kctx->kbdev->csf.scheduler; u8 slot_prio; u32 slots_for_tick = scheduler->num_csg_slots_for_tick; u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots; @@ -3231,8 +3386,7 @@ static u8 get_slot_priority(struct kbase_queue_group *group) } else { /* There will be a mix of idle and non-idle groups. 
*/ if (group->scan_seq_num < slots_for_tick) - slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - - group->scan_seq_num); + slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - group->scan_seq_num); else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots)) slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots)); else @@ -3260,18 +3414,14 @@ static void update_resident_groups_priority(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); while (!list_empty(&scheduler->groups_to_schedule)) { - struct kbase_queue_group *group = - list_first_entry(&scheduler->groups_to_schedule, - struct kbase_queue_group, - link_to_schedule); - bool resident = - kbasep_csf_scheduler_group_is_on_slot_locked(group); + struct kbase_queue_group *group = list_first_entry( + &scheduler->groups_to_schedule, struct kbase_queue_group, link_to_schedule); + bool resident = kbasep_csf_scheduler_group_is_on_slot_locked(group); if ((group->prepared_seq_num >= num_groups) || !resident) break; - update_csg_slot_priority(group, - get_slot_priority(group)); + update_csg_slot_priority(group, get_slot_priority(group)); /* Drop the head group from the list */ remove_scheduled_group(kbdev, group); @@ -3293,15 +3443,14 @@ static void update_resident_groups_priority(struct kbase_device *kbdev) * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after * programming the slot. */ -static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, - s8 slot) +static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *const group = - list_empty(&scheduler->groups_to_schedule) ? NULL : - list_first_entry(&scheduler->groups_to_schedule, - struct kbase_queue_group, - link_to_schedule); + list_empty(&scheduler->groups_to_schedule) ? 
+ NULL : + list_first_entry(&scheduler->groups_to_schedule, struct kbase_queue_group, + link_to_schedule); u32 num_groups = scheduler->num_csg_slots_for_tick; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -3310,8 +3459,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, if (!WARN_ON(ret)) { if (kctx_as_enabled(group->kctx) && !group->faulted) { - program_csg_slot(group, slot, - get_slot_priority(group)); + program_csg_slot(group, slot, get_slot_priority(group)); if (likely(csg_slot_in_use(kbdev, slot))) { /* Drop the head group from the list */ @@ -3344,8 +3492,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - struct kbase_csf_csg_slot *const csg_slot = - scheduler->csg_slots; + struct kbase_csf_csg_slot *const csg_slot = scheduler->csg_slots; lockdep_assert_held(&kbdev->csf.scheduler.lock); WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY); @@ -3364,12 +3511,11 @@ static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) update_resident_groups_priority(kbdev); } -static bool slots_state_changed(struct kbase_device *kbdev, - unsigned long *slots_mask, - bool (*state_check_func)(struct kbase_device *, s8)) +static bool slots_state_changed(struct kbase_device *kbdev, unsigned long *slots_mask, + bool (*state_check_func)(struct kbase_device *, s8)) { u32 num_groups = kbdev->csf.global_iface.group_num; - DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0}; + DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = { 0 }; bool changed = false; u32 i; @@ -3409,27 +3555,26 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) u32 num_groups = kbdev->csf.global_iface.group_num; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); - DECLARE_BITMAP(evicted_mask, 
MAX_SUPPORTED_CSGS) = {0}; + DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = { 0 }; bool suspend_wait_failed = false; - long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); lockdep_assert_held(&kbdev->csf.scheduler.lock); /* In the current implementation, csgs_events_enable_mask would be used * only to indicate suspending CSGs. */ - bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask, - MAX_SUPPORTED_CSGS); + bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + long remaining = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); - remaining = wait_event_timeout(kbdev->csf.event_wait, - slots_state_changed(kbdev, changed, - csg_slot_stopped_raw), - remaining); + remaining = wait_event_timeout( + kbdev->csf.event_wait, + slots_state_changed(kbdev, changed, csg_slot_stopped_raw), remaining); if (likely(remaining)) { u32 i; @@ -3444,15 +3589,17 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) /* The on slot csg is now stopped */ clear_bit(i, slot_mask); - KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); - if (likely(group)) { bool as_fault; /* Only do save/cleanup if the * group is not terminated during * the sleep. 
*/ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, kbdev->id, i); save_csg_slot(group); as_fault = cleanup_csg_slot(group); /* If AS fault detected, evict it */ @@ -3495,9 +3642,8 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) dev_warn( kbdev->dev, "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, group->kctx->tgid, - group->kctx->id, i, + kbase_backend_get_cycle_cnt(kbdev), group->handle, + group->kctx->tgid, group->kctx->id, i, kbdev->csf.fw_timeout_ms); if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) @@ -3519,21 +3665,20 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) } if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS)) - dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n", - num_groups, evicted_mask); + dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n", num_groups, + evicted_mask); if (likely(!suspend_wait_failed)) { u32 i; - while (scheduler->ngrp_to_schedule && - scheduler->remaining_tick_slots) { - i = find_first_zero_bit(scheduler->csg_inuse_bitmap, - num_groups); + while (scheduler->ngrp_to_schedule && scheduler->remaining_tick_slots) { + i = find_first_zero_bit(scheduler->csg_inuse_bitmap, num_groups); if (WARN_ON(i == num_groups)) break; program_vacant_csg_slot(kbdev, (s8)i); if (!csg_slot_in_use(kbdev, (int)i)) { - dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); + dev_warn(kbdev->dev, + "Couldn't use CSG slot %d despite being vacant", i); break; } } @@ -3546,8 +3691,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) static void suspend_queue_group(struct kbase_queue_group *group) { unsigned long flags; - struct kbase_csf_scheduler *const scheduler = - &group->kctx->kbdev->csf.scheduler; + struct kbase_csf_scheduler 
*const scheduler = &group->kctx->kbdev->csf.scheduler; spin_lock_irqsave(&scheduler->interrupt_lock, flags); /* This shall be used in program_suspending_csg_slots() where we @@ -3570,15 +3714,14 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) u32 num_groups = kbdev->csf.global_iface.group_num; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); - DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; + DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; u32 i; lockdep_assert_held(&kbdev->csf.scheduler.lock); /* extract start slot flags for check */ for (i = 0; i < num_groups; i++) { - if (atomic_read(&scheduler->csg_slots[i].state) == - CSG_SLOT_READY2RUN) + if (atomic_read(&scheduler->csg_slots[i].state) == CSG_SLOT_READY2RUN) set_bit(i, slot_mask); } @@ -3587,9 +3730,9 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); - remaining = wait_event_timeout(kbdev->csf.event_wait, - slots_state_changed(kbdev, changed, csg_slot_running), - remaining); + remaining = wait_event_timeout( + kbdev->csf.event_wait, + slots_state_changed(kbdev, changed, csg_slot_running), remaining); if (likely(remaining)) { for_each_set_bit(i, changed, num_groups) { @@ -3639,13 +3782,11 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) * * Return: true if the group resident on slot is idle, otherwise false. 
*/ -static bool group_on_slot_is_idle(struct kbase_device *kbdev, - unsigned long slot) +static bool group_on_slot_is_idle(struct kbase_device *kbdev, unsigned long slot) { - struct kbase_csf_cmd_stream_group_info *ginfo = - &kbdev->csf.global_iface.groups[slot]; + struct kbase_csf_cmd_stream_group_info *ginfo = &kbdev->csf.global_iface.groups[slot]; bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & - CSG_STATUS_STATE_IDLE_MASK; + CSG_STATUS_STATE_IDLE_MASK; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -3669,9 +3810,8 @@ static bool group_on_slot_is_idle(struct kbase_device *kbdev, * Return: true if the slots_done is set for at least one slot. * Otherwise false. */ -static -bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, - const unsigned long *slots_mask, unsigned long *slots_done) +static bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, + const unsigned long *slots_mask, unsigned long *slots_done) { u32 num_groups = kbdev->csf.global_iface.group_num; bool changed = false; @@ -3681,7 +3821,7 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, for_each_set_bit(i, slots_mask, num_groups) { struct kbase_csf_cmd_stream_group_info const *const ginfo = - &kbdev->csf.global_iface.groups[i]; + &kbdev->csf.global_iface.groups[i]; u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -3714,22 +3854,20 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, * timed out condition with unacknowledged slots, their bits remain * set in the slot_mask. 
*/ -static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, - u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies) +static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, u32 field_mask, + unsigned long *slot_mask, long wait_in_jiffies) { const u32 num_groups = kbdev->csf.global_iface.group_num; long remaining = wait_in_jiffies; lockdep_assert_held(&kbdev->csf.scheduler.lock); - while (!bitmap_empty(slot_mask, num_groups) && - !kbase_reset_gpu_is_active(kbdev)) { + while (!bitmap_empty(slot_mask, num_groups) && !kbase_reset_gpu_is_active(kbdev)) { DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 }; - remaining = wait_event_timeout(kbdev->csf.event_wait, - slots_update_state_changed(kbdev, field_mask, - slot_mask, dones), - remaining); + remaining = wait_event_timeout( + kbdev->csf.event_wait, + slots_update_state_changed(kbdev, field_mask, slot_mask, dones), remaining); if (likely(remaining)) bitmap_andnot(slot_mask, slot_mask, dones, num_groups); @@ -3745,11 +3883,9 @@ static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) { - unsigned long *slot_mask = - kbdev->csf.scheduler.csg_slots_prio_update; + unsigned long *slot_mask = kbdev->csf.scheduler.csg_slots_prio_update; long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); - int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK, - slot_mask, wait_time); + int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK, slot_mask, wait_time); lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -3762,9 +3898,7 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) dev_warn( kbdev->dev, "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx", - kbase_backend_get_cycle_cnt(kbdev), - kbdev->csf.fw_timeout_ms, - slot_mask[0]); + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, slot_mask[0]); if 
(kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) error_type = DF_PING_REQUEST_TIMEOUT; schedule_actions_trigger_df(kbdev, group->kctx, error_type); @@ -3786,14 +3920,14 @@ static void report_csg_termination(struct kbase_queue_group *const group) kbase_csf_add_group_fatal_error(group, &err); } -void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, - struct kbase_context *kctx, struct list_head *evicted_groups) +void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, struct kbase_context *kctx, + struct list_head *evicted_groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group; u32 num_groups = kbdev->csf.global_iface.group_num; u32 slot; - DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; + DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; lockdep_assert_held(&kctx->csf.lock); mutex_lock(&scheduler->lock); @@ -3824,8 +3958,8 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, } } - dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n", - kctx->tgid, kctx->id, num_groups, slot_mask); + dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n", kctx->tgid, kctx->id, + num_groups, slot_mask); /* Fatal errors may have been the cause of the GPU reset * taking place, in which case we want to make sure that @@ -3858,15 +3992,13 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, * request. 
*/ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, - struct kbase_queue_group *const group, - const int slot) + struct kbase_queue_group *const group, const int slot) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool protm_ack = false; - struct kbase_csf_cmd_stream_group_info *ginfo = - &kbdev->csf.global_iface.groups[slot]; + struct kbase_csf_cmd_stream_group_info *ginfo = &kbdev->csf.global_iface.groups[slot]; u32 max_csi; - int i; + u32 i; if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) return protm_ack; @@ -3875,8 +4007,7 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); max_csi = ginfo->stream_num; - for (i = find_first_bit(group->protm_pending_bitmap, max_csi); - i < max_csi; + for (i = find_first_bit(group->protm_pending_bitmap, max_csi); i < max_csi; i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { struct kbase_queue *queue = group->bound_queues[i]; @@ -3885,17 +4016,14 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, group->protm_pending_bitmap[0]); if (!WARN_ON(!queue) && queue->enabled) { - struct kbase_csf_cmd_stream_info *stream = - &ginfo->streams[i]; - u32 cs_protm_ack = kbase_csf_firmware_cs_output( - stream, CS_ACK) & - CS_ACK_PROTM_PEND_MASK; - u32 cs_protm_req = kbase_csf_firmware_cs_input_read( - stream, CS_REQ) & - CS_REQ_PROTM_PEND_MASK; + struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; + u32 cs_protm_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) & + CS_ACK_PROTM_PEND_MASK; + u32 cs_protm_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) & + CS_REQ_PROTM_PEND_MASK; - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, - queue, cs_protm_ack ^ cs_protm_req); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, queue, + cs_protm_ack ^ cs_protm_req); if (cs_protm_ack == cs_protm_req) { dev_dbg(kbdev->dev, @@ 
-3904,12 +4032,10 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, continue; } - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, - cs_protm_ack, - CS_ACK_PROTM_PEND_MASK); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_protm_ack, + CS_ACK_PROTM_PEND_MASK); protm_ack = true; - dev_dbg(kbdev->dev, - "PROTM-ack for queue-%d, group-%d slot-%d", + dev_dbg(kbdev->dev, "PROTM-ack for queue-%d, group-%d slot-%d", queue->csi_index, group->handle, slot); } } @@ -3933,16 +4059,13 @@ static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; - DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; u32 i; kbase_csf_scheduler_spin_lock_assert_held(kbdev); - bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, - num_groups); /* Reset the tick's pending protm seq number to invalid initially */ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; - for_each_set_bit(i, active_csgs, num_groups) { + for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; /* Set to the next pending protm group's scan_seq_number */ @@ -3968,7 +4091,7 @@ static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) * acknowledged and the GPU is instructed to enter the protected mode. 
*/ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, - struct kbase_queue_group *const input_grp) + struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; @@ -3992,8 +4115,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, /* Check if the previous transition to enter & exit the protected * mode has completed or not. */ - protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) || - kbdev->protected_mode; + protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) || kbdev->protected_mode; KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use); /* Firmware samples the PROTM_PEND ACK bit for CSs when @@ -4015,14 +4137,12 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, * slot state is running). */ if (!protm_in_use && !WARN_ON(!input_grp)) { - const int slot = - kbase_csf_scheduler_group_get_slot_locked(input_grp); + const int slot = kbase_csf_scheduler_group_get_slot_locked(input_grp); /* check the input_grp is running and requesting protected mode */ if (slot >= 0 && - atomic_read(&scheduler->csg_slots[slot].state) == - CSG_SLOT_RUNNING) { + atomic_read(&scheduler->csg_slots[slot].state) == CSG_SLOT_RUNNING) { if (kctx_as_enabled(input_grp->kctx) && scheduler_slot_protm_ack(kbdev, input_grp, slot)) { int err; @@ -4058,8 +4178,9 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, mutex_unlock(&kbdev->mmu_hw_mutex); if (err) - schedule_actions_trigger_df(kbdev, input_grp->kctx, - DF_PROTECTED_MODE_ENTRY_FAILURE); + schedule_actions_trigger_df( + kbdev, input_grp->kctx, + DF_PROTECTED_MODE_ENTRY_FAILURE); scheduler->protm_enter_time = ktime_get_raw(); @@ -4086,10 +4207,8 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, static void 
scheduler_check_pmode_progress(struct kbase_device *kbdev) { u64 protm_spent_time_ms; - u64 protm_progress_timeout = - kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT); - s64 diff_ms_signed = - ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time); + u64 protm_progress_timeout = kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT); + s64 diff_ms_signed = ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time); if (diff_ms_signed < 0) return; @@ -4100,8 +4219,8 @@ static void scheduler_check_pmode_progress(struct kbase_device *kbdev) if (protm_spent_time_ms < protm_progress_timeout) return; - dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu", - protm_spent_time_ms, protm_progress_timeout); + dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu", protm_spent_time_ms, + protm_progress_timeout); /* Prompt the FW to exit protected mode */ scheduler_force_protm_exit(kbdev); @@ -4138,22 +4257,18 @@ static void scheduler_apply(struct kbase_device *kbdev) scheduler->remaining_tick_slots = available_csg_slots; /* If there are spare slots, apply heads in the list */ - spare = (available_csg_slots > resident_cnt) ? - (available_csg_slots - resident_cnt) : 0; + spare = (available_csg_slots > resident_cnt) ? 
(available_csg_slots - resident_cnt) : 0; while (!list_empty(&scheduler->groups_to_schedule)) { - group = list_first_entry(&scheduler->groups_to_schedule, - struct kbase_queue_group, - link_to_schedule); + group = list_first_entry(&scheduler->groups_to_schedule, struct kbase_queue_group, + link_to_schedule); if (kbasep_csf_scheduler_group_is_on_slot_locked(group) && group->prepared_seq_num < available_csg_slots) { /* One of the resident remainders */ - update_csg_slot_priority(group, - get_slot_priority(group)); + update_csg_slot_priority(group, get_slot_priority(group)); } else if (spare != 0) { - s8 slot = (s8)find_first_zero_bit( - kbdev->csf.scheduler.csg_inuse_bitmap, - total_csg_slots); + s8 slot = (s8)find_first_zero_bit(kbdev->csf.scheduler.csg_inuse_bitmap, + total_csg_slots); if (WARN_ON(slot >= (s8)total_csg_slots)) break; @@ -4164,8 +4279,7 @@ static void scheduler_apply(struct kbase_device *kbdev) remove_scheduled_group(kbdev, group); continue; } - program_csg_slot(group, slot, - get_slot_priority(group)); + program_csg_slot(group, slot, get_slot_priority(group)); if (unlikely(!csg_slot_in_use(kbdev, slot))) break; @@ -4183,23 +4297,24 @@ static void scheduler_apply(struct kbase_device *kbdev) program_suspending_csg_slots(kbdev); } -static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, - struct kbase_context *kctx, int priority) +static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, struct kbase_context *kctx, + int priority, struct list_head *privileged_groups, + struct list_head *active_groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group; lockdep_assert_held(&scheduler->lock); lockdep_assert_held(&scheduler->interrupt_lock); - if (WARN_ON(priority < 0) || - WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) + if (WARN_ON(priority < 0) || WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) return; if (!kctx_as_enabled(kctx)) return; - list_for_each_entry(group, 
&kctx->csf.sched.runnable_groups[priority], - link) { + list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], link) { + bool protm_req; + if (WARN_ON(!list_empty(&group->link_to_schedule))) /* This would be a bug */ list_del_init(&group->link_to_schedule); @@ -4210,33 +4325,30 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, /* Set the scanout sequence number, starting from 0 */ group->scan_seq_num = scheduler->csg_scan_count_for_tick++; + protm_req = !bitmap_empty(group->protm_pending_bitmap, + kbdev->csf.global_iface.groups[0].stream_num); + if (scheduler->tick_protm_pending_seq == - KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { - if (!bitmap_empty(group->protm_pending_bitmap, - kbdev->csf.global_iface.groups[0].stream_num)) - scheduler->tick_protm_pending_seq = - group->scan_seq_num; + KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { + if (protm_req) + scheduler->tick_protm_pending_seq = group->scan_seq_num; } - if (queue_group_idle_locked(group)) { + if (protm_req && on_slot_group_idle_locked(group)) + update_idle_protm_group_state_to_runnable(group); + else if (queue_group_idle_locked(group)) { if (can_schedule_idle_group(group)) list_add_tail(&group->link_to_schedule, - &scheduler->idle_groups_to_schedule); + &scheduler->idle_groups_to_schedule); continue; } - if (!scheduler->ngrp_to_schedule) { - /* keep the top csg's origin */ - scheduler->top_ctx = kctx; - scheduler->top_grp = group; + if (protm_req && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) { + list_add_tail(&group->link_to_schedule, privileged_groups); + continue; } - list_add_tail(&group->link_to_schedule, - &scheduler->groups_to_schedule); - group->prepared_seq_num = scheduler->ngrp_to_schedule++; - - kctx->csf.sched.ngrp_to_schedule++; - count_active_address_space(kbdev, kctx); + list_add_tail(&group->link_to_schedule, active_groups); } } @@ -4250,13 +4362,13 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, * Since only 
kbase_csf_scheduler's top_grp (i.e. the queue group assigned * the highest slot priority) is guaranteed to get the resources that it * needs we only rotate the kbase_context corresponding to it - - * kbase_csf_scheduler's top_ctx. + * kbase_csf_scheduler's top_kctx. * * The priority level chosen for rotation is the one containing the previous * scheduling cycle's kbase_csf_scheduler's top_grp. * * In a 'fresh-slice-cycle' this always corresponds to the highest group - * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority + * priority in use by kbase_csf_scheduler's top_kctx. That is, it's the priority * level of the previous scheduling cycle's first runnable kbase_context. * * We choose this priority level because when higher priority work is @@ -4265,18 +4377,18 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, * based on process priority or group priority), and thus * kbase_csf_scheduler's top_grp will point to the first of those high priority * groups, which necessarily must be the highest priority group in - * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick + * kbase_csf_scheduler's top_kctx. The fresh-slice-cycle will run later and pick * up that group appropriately. * * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL), * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but - * will set up kbase_csf_scheduler's top_ctx again for the next scheduling + * will set up kbase_csf_scheduler's top_kctx again for the next scheduling * cycle. 
Implicitly, a rotation had already occurred by removing * the kbase_csf_scheduler's top_grp * * If kbase_csf_scheduler's top_grp became idle and all other groups belonging * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's - * top_ctx are also idle, then the effect of this will be to rotate idle + * top_kctx are also idle, then the effect of this will be to rotate idle * groups, which might not actually become resident in the next * scheduling slice. However this is acceptable since a queue group becoming * idle is implicitly a rotation (as above with evicted queue groups), as it @@ -4289,28 +4401,28 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, static void scheduler_rotate_groups(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - struct kbase_context *const top_ctx = scheduler->top_ctx; + struct kbase_context *const top_kctx = scheduler->top_kctx; struct kbase_queue_group *const top_grp = scheduler->top_grp; lockdep_assert_held(&scheduler->lock); - if (top_ctx && top_grp) { - struct list_head *list = - &top_ctx->csf.sched.runnable_groups[top_grp->priority]; + if (top_kctx && top_grp) { + struct list_head *list = &top_kctx->csf.sched.runnable_groups[top_grp->priority]; - WARN_ON(top_grp->kctx != top_ctx); + WARN_ON(top_grp->kctx != top_kctx); if (!WARN_ON(list_empty(list))) { struct kbase_queue_group *new_head_grp; list_move_tail(&top_grp->link, list); - new_head_grp = (!list_empty(list)) ? - list_first_entry(list, struct kbase_queue_group, link) : - NULL; + new_head_grp = + (!list_empty(list)) ? 
+ list_first_entry(list, struct kbase_queue_group, link) : + NULL; KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp, - top_ctx->csf.sched.num_runnable_grps); + top_kctx->csf.sched.num_runnable_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); dev_dbg(kbdev->dev, - "groups rotated for a context, num_runnable_groups: %u\n", - scheduler->top_ctx->csf.sched.num_runnable_grps); + "groups rotated for a context, num_runnable_groups: %u\n", + scheduler->top_kctx->csf.sched.num_runnable_grps); } } } @@ -4321,14 +4433,14 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev) struct list_head *list = &scheduler->runnable_kctxs; lockdep_assert_held(&scheduler->lock); - if (scheduler->top_ctx) { + if (scheduler->top_kctx) { if (!WARN_ON(list_empty(list))) { - struct kbase_context *pos; + struct kbase_context *pos_kctx; bool found = false; /* Locate the ctx on the list */ - list_for_each_entry(pos, list, csf.link) { - if (scheduler->top_ctx == pos) { + list_for_each_entry(pos_kctx, list, csf.link) { + if (scheduler->top_kctx == pos_kctx) { found = true; break; } @@ -4337,11 +4449,13 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev) if (!WARN_ON(!found)) { struct kbase_context *new_head_kctx; - list_move_tail(&pos->csf.link, list); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u); + list_move_tail(&pos_kctx->csf.link, list); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos_kctx, + 0u); new_head_kctx = (!list_empty(list)) ? - list_first_entry(list, struct kbase_context, csf.link) : - NULL; + list_first_entry(list, struct kbase_context, + csf.link) : + NULL; KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u); dev_dbg(kbdev->dev, "contexts rotated\n"); @@ -4373,12 +4487,12 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev) * this function. 
*/ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, - unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) + unsigned long *csg_bitmap, + unsigned long *failed_csg_bitmap) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; const u32 num_groups = kbdev->csf.global_iface.group_num; - struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags, i; u32 active_chk = 0; @@ -4389,8 +4503,7 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; struct kbase_queue_group *group = csg_slot->resident_group; - struct kbase_csf_cmd_stream_group_info *const ginfo = - &global_iface->groups[i]; + struct kbase_csf_cmd_stream_group_info *const ginfo = &global_iface->groups[i]; u32 csg_req; bool idle_flag; @@ -4404,12 +4517,12 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, if (idle_flag || group->reevaluate_idle_status) { if (idle_flag) { #ifdef CONFIG_MALI_BIFROST_DEBUG - if (!bitmap_empty(group->protm_pending_bitmap, - ginfo->stream_num)) { - dev_warn(kbdev->dev, + if (!bitmap_empty(group->protm_pending_bitmap, ginfo->stream_num)) { + dev_warn( + kbdev->dev, "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution", - group->handle, group->kctx->tgid, - group->kctx->id, (int)i); + group->handle, group->kctx->tgid, group->kctx->id, + (int)i); } #endif clear_bit(i, scheduler->csg_slots_idle_mask); @@ -4429,7 +4542,7 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, - CSG_REQ_STATUS_UPDATE_MASK); + CSG_REQ_STATUS_UPDATE_MASK); /* Track 
the slot update requests in csg_bitmap. * Note, if the scheduler requested extended update, the resulting @@ -4440,22 +4553,20 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, - group->run_state); + group->run_state); } } - /* The groups are aggregated into a single kernel doorbell request */ if (!bitmap_empty(csg_bitmap, num_groups)) { - long wt = - kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); u32 db_slots = (u32)csg_bitmap[0]; kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - if (wait_csg_slots_handshake_ack(kbdev, - CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) { + if (wait_csg_slots_handshake_ack(kbdev, CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, + wt)) { const int csg_nr = ffs(csg_bitmap[0]) - 1; struct kbase_queue_group *group = scheduler->csg_slots[csg_nr].resident_group; @@ -4463,11 +4574,10 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, dev_warn( kbdev->dev, "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx", - kbase_backend_get_cycle_cnt(kbdev), - kbdev->csf.fw_timeout_ms, + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, csg_bitmap[0]); schedule_actions_trigger_df(kbdev, group->kctx, - DF_CSG_STATUS_UPDATE_TIMEOUT); + DF_CSG_STATUS_UPDATE_TIMEOUT); /* Store the bitmap of timed out slots */ bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); @@ -4523,8 +4633,7 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); - scheduler_update_idle_slots_status(kbdev, csg_bitmap, - failed_csg_bitmap); + scheduler_update_idle_slots_status(kbdev, csg_bitmap, failed_csg_bitmap); spin_lock_irqsave(&scheduler->interrupt_lock, flags); for_each_set_bit(i, 
csg_bitmap, num_groups) { @@ -4536,7 +4645,7 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) if (WARN_ON(!group)) continue; if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && - group->run_state != KBASE_CSF_GROUP_IDLE)) + group->run_state != KBASE_CSF_GROUP_IDLE)) continue; if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) continue; @@ -4545,8 +4654,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) group->run_state = KBASE_CSF_GROUP_IDLE; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); set_bit(i, scheduler->csg_slots_idle_mask); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, - group, scheduler->csg_slots_idle_mask[0]); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, + scheduler->csg_slots_idle_mask[0]); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, @@ -4554,32 +4663,27 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) } } - bitmap_or(scheduler->csg_slots_idle_mask, - scheduler->csg_slots_idle_mask, - failed_csg_bitmap, num_groups); + bitmap_or(scheduler->csg_slots_idle_mask, scheduler->csg_slots_idle_mask, failed_csg_bitmap, + num_groups); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL, scheduler->csg_slots_idle_mask[0]); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } -static void scheduler_scan_idle_groups(struct kbase_device *kbdev) +static void scheduler_scan_group_list(struct kbase_device *kbdev, struct list_head *groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group, *n; - list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, - link_to_schedule) { - WARN_ON(!can_schedule_idle_group(group)); - + list_for_each_entry_safe(group, n, groups, link_to_schedule) { if (!scheduler->ngrp_to_schedule) { - /* keep the top csg's origin */ - scheduler->top_ctx = group->kctx; 
+ /* keep the top csg''s origin */ + scheduler->top_kctx = group->kctx; scheduler->top_grp = group; } group->prepared_seq_num = scheduler->ngrp_to_schedule++; - list_move_tail(&group->link_to_schedule, - &scheduler->groups_to_schedule); + list_move_tail(&group->link_to_schedule, &scheduler->groups_to_schedule); group->kctx->csf.sched.ngrp_to_schedule++; count_active_address_space(kbdev, group->kctx); @@ -4597,21 +4701,17 @@ static void scheduler_rotate(struct kbase_device *kbdev) scheduler_rotate_ctxs(kbdev); } -static struct kbase_queue_group *get_tock_top_group( - struct kbase_csf_scheduler *const scheduler) +static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler) { struct kbase_context *kctx; int i; lockdep_assert_held(&scheduler->lock); for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { - list_for_each_entry(kctx, - &scheduler->runnable_kctxs, csf.link) { + list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) { struct kbase_queue_group *group; - list_for_each_entry(group, - &kctx->csf.sched.runnable_groups[i], - link) { + list_for_each_entry(group, &kctx->csf.sched.runnable_groups[i], link) { if (queue_group_idle_locked(group)) continue; @@ -4635,8 +4735,7 @@ static struct kbase_queue_group *get_tock_top_group( * * Return: 0 on success, -1 otherwise. 
*/ -static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, - bool system_suspend) +static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; @@ -4645,17 +4744,17 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, if (unlikely(ret)) { const int csg_nr = ffs(slot_mask[0]) - 1; - struct kbase_queue_group *group = - scheduler->csg_slots[csg_nr].resident_group; + struct kbase_queue_group *group = scheduler->csg_slots[csg_nr].resident_group; enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; /* The suspend of CSGs failed, * trigger the GPU reset to be in a deterministic state. */ - dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", - kbase_backend_get_cycle_cnt(kbdev), - kbdev->csf.fw_timeout_ms, - kbdev->csf.global_iface.group_num, slot_mask); + dev_warn( + kbdev->dev, + "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, + kbdev->csf.global_iface.group_num, slot_mask); if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) error_type = DF_PING_REQUEST_TIMEOUT; schedule_actions_trigger_df(kbdev, group->kctx, error_type); @@ -4699,22 +4798,24 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); lockdep_assert_held(&scheduler->interrupt_lock); - for_each_set_bit(i, scheduler->csg_slots_idle_mask, - kbdev->csf.global_iface.group_num) { - struct kbase_queue_group *const group = - scheduler->csg_slots[i].resident_group; + for_each_set_bit(i, scheduler->csg_slots_idle_mask, kbdev->csf.global_iface.group_num) { + struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; size_t j; for (j = 0; j 
< max_streams; ++j) { - struct kbase_queue const *const queue = - group->bound_queues[j]; + struct kbase_queue const *const queue = group->bound_queues[j]; u64 const *output_addr; u64 cur_extract_ofs; if (!queue || !queue->user_io_addr) continue; - output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); + output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel + * but may not be atomic on 32-bit kernels. Support for 32-bit + * kernels is limited to build-only. + */ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; if (cur_extract_ofs != queue->extract_ofs) { /* More work has been executed since the idle @@ -4736,8 +4837,7 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); - if ((scheduler->state == SCHED_SUSPENDED) || - (scheduler->state == SCHED_SLEEPING)) + if ((scheduler->state == SCHED_SUSPENDED) || (scheduler->state == SCHED_SLEEPING)) return false; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -4751,6 +4851,10 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev) kbase_pm_idle_groups_sched_suspendable(kbdev); } else suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); + + if (suspend && unlikely(atomic_read(&scheduler->gpu_no_longer_idle))) + suspend = false; + spin_unlock(&scheduler->interrupt_lock); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -4758,7 +4862,6 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev) } if (scheduler->total_runnable_grps) { - /* Check both on-slots and off-slots groups idle status */ suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) && !atomic_read(&scheduler->non_idle_offslot_grps) && @@ -4771,8 +4874,10 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev) * informing the scheduler in case userspace rings a doorbell directly. 
*/ if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) || - unlikely(!all_on_slot_groups_remained_idle(kbdev)))) + unlikely(!all_on_slot_groups_remained_idle(kbdev)))) { + dev_dbg(kbdev->dev, "GPU suspension skipped due to active CSGs"); suspend = false; + } spin_unlock(&scheduler->interrupt_lock); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -4798,9 +4903,8 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); - dev_dbg(kbdev->dev, - "Scheduler to be put to sleep on GPU becoming idle"); - cancel_tick_timer(kbdev); + dev_dbg(kbdev->dev, "Scheduler to be put to sleep on GPU becoming idle"); + cancel_tick_work(scheduler); scheduler_pm_idle_before_sleep(kbdev); scheduler->state = SCHED_SLEEPING; KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); @@ -4821,34 +4925,34 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) */ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) { + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int ret = suspend_active_groups_on_powerdown(kbdev, false); if (ret) { dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)", - atomic_read( - &kbdev->csf.scheduler.non_idle_offslot_grps)); + atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); /* Bring forward the next tick */ - kbase_csf_scheduler_tick_advance(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); return false; } dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); return true; } static void gpu_idle_worker(struct work_struct *work) { - struct kbase_device *kbdev = container_of( - work, struct kbase_device, csf.scheduler.gpu_idle_work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.scheduler.gpu_idle_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool 
scheduler_is_idle_suspendable = false; bool all_groups_suspended = false; KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); -#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ +#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) if (kbase_reset_gpu_try_prevent(kbdev)) { @@ -4894,6 +4998,7 @@ static void gpu_idle_worker(struct work_struct *work) static int scheduler_prepare(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct list_head privileged_groups, active_groups; unsigned long flags; int i; @@ -4901,10 +5006,8 @@ static int scheduler_prepare(struct kbase_device *kbdev) /* Empty the groups_to_schedule */ while (!list_empty(&scheduler->groups_to_schedule)) { - struct kbase_queue_group *grp = - list_first_entry(&scheduler->groups_to_schedule, - struct kbase_queue_group, - link_to_schedule); + struct kbase_queue_group *grp = list_first_entry( + &scheduler->groups_to_schedule, struct kbase_queue_group, link_to_schedule); remove_scheduled_group(kbdev, grp); } @@ -4912,26 +5015,34 @@ static int scheduler_prepare(struct kbase_device *kbdev) /* Pre-scan init scheduler fields */ if (WARN_ON(scheduler->ngrp_to_schedule != 0)) scheduler->ngrp_to_schedule = 0; - scheduler->top_ctx = NULL; + scheduler->top_kctx = NULL; scheduler->top_grp = NULL; scheduler->csg_scan_count_for_tick = 0; WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule)); scheduler->num_active_address_spaces = 0; scheduler->num_csg_slots_for_tick = 0; bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); + INIT_LIST_HEAD(&privileged_groups); + INIT_LIST_HEAD(&active_groups); spin_lock_irqsave(&scheduler->interrupt_lock, flags); - scheduler->tick_protm_pending_seq = - KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; + scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; /* Scan out to run groups */ for (i = 0; i < 
KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { struct kbase_context *kctx; list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) - scheduler_ctx_scan_groups(kbdev, kctx, i); + scheduler_ctx_scan_groups(kbdev, kctx, i, &privileged_groups, + &active_groups); } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + /* Adds privileged (RT + p.mode) groups to the scanout list */ + scheduler_scan_group_list(kbdev, &privileged_groups); + + /* Adds remainder of active groups to the scanout list */ + scheduler_scan_group_list(kbdev, &active_groups); + /* Update this tick's non-idle groups */ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; @@ -4940,19 +5051,18 @@ static int scheduler_prepare(struct kbase_device *kbdev) * of the tick. It will be subject to up/downs during the scheduler * active phase. */ - atomic_set(&scheduler->non_idle_offslot_grps, - scheduler->non_idle_scanout_grps); + atomic_set(&scheduler->non_idle_offslot_grps, scheduler->non_idle_scanout_grps); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL, scheduler->non_idle_scanout_grps); /* Adds those idle but runnable groups to the scanout list */ - scheduler_scan_idle_groups(kbdev); + scheduler_scan_group_list(kbdev, &scheduler->idle_groups_to_schedule); WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, - scheduler->num_active_address_spaces | - (((u64)scheduler->ngrp_to_schedule) << 32)); + scheduler->num_active_address_spaces | + (((u64)scheduler->ngrp_to_schedule) << 32)); set_max_csg_slots(kbdev); dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n", scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces); @@ -4977,8 +5087,8 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; bool keep_lru = false; - int on_slots = 
bitmap_weight(scheduler->csg_inuse_bitmap, - kbdev->csf.global_iface.group_num); + int on_slots = + bitmap_weight(scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num); lockdep_assert_held(&scheduler->lock); @@ -4993,8 +5103,7 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n", - keep_lru, on_slots); + dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n", keep_lru, on_slots); } return keep_lru; @@ -5048,11 +5157,9 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev) return bitmap_weight(csg_bitmap, num_groups); } -static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, - unsigned int timeout_ms) +static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); u32 num_groups = kbdev->csf.global_iface.group_num; int err = 0; DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); @@ -5061,11 +5168,12 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); - while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { + while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) { + long remaining = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); - remaining = wait_event_timeout( kbdev->csf.event_wait, slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); @@ -5082,18 +5190,22 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo /* The on slot csg is now stopped */ clear_bit(i, slot_mask_local); - KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( - 
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); - group = scheduler->csg_slots[i].resident_group; if (likely(group)) { /* Only do save/cleanup if the * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, kbdev->id, i); + save_csg_slot(group); - if (cleanup_csg_slot(group)) + if (cleanup_csg_slot(group)) { sched_evict_group(group, true, true); + } } } } else { @@ -5104,8 +5216,8 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo slot_mask_local[0]); /* Return the bitmask of the timed out slots to the caller */ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); - err = -ETIMEDOUT; + break; } } @@ -5129,8 +5241,9 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) size_t i; struct kbase_queue_group *lru_idle_group = NULL; const u32 total_csg_slots = kbdev->csf.global_iface.group_num; - const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >= - (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS)); + const bool all_addr_spaces_used = + (scheduler->num_active_address_spaces >= + (u32)(kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS)); u8 as_usage[BASE_MAX_NR_AS] = { 0 }; lockdep_assert_held(&scheduler->lock); @@ -5138,7 +5251,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) return; BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE)); - if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots) + if ((u32)fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots) return; /* Some CSG slots remain unused */ if (all_addr_spaces_used) { @@ -5167,7 +5280,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) * idle. 
*/ if ((group->run_state == KBASE_CSF_GROUP_IDLE) && - (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && + (group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME) && ((lru_idle_group == NULL) || (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { if (WARN_ON(group->kctx->as_nr < 0)) @@ -5189,7 +5302,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) lru_idle_group->handle, lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, lru_idle_group->csg_nr); suspend_queue_group(lru_idle_group); - if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { + if (wait_csg_slots_suspend(kbdev, &slot_mask)) { enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; dev_warn( @@ -5221,15 +5334,13 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick) ret = kbase_csf_scheduler_wait_mcu_active(kbdev); if (ret) { - dev_err(kbdev->dev, - "Wait for MCU power on failed on scheduling tick/tock"); + dev_err(kbdev->dev, "Wait for MCU power on failed on scheduling tick/tock"); return; } spin_lock_irqsave(&scheduler->interrupt_lock, flags); skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); - skip_scheduling_actions = - !skip_idle_slots_update && kbdev->protected_mode; + skip_scheduling_actions = !skip_idle_slots_update && kbdev->protected_mode; spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); /* Skip scheduling actions as GPU reset hasn't been performed yet to @@ -5237,8 +5348,7 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick) * received before the termination of group running in pmode. 
*/ if (unlikely(skip_scheduling_actions)) { - dev_info(kbdev->dev, - "Scheduling actions skipped due to anomaly in pmode"); + dev_info(kbdev->dev, "Scheduling actions skipped due to anomaly in pmode"); return; } @@ -5265,8 +5375,7 @@ redo_local_tock: * if System suspend is done when all groups are idle and and no work * is submitted for the groups after the System resume. */ - if (unlikely(!scheduler->ngrp_to_schedule && - scheduler->total_runnable_grps)) { + if (unlikely(!scheduler->ngrp_to_schedule && scheduler->total_runnable_grps)) { dev_dbg(kbdev->dev, "No groups to schedule in the tick"); enqueue_gpu_idle_work(scheduler); return; @@ -5285,8 +5394,7 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", - protm_grp->handle); + dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); @@ -5298,10 +5406,10 @@ redo_local_tock: protm_grp->handle); if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap, - kbdev->csf.global_iface.groups[0].stream_num)) { - dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d", - scheduler->top_grp->handle, - scheduler->top_grp->kctx->tgid, + kbdev->csf.global_iface.groups[0].stream_num)) { + dev_dbg(kbdev->dev, + "Scheduler prepare protm exec: group-%d of context %d_%d", + scheduler->top_grp->handle, scheduler->top_grp->kctx->tgid, scheduler->top_grp->kctx->id); /* When entering protected mode all CSG slots can be occupied @@ -5327,10 +5435,8 @@ redo_local_tock: wait_csg_slots_finish_prio_update(kbdev); if (new_protm_top_grp) { - scheduler_group_check_protm_enter(kbdev, - scheduler->top_grp); - } else if (!local_tock_slots && - atomic_read(&scheduler->non_idle_offslot_grps)) { + scheduler_group_check_protm_enter(kbdev, scheduler->top_grp); + } else if (!local_tock_slots && 
atomic_read(&scheduler->non_idle_offslot_grps)) { /* If during the scheduling action, we have off-slot * non-idle CSGs in waiting, if it happens to have * some new idle slots emerging during the committed @@ -5339,8 +5445,7 @@ redo_local_tock: local_tock_slots = prepare_fast_local_tock(kbdev); if (local_tock_slots) { - dev_dbg(kbdev->dev, - "In-cycle %d idle slots available\n", + dev_dbg(kbdev->dev, "In-cycle %d idle slots available\n", local_tock_slots); goto redo_local_tock; } @@ -5392,8 +5497,7 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) return false; } - dev_info(kbdev->dev, - "Skip scheduling due to system suspend"); + dev_info(kbdev->dev, "Skip scheduling due to system suspend"); return true; } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -5404,10 +5508,8 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) return false; } -static void schedule_on_tock(struct work_struct *work) +static void schedule_on_tock(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tock_work.work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err; @@ -5420,8 +5522,7 @@ static void schedule_on_tock(struct work_struct *work) kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); - if (can_skip_scheduling(kbdev)) - { + if (can_skip_scheduling(kbdev)) { atomic_set(&scheduler->pending_tock_work, false); goto exit_no_schedule_unlock; } @@ -5445,9 +5546,6 @@ static void schedule_on_tock(struct work_struct *work) mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, - "Waking up for event after schedule-on-tock completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); return; @@ -5456,10 +5554,8 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static void schedule_on_tick(struct work_struct *work) +static void schedule_on_tick(struct kbase_device 
*kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tick_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err = kbase_reset_gpu_try_prevent(kbdev); @@ -5472,7 +5568,6 @@ static void schedule_on_tick(struct work_struct *work) kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); - WARN_ON(scheduler->tick_timer_active); if (can_skip_scheduling(kbdev)) goto exit_no_schedule_unlock; @@ -5487,11 +5582,12 @@ static void schedule_on_tick(struct work_struct *work) scheduler->last_schedule = jiffies; /* Kicking next scheduling if needed */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps > 0)) { - start_tick_timer(kbdev); - dev_dbg(kbdev->dev, - "scheduling for next tick, num_runnable_groups:%u\n", + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps > 0)) { + hrtimer_start(&scheduler->tick_timer, + HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), + HRTIMER_MODE_REL); + dev_dbg(kbdev->dev, "scheduling for next tick, num_runnable_groups:%u\n", scheduler->total_runnable_grps); } else if (!scheduler->total_runnable_grps) { enqueue_gpu_idle_work(scheduler); @@ -5502,10 +5598,7 @@ static void schedule_on_tick(struct work_struct *work) KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); - wake_up_all(&kbdev->csf.event_wait); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, - scheduler->total_runnable_grps); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, scheduler->total_runnable_grps); return; exit_no_schedule_unlock: @@ -5513,8 +5606,7 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static int suspend_active_queue_groups(struct kbase_device *kbdev, - unsigned long *slot_mask) +static int suspend_active_queue_groups(struct kbase_device *kbdev, 
unsigned long *slot_mask) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; @@ -5524,8 +5616,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, lockdep_assert_held(&scheduler->lock); for (slot_num = 0; slot_num < num_groups; slot_num++) { - struct kbase_queue_group *group = - scheduler->csg_slots[slot_num].resident_group; + struct kbase_queue_group *group = scheduler->csg_slots[slot_num].resident_group; if (group) { suspend_queue_group(group); @@ -5533,7 +5624,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, } } - ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -5549,8 +5640,10 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) ret = suspend_active_queue_groups(kbdev, slot_mask); if (ret) { - dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", - kbdev->csf.global_iface.group_num, slot_mask); + dev_warn( + kbdev->dev, + "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", + kbdev->csf.global_iface.group_num, slot_mask); } /* Need to flush the GPU cache to ensure suspend buffer @@ -5563,16 +5656,12 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) * due to the extra context ref-count, which prevents the * L2 powering down cache clean operation in the non racing * case. - * LSC is being flushed together to cover buslogging usecase, - * where GPU reset is done regularly to avoid the log buffer - * overflow. 
*/ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, - kbdev->reset_timeout_ms); + ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, kbdev->mmu_or_gpu_cache_op_wait_time_ms); if (ret2) { - dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset", - kbase_backend_get_cycle_cnt(kbdev)); + dev_err(kbdev->dev, "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC", + kbase_backend_get_cycle_cnt(kbdev)); if (!ret) ret = ret2; } @@ -5585,24 +5674,27 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) /** * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode * groups when reset is done during - * protected mode execution. + * protected mode execution and GPU + * can't exit the protected mode in + * response to the ping request. * * @kbdev: Pointer to the device. * - * This function is called at the time of GPU reset, before the suspension of - * queue groups, to handle the case when the reset is getting performed whilst - * GPU is in protected mode. - * On entry to protected mode all the groups, except the top group that executes - * in protected mode, are implicitly suspended by the FW. Thus this function - * simply marks the normal mode groups as suspended (and cleans up the - * corresponding CSG slots) to prevent their potential forceful eviction from - * the Scheduler. So if GPU was in protected mode and there was no fault, then - * only the protected mode group would be suspended in the regular way post exit - * from this function. And if GPU was in normal mode, then all on-slot groups - * will get suspended in the regular way. + * This function is called at the time of GPU reset, before the suspension of queue groups, + * to handle the case when the reset is getting performed whilst GPU is in protected mode. 
+ * If GPU is in protected mode, then the function attempts to bring it back to the normal + * mode by sending a ping request. + * - If GPU exited the protected mode, then the function returns success to the caller + * indicating that the on-slot groups can be suspended in a regular way. + * - If GPU didn't exit the protected mode then as a recovery measure the function marks + * the normal mode on-slot groups as suspended to avoid any loss of work for those groups. + * All on-slot groups, except the top group that executes in protected mode, are implicitly + * suspended by the FW just before entering protected mode. So the failure to exit protected + * mode is attributed to the top group and it is thus forcefully evicted by the Scheduler + * later in the request sequence. * * Return: true if the groups remaining on the CSG slots need to be suspended in - * the regular way by sending CSG SUSPEND reqs to FW, otherwise false. + * the regular way by sending CSG SUSPEND requests to FW, otherwise false. */ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) { @@ -5619,52 +5711,33 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) spin_lock_irqsave(&scheduler->interrupt_lock, flags); protm_grp = scheduler->active_protm_grp; pmode_active = kbdev->protected_mode; - - if (likely(!protm_grp && !pmode_active)) { - /* Case 1: GPU is not in protected mode or it successfully - * exited protected mode. All on-slot groups can be suspended in - * the regular way before reset. - */ - suspend_on_slot_groups = true; - } else if (protm_grp && pmode_active) { - /* Case 2: GPU went successfully into protected mode and hasn't - * exited from it yet and the protected mode group is still - * active. If there was no fault for the protected mode group - * then it can be suspended in the regular way before reset. 
- * The other normal mode on-slot groups were already implicitly - * suspended on entry to protected mode so they can be marked as - * suspended right away. - */ - suspend_on_slot_groups = !protm_grp->faulted; - } else if (!protm_grp && pmode_active) { - /* Case 3: GPU went successfully into protected mode and hasn't - * exited from it yet but the protected mode group got deleted. - * This would have happened if the FW got stuck during protected - * mode for some reason (like GPU page fault or some internal - * error). In normal cases FW is expected to send the pmode exit - * interrupt before it handles the CSG termination request. - * The other normal mode on-slot groups would already have been - * implicitly suspended on entry to protected mode so they can be - * marked as suspended right away. - */ - suspend_on_slot_groups = false; - } else if (protm_grp && !pmode_active) { - /* Case 4: GPU couldn't successfully enter protected mode, i.e. - * PROTM_ENTER request had timed out. - * All the on-slot groups need to be suspended in the regular - * way before reset. - */ - suspend_on_slot_groups = true; - } - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + /* GPU not in protected mode. Suspend the on-slot groups normally */ if (likely(!pmode_active)) goto unlock; - /* GPU hasn't exited protected mode, so all the on-slot groups barring - * the protected mode group can be marked as suspended right away. + /* Send the ping request to force exit from protected mode */ + if (!kbase_csf_firmware_ping_wait(kbdev, FW_PING_ON_GPU_RESET_IN_PMODE_TIMEOUT_MS)) { + /* FW should have exited protected mode before acknowledging the ping request */ + WARN_ON_ONCE(scheduler->active_protm_grp); + + /* Explicitly suspend all groups. The protected mode group would be terminated + * if it had faulted. 
+ */ + goto unlock; + } + + dev_warn( + kbdev->dev, + "Timeout for ping request sent to force exit from protected mode before GPU reset"); + + /* GPU didn't exit the protected mode, so FW is most probably unresponsive. + * All the on-slot groups barring the protected mode group can be marked + * as suspended right away. The protected mode group would be forcefully + * evicted from the csg slot. */ + suspend_on_slot_groups = false; for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { struct kbase_queue_group *const group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; @@ -5690,17 +5763,6 @@ unlock: return suspend_on_slot_groups; } -static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) -{ - cancel_work_sync(&scheduler->tick_work); -} - -static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) -{ - atomic_set(&scheduler->pending_tock_work, false); - cancel_delayed_work_sync(&scheduler->tock_work); -} - static void scheduler_inner_reset(struct kbase_device *kbdev) { u32 const num_groups = kbdev->csf.global_iface.group_num; @@ -5711,7 +5773,6 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) /* Cancel any potential queued delayed work(s) */ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); - cancel_tick_timer(kbdev); cancel_tick_work(scheduler); cancel_tock_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); @@ -5724,17 +5785,16 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, 0u); scheduler->active_protm_grp = NULL; - memset(kbdev->csf.scheduler.csg_slots, 0, - num_groups * sizeof(struct kbase_csf_csg_slot)); + memset(kbdev->csf.scheduler.csg_slots, 0, num_groups * sizeof(struct kbase_csf_csg_slot)); bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - scheduler->top_ctx = NULL; + scheduler->top_kctx = NULL; 
scheduler->top_grp = NULL; KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, - scheduler->num_active_address_spaces | - (((u64)scheduler->total_runnable_grps) << 32)); + scheduler->num_active_address_spaces | + (((u64)scheduler->total_runnable_grps) << 32)); mutex_unlock(&scheduler->lock); } @@ -5793,8 +5853,8 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev) static void firmware_aliveness_monitor(struct work_struct *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.scheduler.ping_work.work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.scheduler.ping_work.work); int err; /* Ensure that reset will not be occurring while this function is being @@ -5833,8 +5893,7 @@ static void firmware_aliveness_monitor(struct work_struct *work) if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) goto exit; - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { /* Suspend pending - no real need to ping */ goto exit; } @@ -5847,8 +5906,7 @@ static void firmware_aliveness_monitor(struct work_struct *work) /* It is acceptable to enqueue a reset whilst we've prevented * them, it will happen after we've allowed them again */ - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) { queue_delayed_work( @@ -5863,7 +5921,7 @@ exit: } int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, - struct kbase_suspend_copy_buffer *sus_buf) + struct kbase_suspend_copy_buffer *sus_buf) { struct kbase_context *const kctx = group->kctx; struct kbase_device *const kbdev = kctx->kbdev; @@ -5883,8 +5941,7 @@ int 
kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, dev_warn( kbdev->dev, "Wait for scheduler to exit sleep state timedout when copying suspend buffer for group %d of ctx %d_%d on slot %d", - group->handle, group->kctx->tgid, - group->kctx->id, group->csg_nr); + group->handle, group->kctx->tgid, group->kctx->id, group->csg_nr); scheduler_wakeup(kbdev, true); @@ -5893,8 +5950,8 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, dev_warn( kbdev->dev, "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d", - group->handle, group->kctx->tgid, - group->kctx->id, group->csg_nr); + group->handle, group->kctx->tgid, group->kctx->id, + group->csg_nr); } /* Check the group state again as scheduler lock would have been @@ -5904,18 +5961,17 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, } #endif if (on_slot) { - DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; + DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask); if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) suspend_queue_group(group); - err = wait_csg_slots_suspend(kbdev, slot_mask, - kbdev->csf.fw_timeout_ms); + err = wait_csg_slots_suspend(kbdev, slot_mask); if (err) { - dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, group->csg_nr); + dev_warn(kbdev->dev, + "[%llu] Timeout waiting for the group %d to suspend on slot %d", + kbase_backend_get_cycle_cnt(kbdev), group->handle, group->csg_nr); goto exit; } } @@ -5934,8 +5990,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, * Flushing LSC is not done here, since only the flush of * CSG suspend buffer contents is needed from the L2 cache. 
*/ - kbase_gpu_start_cache_clean( - kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); kbase_gpu_wait_cache_clean(kbdev); } else { /* Make sure power down transitions have completed, @@ -5947,22 +6002,20 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, kbase_pm_wait_for_desired_state(kbdev); } - for (i = 0; i < csg_suspend_buf_nr_pages && - target_page_nr < sus_buf->nr_pages; i++) { - struct page *pg = - as_page(group->normal_suspend_buf.phy[i]); - void *sus_page = kmap(pg); + for (i = 0; i < csg_suspend_buf_nr_pages && target_page_nr < sus_buf->nr_pages; + i++) { + struct page *pg = as_page(group->normal_suspend_buf.phy[i]); + void *sus_page = kbase_kmap(pg); if (sus_page) { - kbase_sync_single_for_cpu(kbdev, - kbase_dma_addr(pg), - PAGE_SIZE, DMA_BIDIRECTIONAL); + kbase_sync_single_for_cpu(kbdev, kbase_dma_addr(pg), PAGE_SIZE, + DMA_BIDIRECTIONAL); - err = kbase_mem_copy_to_pinned_user_pages( - sus_buf->pages, sus_page, - &to_copy, sus_buf->nr_pages, - &target_page_nr, offset); - kunmap(pg); + err = kbase_mem_copy_to_pinned_user_pages(sus_buf->pages, sus_page, + &to_copy, + sus_buf->nr_pages, + &target_page_nr, offset); + kbase_kunmap(pg, sus_page); if (err) break; } else { @@ -6000,7 +6053,7 @@ static bool group_sync_updated(struct kbase_queue_group *group) /* Groups can also be blocked on-slot during protected mode. */ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && - group->run_state != KBASE_CSF_GROUP_IDLE); + group->run_state != KBASE_CSF_GROUP_IDLE); for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { struct kbase_queue *const queue = group->bound_queues[stream]; @@ -6011,8 +6064,7 @@ static bool group_sync_updated(struct kbase_queue_group *group) * evicted from the CSG slot, thus this CSG doesn't have * valid information in the shared memory. 
*/ - if (queue && queue->enabled && - CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) + if (queue && queue->enabled && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) if (evaluate_sync_update(queue)) { updated = true; queue->status_wait = 0; @@ -6054,9 +6106,9 @@ static bool group_sync_updated(struct kbase_queue_group *group) * Return: the pointer to queue group that can currently execute in protected * mode or NULL. */ -static struct kbase_queue_group *scheduler_get_protm_enter_async_group( - struct kbase_device *const kbdev, - struct kbase_queue_group *const group) +static struct kbase_queue_group * +scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev, + struct kbase_queue_group *const group) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *match_grp, *input_grp; @@ -6070,20 +6122,27 @@ static struct kbase_queue_group *scheduler_get_protm_enter_async_group( input_grp = group ? group : match_grp; if (input_grp && (input_grp == match_grp)) { - struct kbase_csf_cmd_stream_group_info *ginfo = - &kbdev->csf.global_iface.groups[0]; - unsigned long *pending = - input_grp->protm_pending_bitmap; + struct kbase_csf_cmd_stream_group_info *ginfo = &kbdev->csf.global_iface.groups[0]; + unsigned long *pending = input_grp->protm_pending_bitmap; unsigned long flags; spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || - bitmap_empty(pending, ginfo->stream_num)) + if (bitmap_empty(pending, ginfo->stream_num)) { + dev_dbg(kbdev->dev, + "Pmode requested for group %d of ctx %d_%d with no pending queues", + input_grp->handle, input_grp->kctx->tgid, input_grp->kctx->id); input_grp = NULL; + } else if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) { + kbase_csf_scheduler_invoke_tock(kbdev); + input_grp = NULL; + } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } else { + if (group && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) + 
kbase_csf_scheduler_invoke_tock(kbdev); + input_grp = NULL; } @@ -6104,11 +6163,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) { - group->run_state = KBASE_CSF_GROUP_RUNNABLE; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, - group->run_state); - } + if (on_slot_group_idle_locked(group)) + update_idle_protm_group_state_to_runnable(group); /* Check if the group is now eligible for execution in protected mode. */ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6133,12 +6189,10 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) * * Return: true if the sync condition of at least one queue has been satisfied. */ -static bool check_sync_update_for_on_slot_group( - struct kbase_queue_group *group) +static bool check_sync_update_for_on_slot_group(struct kbase_queue_group *group) { struct kbase_device *const kbdev = group->kctx->kbdev; - struct kbase_csf_scheduler *const scheduler = - &kbdev->csf.scheduler; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool sync_update_done = false; int i; @@ -6152,8 +6206,7 @@ static bool check_sync_update_for_on_slot_group( &kbdev->csf.global_iface.groups[group->csg_nr]; struct kbase_csf_cmd_stream_info *const stream = &ginfo->streams[queue->csi_index]; - u32 status = kbase_csf_firmware_cs_output( - stream, CS_STATUS_WAIT); + u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); unsigned long flags; KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, @@ -6171,14 +6224,12 @@ static bool check_sync_update_for_on_slot_group( queue->sync_ptr = kbase_csf_firmware_cs_output( stream, CS_STATUS_WAIT_SYNC_POINTER_LO); queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( - stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; - queue->sync_value = kbase_csf_firmware_cs_output( - stream, 
CS_STATUS_WAIT_SYNC_VALUE); - queue->blocked_reason = - CS_STATUS_BLOCKED_REASON_REASON_GET( - kbase_csf_firmware_cs_output( - stream, - CS_STATUS_BLOCKED_REASON)); + stream, CS_STATUS_WAIT_SYNC_POINTER_HI) + << 32; + queue->sync_value = + kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_VALUE); + queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( + kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON)); if (!evaluate_sync_update(queue)) continue; @@ -6189,15 +6240,12 @@ static bool check_sync_update_for_on_slot_group( * with by the scheduler's tick/tock action, otherwise * leave it untouched. */ - spin_lock_irqsave(&scheduler->interrupt_lock, - flags); + spin_lock_irqsave(&scheduler->interrupt_lock, flags); clear_bit((unsigned int)group->csg_nr, scheduler->csg_slots_idle_mask); - KBASE_KTRACE_ADD_CSF_GRP( - kbdev, CSG_SLOT_IDLE_CLEAR, group, - scheduler->csg_slots_idle_mask[0]); - spin_unlock_irqrestore( - &scheduler->interrupt_lock, flags); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, + scheduler->csg_slots_idle_mask[0]); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); /* Request the scheduler to confirm the condition inferred * here inside the protected mode. */ @@ -6250,8 +6298,7 @@ static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) num_groups = kbdev->csf.global_iface.group_num; for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { - struct kbase_csf_csg_slot *csg_slot = - &scheduler->csg_slots[i]; + struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; struct kbase_queue_group *group = csg_slot->resident_group; if (group->scan_seq_num < protm_grp->scan_seq_num) { @@ -6267,6 +6314,49 @@ static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) return exit_protm; } +/** + * wait_for_mcu_sleep_before_sync_update_check() - Wait for MCU sleep request to + * complete before performing the sync update + * check for all the on-slot groups. 
+ * + * @kbdev: Pointer to the GPU device + * + * This function is called before performing the sync update check on the GPU queues + * of all the on-slot groups when a CQS object is signaled and Scheduler was in + * SLEEPING state. + */ +static void wait_for_mcu_sleep_before_sync_update_check(struct kbase_device *kbdev) +{ + long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); + bool can_wait_for_mcu_sleep; + unsigned long flags; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ON_ONCE(kbdev->csf.scheduler.state != SCHED_SLEEPING)) + return; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* If exit from sleep state has already been triggered then there is no need + * to wait, as scheduling is anyways going to be resumed and also MCU would + * have already transitioned to the sleep state. + * Also there is no need to wait if the Scheduler's PM refcount is not zero, + * which implies that MCU needs to be turned on. + */ + can_wait_for_mcu_sleep = !kbdev->pm.backend.exit_gpu_sleep_mode && + !kbdev->csf.scheduler.pm_active_count; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (!can_wait_for_mcu_sleep) + return; + + /* Wait until MCU enters sleep state or there is a pending GPU reset */ + if (!wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP || + !kbase_reset_gpu_is_not_pending(kbdev), + timeout)) + dev_warn(kbdev->dev, "Wait for MCU sleep timed out"); +} + static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -6275,6 +6365,12 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); + /* Wait for MCU to enter the sleep state to ensure that FW has published + * the status of CSGs/CSIs, otherwise we can miss detecting that a GPU + * queue stuck on SYNC_WAIT has been unblocked. 
+ */ + wait_for_mcu_sleep_before_sync_update_check(kbdev); + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { struct kbase_queue_group *const group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; @@ -6283,6 +6379,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) continue; if (check_sync_update_for_on_slot_group(group)) { + /* SYNC_UPDATE event shall invalidate GPU idle event */ + atomic_set(&scheduler->gpu_no_longer_idle, true); scheduler_wakeup(kbdev, true); return; } @@ -6306,8 +6404,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) */ static void check_group_sync_update_worker(struct work_struct *work) { - struct kbase_context *const kctx = container_of(work, - struct kbase_context, csf.sched.sync_update_work); + struct kbase_context *const kctx = + container_of(work, struct kbase_context, csf.sched.sync_update_work); struct kbase_device *const kbdev = kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool sync_updated = false; @@ -6316,8 +6414,7 @@ static void check_group_sync_update_worker(struct work_struct *work) #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(scheduler->state == SCHED_BUSY)) { - queue_work(kctx->csf.sched.sync_update_wq, - &kctx->csf.sched.sync_update_work); + queue_work(kctx->csf.sched.sync_update_wq, &kctx->csf.sched.sync_update_work); mutex_unlock(&scheduler->lock); return; } @@ -6327,8 +6424,7 @@ static void check_group_sync_update_worker(struct work_struct *work) if (kctx->csf.sched.num_idle_wait_grps != 0) { struct kbase_queue_group *group, *temp; - list_for_each_entry_safe(group, temp, - &kctx->csf.sched.idle_wait_groups, link) { + list_for_each_entry_safe(group, temp, &kctx->csf.sched.idle_wait_groups, link) { if (group_sync_updated(group)) { sync_updated = true; /* Move this group back in to the runnable @@ -6358,15 +6454,13 @@ static void check_group_sync_update_worker(struct work_struct *work) mutex_unlock(&scheduler->lock); } -static 
-enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) +static enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) { struct kbase_context *const kctx = param; KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); - queue_work(kctx->csf.sched.sync_update_wq, - &kctx->csf.sched.sync_update_work); + queue_work(kctx->csf.sched.sync_update_wq, &kctx->csf.sched.sync_update_work); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -6375,39 +6469,36 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) { int priority; int err; + struct kbase_device *kbdev = kctx->kbdev; - kbase_ctx_sched_init_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + err = gpu_metrics_ctx_init(kctx); + if (err) + return err; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ - for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; - ++priority) { + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++priority) { INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); } - kctx->csf.sched.num_runnable_grps = 0; INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); - kctx->csf.sched.num_idle_wait_grps = 0; - kctx->csf.sched.ngrp_to_schedule = 0; kctx->csf.sched.sync_update_wq = - alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", - WQ_HIGHPRI); + alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", WQ_HIGHPRI); if (!kctx->csf.sched.sync_update_wq) { - dev_err(kctx->kbdev->dev, - "Failed to initialize scheduler context workqueue"); + dev_err(kbdev->dev, "Failed to initialize scheduler context workqueue"); err = -ENOMEM; goto alloc_wq_failed; } - INIT_WORK(&kctx->csf.sched.sync_update_work, - check_group_sync_update_worker); + INIT_WORK(&kctx->csf.sched.sync_update_work, check_group_sync_update_worker); kbase_csf_tiler_heap_reclaim_ctx_init(kctx); err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); if (err) { - dev_err(kctx->kbdev->dev, - "Failed to 
register a sync update callback"); + dev_err(kbdev->dev, "Failed to register a sync update callback"); goto event_wait_add_failed; } @@ -6417,6 +6508,9 @@ event_wait_add_failed: destroy_workqueue(kctx->csf.sched.sync_update_wq); alloc_wq_failed: kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ return err; } @@ -6427,6 +6521,78 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) destroy_workqueue(kctx->csf.sched.sync_update_wq); kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +} + +static int kbase_csf_scheduler_kthread(void *data) +{ + struct kbase_device *const kbdev = data; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + while (scheduler->kthread_running) { + struct kbase_queue *queue; + + if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) + continue; + reinit_completion(&scheduler->kthread_signal); + + /* Iterate through queues with pending kicks */ + do { + u8 prio; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + queue = NULL; + for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { + if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) { + queue = list_first_entry( + &kbdev->csf.pending_gpuq_kicks[prio], + struct kbase_queue, pending_kick_link); + list_del_init(&queue->pending_kick_link); + break; + } + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + + if (queue != NULL) { + WARN_ONCE( + prio != queue->group_priority, + "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu", + (void *)queue, queue->group_priority, prio); + + kbase_csf_process_queue_kick(queue); + + /* Perform a scheduling tock for high-priority queue groups if + * required. 
+ */ + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); + if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && + atomic_read(&scheduler->pending_tock_work)) + schedule_on_tock(kbdev); + } + } while (queue != NULL); + + /* Check if we need to perform a scheduling tick/tock. A tick + * event shall override a tock event but not vice-versa. + */ + if (atomic_cmpxchg(&scheduler->pending_tick_work, true, false) == true) { + atomic_set(&scheduler->pending_tock_work, false); + schedule_on_tick(kbdev); + } else if (atomic_read(&scheduler->pending_tock_work)) { + schedule_on_tock(kbdev); + } + + dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); + wake_up_all(&kbdev->csf.event_wait); + } + + /* Wait for the other thread, that signaled the exit, to call kthread_stop() */ + while (!kthread_should_stop()) + ; + + return 0; } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -6437,14 +6603,47 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) bitmap_zero(scheduler->csg_inuse_bitmap, num_groups); bitmap_zero(scheduler->csg_slots_idle_mask, num_groups); - scheduler->csg_slots = kcalloc(num_groups, - sizeof(*scheduler->csg_slots), GFP_KERNEL); + scheduler->csg_slots = kcalloc(num_groups, sizeof(*scheduler->csg_slots), GFP_KERNEL); if (!scheduler->csg_slots) { - dev_err(kbdev->dev, - "Failed to allocate memory for csg slot status array\n"); + dev_err(kbdev->dev, "Failed to allocate memory for csg slot status array\n"); return -ENOMEM; } + init_completion(&scheduler->kthread_signal); + scheduler->kthread_running = true; + scheduler->gpuq_kthread = + kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread"); + if (!scheduler->gpuq_kthread) { + kfree(scheduler->csg_slots); + scheduler->csg_slots = NULL; + + dev_err(kbdev->dev, "Failed to spawn the GPU queue submission worker thread"); + return -ENOMEM; + } + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#if 
!IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + scheduler->gpu_metrics_tb = + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_GPU_METRICS_BUF_NAME); + if (!scheduler->gpu_metrics_tb) { + scheduler->kthread_running = false; + complete(&scheduler->kthread_signal); + kthread_stop(scheduler->gpuq_kthread); + scheduler->gpuq_kthread = NULL; + + kfree(scheduler->csg_slots); + scheduler->csg_slots = NULL; + + dev_err(kbdev->dev, "Failed to get the handler of gpu_metrics from trace buffer"); + return -ENOENT; + } +#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ + + spin_lock_init(&scheduler->gpu_metrics_lock); + hrtimer_init(&scheduler->gpu_metrics_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + scheduler->gpu_metrics_timer.function = gpu_metrics_timer_callback; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + return kbase_csf_mcu_shared_regs_data_init(kbdev); } @@ -6452,26 +6651,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - scheduler->timer_enabled = true; + atomic_set(&scheduler->timer_enabled, true); - scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI); - if (!scheduler->wq) { - dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); - return -ENOMEM; - } - scheduler->idle_wq = alloc_ordered_workqueue( - "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); + scheduler->idle_wq = alloc_ordered_workqueue("csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); if (!scheduler->idle_wq) { - dev_err(kbdev->dev, - "Failed to allocate GPU idle scheduler workqueue\n"); - destroy_workqueue(kbdev->csf.scheduler.wq); + dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); return -ENOMEM; } - INIT_WORK(&scheduler->tick_work, schedule_on_tick); - INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); - atomic_set(&scheduler->pending_tock_work, false); - INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); mutex_init(&scheduler->lock); @@ 
-6483,28 +6670,15 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule); BUILD_BUG_ON(MAX_SUPPORTED_CSGS > - (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); + (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); scheduler->state = SCHED_SUSPENDED; KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); - scheduler->pm_active_count = 0; - scheduler->ngrp_to_schedule = 0; - scheduler->total_runnable_grps = 0; - scheduler->top_ctx = NULL; - scheduler->top_grp = NULL; - scheduler->last_schedule = 0; - scheduler->active_protm_grp = NULL; scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); - INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); - scheduler->fast_gpu_idle_handling = false; - atomic_set(&scheduler->gpu_no_longer_idle, false); - atomic_set(&scheduler->non_idle_offslot_grps, 0); - hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); scheduler->tick_timer.function = tick_timer_callback; - scheduler->tick_timer_active = false; kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); @@ -6513,6 +6687,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) void kbase_csf_scheduler_term(struct kbase_device *kbdev) { + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + if (scheduler->gpuq_kthread) { + scheduler->kthread_running = false; + complete(&scheduler->kthread_signal); + kthread_stop(scheduler->gpuq_kthread); + } + if (kbdev->csf.scheduler.csg_slots) { WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); /* The unload of Driver can take place only when all contexts have @@ -6537,9 +6719,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) mutex_unlock(&kbdev->csf.scheduler.lock); cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); - cancel_tick_timer(kbdev); - 
cancel_tick_work(&kbdev->csf.scheduler); - cancel_tock_work(&kbdev->csf.scheduler); kfree(kbdev->csf.scheduler.csg_slots); kbdev->csf.scheduler.csg_slots = NULL; } @@ -6553,8 +6732,6 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { if (kbdev->csf.scheduler.idle_wq) destroy_workqueue(kbdev->csf.scheduler.idle_wq); - if (kbdev->csf.scheduler.wq) - destroy_workqueue(kbdev->csf.scheduler.wq); kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); mutex_destroy(&kbdev->csf.scheduler.lock); @@ -6576,15 +6753,14 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) + if (unlikely(!kbase_csf_scheduler_timer_is_enabled(kbdev))) return; - WARN_ON((scheduler->state != SCHED_INACTIVE) && - (scheduler->state != SCHED_SUSPENDED) && + WARN_ON((scheduler->state != SCHED_INACTIVE) && (scheduler->state != SCHED_SUSPENDED) && (scheduler->state != SCHED_SLEEPING)); if (scheduler->total_runnable_grps > 0) { - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); } else if (scheduler->state != SCHED_SUSPENDED) { enqueue_gpu_idle_work(scheduler); @@ -6598,43 +6774,23 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) mutex_unlock(&kbdev->csf.scheduler.lock); } -bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - bool enabled; - - mutex_lock(&scheduler->lock); - enabled = scheduler_timer_is_enabled_nolock(kbdev); - mutex_unlock(&scheduler->lock); - - return enabled; -} - -void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, - bool enable) +void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, bool enable) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool currently_enabled; + /* This lock is taken to prevent 
this code being executed concurrently + * by userspace. + */ mutex_lock(&scheduler->lock); - currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); + currently_enabled = kbase_csf_scheduler_timer_is_enabled(kbdev); if (currently_enabled && !enable) { - scheduler->timer_enabled = false; - cancel_tick_timer(kbdev); - mutex_unlock(&scheduler->lock); - /* The non-sync version to cancel the normal work item is not - * available, so need to drop the lock before cancellation. - */ + atomic_set(&scheduler->timer_enabled, false); cancel_tick_work(scheduler); - cancel_tock_work(scheduler); - return; - } - - if (!currently_enabled && enable) { - scheduler->timer_enabled = true; - - scheduler_enable_tick_timer_nolock(kbdev); + } else if (!currently_enabled && enable) { + atomic_set(&scheduler->timer_enabled, true); + kbase_csf_scheduler_invoke_tick(kbdev); } mutex_unlock(&scheduler->lock); @@ -6644,17 +6800,17 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + if (unlikely(kbase_csf_scheduler_timer_is_enabled(kbdev))) + return; + + /* This lock is taken to prevent this code being executed concurrently + * by userspace. 
+ */ mutex_lock(&scheduler->lock); - if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) - goto out; + kbase_csf_scheduler_invoke_tick(kbdev); + dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); - if (scheduler->total_runnable_grps > 0) { - enqueue_tick_work(kbdev); - dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); - } - -out: mutex_unlock(&scheduler->lock); } @@ -6691,7 +6847,7 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) } else { dev_info(kbdev->dev, "Scheduler PM suspend"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); } } @@ -6730,8 +6886,7 @@ void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&scheduler->lock); - if ((scheduler->total_runnable_grps > 0) && - (scheduler->state == SCHED_SUSPENDED)) { + if ((scheduler->total_runnable_grps > 0) && (scheduler->state == SCHED_SUSPENDED)) { dev_info(kbdev->dev, "Scheduler PM resume"); scheduler_wakeup(kbdev, true); } @@ -6753,8 +6908,7 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) * the CSGs before powering down the GPU. 
*/ mutex_lock(&kbdev->csf.scheduler.lock); - scheduler_pm_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); + scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); mutex_unlock(&kbdev->csf.scheduler.lock); } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); @@ -6770,7 +6924,7 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); -int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_wait) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; unsigned long flags; @@ -6783,9 +6937,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (killable_wait) + err = kbase_pm_killable_wait_for_poweroff_work_complete(kbdev); + else + err = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (err) + return err; - err = kbase_pm_wait_for_desired_state(kbdev); + if (killable_wait) + err = kbase_pm_killable_wait_for_desired_state(kbdev); + else + err = kbase_pm_wait_for_desired_state(kbdev); if (!err) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON); @@ -6794,6 +6956,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) return err; } + +int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev) +{ + return scheduler_wait_mcu_active(kbdev, true); +} + +int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +{ + return scheduler_wait_mcu_active(kbdev, false); +} + KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active); #ifdef KBASE_PM_RUNTIME @@ -6818,7 +6991,7 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) if (ret) { dev_dbg(kbdev->dev, "Aborting runtime suspend 
(grps: %d)", - atomic_read(&scheduler->non_idle_offslot_grps)); + atomic_read(&scheduler->non_idle_offslot_grps)); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.exit_gpu_sleep_mode = true; @@ -6829,6 +7002,9 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) } scheduler->state = SCHED_SUSPENDED; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + hrtimer_cancel(&scheduler->gpu_metrics_timer); +#endif KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.gpu_sleep_mode_active = false; @@ -6854,12 +7030,10 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev) if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group) continue; - csg_idle = - kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & - CSG_STATUS_STATE_IDLE_MASK; + csg_idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & + CSG_STATUS_STATE_IDLE_MASK; if (!csg_idle) { - dev_dbg(kbdev->dev, - "Re-activate Scheduler after MCU sleep"); + dev_dbg(kbdev->dev, "Re-activate Scheduler after MCU sleep"); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); break; @@ -6872,8 +7046,7 @@ void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev) struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; mutex_lock(&scheduler->lock); - if (kbase_pm_gpu_sleep_allowed(kbdev) && - (scheduler->state == SCHED_INACTIVE)) + if (kbase_pm_gpu_sleep_allowed(kbdev) && (scheduler->state == SCHED_INACTIVE)) scheduler_sleep_on_idle(kbdev); mutex_unlock(&scheduler->lock); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h index d22d7c8b9dce..abd62342d38f 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -108,7 +108,7 @@ int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group); * Note: Caller must hold the interrupt_lock. */ bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, - struct kbase_queue_group *group); + struct kbase_queue_group *group); /** * kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been @@ -121,8 +121,8 @@ bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, * * Note: Caller must hold the interrupt_lock. */ -struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( - struct kbase_device *kbdev, int slot); +struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(struct kbase_device *kbdev, + int slot); /** * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue @@ -148,8 +148,8 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group); * on firmware slots from the given Kbase context. The affected groups are * added to the supplied list_head argument. */ -void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, - struct kbase_context *kctx, struct list_head *evicted_groups); +void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, struct kbase_context *kctx, + struct list_head *evicted_groups); /** * kbase_csf_scheduler_context_init() - Initialize the context-specific part @@ -264,7 +264,7 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev); * Return: 0 on success, or negative on failure. 
*/ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, - struct kbase_suspend_copy_buffer *sus_buf); + struct kbase_suspend_copy_buffer *sus_buf); /** * kbase_csf_scheduler_lock - Acquire the global Scheduler lock. @@ -299,8 +299,7 @@ static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev) * This function will take the global scheduler lock, in order to serialize * against the Scheduler actions, for access to CS IO pages. */ -static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, - unsigned long *flags) +static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, unsigned long *flags) { spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, *flags); } @@ -312,8 +311,7 @@ static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, * @flags: Previously stored interrupt state when Scheduler interrupt * spinlock was acquired. */ -static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, - unsigned long flags) +static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, unsigned long flags) { spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); } @@ -324,8 +322,7 @@ static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
*/ -static inline void -kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) +static inline void kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); } @@ -338,7 +335,10 @@ kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) * * Return: true if the scheduler is configured to wake up periodically */ -bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev); +static inline bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) +{ + return atomic_read(&kbdev->csf.scheduler.timer_enabled); +} /** * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic @@ -347,8 +347,7 @@ bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev); * @kbdev: Pointer to the device * @enable: Whether to enable periodic scheduler tasks */ -void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, - bool enable); +void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, bool enable); /** * kbase_csf_scheduler_kick - Perform pending scheduling tasks once. @@ -367,8 +366,7 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev); * * Return: true if the scheduler is running with protected mode tasks */ -static inline bool kbase_csf_scheduler_protected_mode_in_use( - struct kbase_device *kbdev) +static inline bool kbase_csf_scheduler_protected_mode_in_use(struct kbase_device *kbdev) { return (kbdev->csf.scheduler.active_protm_grp != NULL); } @@ -411,6 +409,22 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev); */ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev); +/** + * kbase_csf_scheduler_killable_wait_mcu_active - Wait for the MCU to actually become + * active in killable state. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
+ * + * This function is the same as kbase_csf_scheduler_wait_mcu_active(), except that + * it would allow the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from the code that is executed in ioctl or + * Userspace context, wherever it is safe to do so. + * + * Return: 0 if the MCU was successfully activated, or -ETIMEDOUT code on timeout error or + * -ERESTARTSYS if the wait was interrupted. + */ +int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev); + /** * kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume * @@ -473,70 +487,25 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) kbdev->csf.global_iface.group_num); } -/** - * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick - * - * @kbdev: Pointer to the device - * - * This function advances the scheduling tick by enqueing the tick work item for - * immediate execution, but only if the tick hrtimer is active. If the timer - * is inactive then the tick work item is already in flight. - * The caller must hold the interrupt lock. - */ -static inline void -kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - - lockdep_assert_held(&scheduler->interrupt_lock); - - if (scheduler->tick_timer_active) { - KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u); - scheduler->tick_timer_active = false; - queue_work(scheduler->wq, &scheduler->tick_work); - } else { - KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u); - } -} - -/** - * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick - * - * @kbdev: Pointer to the device - * - * This function advances the scheduling tick by enqueing the tick work item for - * immediate execution, but only if the tick hrtimer is active. If the timer - * is inactive then the tick work item is already in flight.
- */ -static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - kbase_csf_scheduler_tick_advance_nolock(kbdev); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - /** * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick * * @kbdev: Pointer to the device * - * This function will queue the scheduling tick work item for immediate - * execution if tick timer is not active. This can be called from interrupt - * context to resume the scheduling after GPU was put to sleep. + * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling + * tick regardless of whether the tick timer is enabled. This can be called + * from interrupt context to resume the scheduling after GPU was put to sleep. + * + * Caller is expected to check kbase_csf_scheduler.timer_enabled as required + * to see whether it is appropriate before calling this function. */ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u); - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (!scheduler->tick_timer_active) - queue_work(scheduler->wq, &scheduler->tick_work); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + if (atomic_cmpxchg(&scheduler->pending_tick_work, false, true) == false) + complete(&scheduler->kthread_signal); } /** @@ -544,8 +513,11 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) * * @kbdev: Pointer to the device * - * This function will queue the scheduling tock work item for immediate - * execution. + * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling + * tock. 
+ * + * Caller is expected to check kbase_csf_scheduler.timer_enabled as required + * to see whether it is appropriate before calling this function. */ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) { @@ -553,7 +525,7 @@ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u); if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) - mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); + complete(&scheduler->kthread_signal); } /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c new file mode 100644 index 000000000000..b95e77ce4eaa --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c @@ -0,0 +1,838 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_csg.h" +#include "mali_kbase_csf_sync.h" +#include "mali_kbase_csf_util.h" +#include +#include + +#if IS_ENABLED(CONFIG_SYNC_FILE) +#include "mali_kbase_sync.h" +#endif + +#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" + +#define CSF_SYNC_DUMP_SIZE 256 + +/* Number of nearby commands around the "extract_ptr" of GPU queues. + * + * [extract_ptr - MAX_NR_NEARBY_INSTR, extract_ptr + MAX_NR_NEARBY_INSTR]. + */ +#define MAX_NR_NEARBY_INSTR 32 + +/** + * kbasep_csf_sync_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live 32-bit value. + * @live_val: Pointer to the u32 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_sync_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, u32 *live_val) +{ + struct kbase_vmap_struct *mapping; + u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_sync_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live value (32 or 64-bit). + * @live_val: Pointer to the u64 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. 
+ */ +static int kbasep_csf_sync_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, u64 *live_val) +{ + struct kbase_vmap_struct *mapping; + u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait + * or Fence Signal command, contained in a + * KCPU queue. + * + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. + * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. + */ +static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *length, + struct kbase_kcpu_command *cmd, + const char *cmd_name) +{ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = NULL; +#else + struct dma_fence *fence = NULL; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_command_fence_info *fence_info; + struct kbase_sync_fence_info info; + const char *timeline_name = NULL; + bool is_signaled = false; + + fence_info = &cmd->info.fence; + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return; + + fence = kbase_fence_get(fence_info); + if (WARN_ON(!fence)) + return; + + kbase_sync_fence_info_get(fence, &info); + timeline_name = fence->ops->get_timeline_name(fence); + is_signaled = info.status > 0; + + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); + + /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); + + kbase_fence_put(fence); +} + +/** + * 
kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, + * contained in a KCPU queue. + * + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) +{ + size_t i; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; + + u32 live_val; + int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); + else + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); + + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:gt arg_value:0x%.8x", cqs_obj->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS + * Set command, contained in a KCPU queue. + * + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. 
+ */ +static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) +{ + size_t i; + + for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { + struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; + + u32 live_val; + int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); + else + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); + + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:add arg_value:0x%.8x", 1); + } +} + +/** + * kbasep_csf_sync_get_wait_op_name() - Get the name of a CQS Wait Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. + */ +static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_WAIT_OPERATION_LE: + string = "le"; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + string = "gt"; + break; + default: + string = "??"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_get_set_op_name() - Get the name of a CQS Set Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '???' if unknown.
+ */ +static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_SET_OPERATION_ADD: + string = "add"; + break; + case BASEP_CQS_SET_OPERATION_SET: + string = "set"; + break; + default: + string = "???"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS + * Wait Operation command, contained + * in a KCPU queue. + * + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) +{ + size_t i; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_operation_info *wait_op = + &cmd->info.cqs_wait_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); + + u64 live_val; + int ret = kbasep_csf_sync_get_cqs_live_u64(kctx, wait_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + + if (live_val_valid) + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); + else + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); + + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS + * Set Operation command, contained + * in a KCPU queue. + * + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. 
+ */ +static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) +{ + size_t i; + + for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { + struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_set_op_name( + (basep_cqs_set_operation_op)set_op->operation); + + u64 live_val; + int ret = kbasep_csf_sync_get_cqs_live_u64(kctx, set_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + + if (live_val_valid) + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); + else + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); + + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + } +} + +/** + * kbasep_csf_sync_kcpu_print_queue() - Print debug data for a KCPU queue + * + * @kctx: The kbase context. + * @kbpr: Pointer to printer instance. + * @queue: Pointer to the KCPU queue. + */ +static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx, + struct kbase_kcpu_command_queue *queue, + struct kbasep_printer *kbpr) +{ + char started_or_pending; + struct kbase_kcpu_command *cmd; + size_t i; + + if (WARN_ON(!queue)) + return; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + mutex_lock(&queue->lock); + + for (i = 0; i != queue->num_pending_cmds; ++i) { + char buffer[CSF_SYNC_DUMP_SIZE]; + int length = 0; + + started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; + length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", + kctx->id, queue->id, started_or_pending); + + cmd = &queue->commands[(u8)(queue->start_offset + i)]; + switch (cmd->type) { +#if IS_ENABLED(CONFIG_SYNC_FILE) + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd, + "FENCE_SIGNAL"); + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd, + "FENCE_WAIT"); + break; +#endif + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + kbasep_csf_sync_print_kcpu_cqs_wait(kctx, buffer, &length, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + kbasep_csf_sync_print_kcpu_cqs_set(kctx, buffer, &length, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_wait_op(kctx, buffer, &length, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd); + break; + default: + length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, + ", U, Unknown blocking command"); + break; + } + + length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); + kbasep_print(kbpr, buffer); + } + + mutex_unlock(&queue->lock); +} + +int kbasep_csf_sync_kcpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr) +{ + unsigned long queue_idx; + + mutex_lock(&kctx->csf.kcpu_queues.lock); + + kbasep_print(kbpr, "CSF KCPU queues sync info (version: v" __stringify( + MALI_CSF_SYNC_DUMP_VERSION) "):\n"); + + kbasep_print(kbpr, "KCPU queues for ctx %d:\n", kctx->id); + + queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + + while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { + kbasep_csf_sync_kcpu_print_queue(kctx, kctx->csf.kcpu_queues.array[queue_idx], + kbpr); + + queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, + queue_idx + 1); + } + + 
mutex_unlock(&kctx->csf.kcpu_queues.lock); + + return 0; +} + +/* GPU queue related values */ +#define GPU_CSF_MOVE_OPCODE ((u64)0x1) +#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) +#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) +#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) +#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) +#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) +#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) +#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) +#define GPU_CSF_CALL_OPCODE ((u64)0x20) + +#define MAX_NR_GPU_CALLS (5) +#define INSTR_OPCODE_MASK ((u64)0xFF << 56) +#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) +#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) +#define MOVE_DEST_MASK ((u64)0xFF << 48) +#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) +#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) +#define SYNC_SRC0_MASK ((u64)0xFF << 40) +#define SYNC_SRC1_MASK ((u64)0xFF << 32) +#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) +#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) +#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) +#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) + +/* Enumeration for types of GPU queue sync events for + * the purpose of dumping them through sync. + */ +enum sync_gpu_sync_type { + CSF_GPU_SYNC_WAIT, + CSF_GPU_SYNC_SET, + CSF_GPU_SYNC_ADD, + NUM_CSF_GPU_SYNC_TYPES +}; + +/** + * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations + * from a MOVE instruction. + * + * @move_cmd: Raw MOVE instruction. + * @sync_addr_reg: Register identifier from SYNC_* instruction. + * @compare_val_reg: Register identifier from SYNC_* instruction. + * @sync_val: Pointer to store CQS object address for sync operation. + * @compare_val: Pointer to store compare value for sync operation. + * + * Return: True if value is obtained by checking for correct register identifier, + * or false otherwise. 
+ */ +static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, + u64 compare_val_reg, u64 *sync_val, + u64 *compare_val) +{ + u64 imm_mask; + + /* Verify MOVE instruction and get immediate mask */ + if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) + imm_mask = MOVE32_IMM_MASK; + else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) + imm_mask = MOVE_IMM_MASK; + else + /* Error return */ + return false; + + /* Verify value from MOVE instruction and assign to variable */ + if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) + *sync_val = move_cmd & imm_mask; + else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) + *compare_val = move_cmd & imm_mask; + else + /* Error return */ + return false; + + return true; +} + +/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided + * offset. + * + * @queue: Pointer to the queue. + * @ringbuff_offset: Ringbuffer offset. + * + * Return: the u64 in the ringbuffer at the desired offset. + */ +static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) +{ + u64 page_off = ringbuff_offset >> PAGE_SHIFT; + u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; + struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); + u64 *ringbuffer = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL)); + u64 value; + + if (!ringbuffer) { + struct kbase_context *kctx = queue->kctx; + + dev_err(kctx->kbdev->dev, "%s failed to map the buffer page for read a command!", + __func__); + /* Return an alternative 0 for dumping operation*/ + value = 0; + } else { + value = ringbuffer[offset_within_page / sizeof(u64)]; + vunmap(ringbuffer); + } + + return value; +} + +/** + * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. + * + * @kbpr: Pointer to printer instance. + * @kctx: Pointer to kbase context. + * @queue: Pointer to the GPU command queue. 
+ * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command. + * (Useful for finding preceding MOVE commands) + * @instr_addr: GPU command address. + * @sync_cmd: Entire u64 of the sync command, which has both sync address and + * comparison-value encoded in it. + * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT). + * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false). + * @follows_wait: Bool to indicate if the operation follows at least one wait + * operation. Used to determine whether it's pending or started. + */ +static void kbasep_csf_print_gpu_sync_op(struct kbasep_printer *kbpr, struct kbase_context *kctx, + struct kbase_queue *queue, u32 ringbuff_offset, + u64 instr_addr, u64 sync_cmd, enum sync_gpu_sync_type type, + bool is_64bit, bool follows_wait) +{ + u64 sync_addr = 0, compare_val = 0, live_val = 0, ringbuffer_boundary_check; + u64 move_cmd; + u8 sync_addr_reg, compare_val_reg, wait_condition = 0; + int err; + + static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" }; + static const char *const gpu_sync_type_op[] = { + "wait", /* This should never be printed, only included to simplify indexing */ + "set", "add" + }; + + if (type >= NUM_CSF_GPU_SYNC_TYPES) { + dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!"); + return; + } + + /* 1. Get Register identifiers from SYNC_* instruction */ + sync_addr_reg = SYNC_SRC0_GET(sync_cmd); + compare_val_reg = SYNC_SRC1_GET(sync_cmd); + + if (ringbuff_offset < sizeof(u64)) { + dev_warn(kctx->kbdev->dev, + "Unexpected wraparound detected between %s & MOVE instruction", + gpu_sync_type_name[type]); + return; + } + /* 2. 
Get values from first MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + + /* We expect there to be at least 2 preceding MOVE instructions for CQS, or 3 preceding + * MOVE instructions for Timeline CQS, and Base will always arrange for these + * MOVE + SYNC instructions to be contiguously located, and is therefore never expected + * to be wrapped around the ringbuffer boundary. The following check takes place after + * the ringbuffer has been decremented, and already points to the first MOVE command, + * so that it can be determined if it's a 32-bit MOVE (so 2 vs 1 preceding MOVE commands + * will be checked). + * This is to maintain compatibility with older userspace; a check is done to ensure that + * the MOVE opcode found was a 32-bit MOVE, and if so, it has determined that a newer + * userspace is being used and will continue to read the next 32-bit MOVE to recover the + * compare/set value in the wait/set operation. If not, the single 48-bit value found + * will be used. + */ + ringbuffer_boundary_check = + (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE && is_64bit) ? 2 : 1; + if (unlikely(ringbuff_offset < (ringbuffer_boundary_check * sizeof(u64)))) { + dev_warn(kctx->kbdev->dev, + "Unexpected wraparound detected between %s & MOVE instruction", + gpu_sync_type_name[type]); + return; + } + /* For 64-bit SYNC commands, the first MOVE command read in will actually use 1 register + * above the compare value register in the sync command, as this will store the higher + * 32-bits of 64-bit compare value. The compare value register read above will be read + * afterwards. + */ + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, + compare_val_reg + (is_64bit ? 1 : 0), &sync_addr, + &compare_val)) + return; + + /* 64-bit WAITs or SETs are split into 2 32-bit MOVEs. 
sync_val would contain the higher + * 32 bits, so the lower 32-bits are retrieved afterwards, to recover the full u64 value. + */ + if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE && is_64bit) { + u64 compare_val_lower = 0; + + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val_lower)) + return; + /* Mask off upper 32 bits of compare_val_lower, and combine with the higher 32 bits + * to restore the original u64 compare value. + */ + compare_val = (compare_val << 32) | (compare_val_lower & ((u64)U32_MAX)); + } + + /* 3. Get values from next MOVE command, which should be the CQS object address */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 4. Get CQS object value */ + if (is_64bit) + err = kbasep_csf_sync_get_cqs_live_u64(kctx, sync_addr, &live_val); + else + err = kbasep_csf_sync_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); + + if (err) + return; + + /* 5. Print info */ + kbasep_print(kbpr, "queue:GPU-%u-%u-%u exec:%c at:0x%.16llx cmd:%s ", kctx->id, + queue->group->handle, queue->csi_index, + queue->enabled && !follows_wait ? 
'S' : 'P', instr_addr, + gpu_sync_type_name[type]); + + if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) + kbasep_print(kbpr, "slot:-"); + else + kbasep_print(kbpr, "slot:%d", (int)queue->group->csg_nr); + + kbasep_print(kbpr, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); + + if (type == CSF_GPU_SYNC_WAIT) { + wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); + kbasep_print(kbpr, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); + } else + kbasep_print(kbpr, "op:%s ", gpu_sync_type_op[type]); + + kbasep_print(kbpr, "arg_value:0x%.16llx\n", compare_val); +} + +/** + * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. + * + * @kbpr: Pointer to printer instance. + * @queue: Address of a GPU command queue to examine. + * + * This function will iterate through each command in the ring buffer of the given GPU queue from + * CS_EXTRACT, and if it is a SYNC_* instruction it will attempt to decode the sync operation and + * print relevant information to the sync file. + * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. + * when there are no more commands to view) or a number of consumed GPU CALL commands have + * been observed.
+ */ +static void kbasep_csf_dump_active_queue_sync_info(struct kbasep_printer *kbpr, + struct kbase_queue *queue) +{ + struct kbase_context *kctx; + u64 *addr; + u64 cs_extract, cs_insert, instr, cursor, end_cursor; + u32 nr_nearby_instr_size; + bool follows_wait = false; + int nr_calls = 0; + + if (!queue) + return; + + kctx = queue->kctx; + + addr = queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / sizeof(*addr)]; + + addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr); + cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)]; + + nr_nearby_instr_size = + min((MAX_NR_NEARBY_INSTR * (u32)sizeof(u64)), ((queue->size / 2) & ~(0x7u))); + cursor = (cs_extract + queue->size - nr_nearby_instr_size) & ((u64)queue->size - 1); + end_cursor = min(cs_insert, ((cs_extract + nr_nearby_instr_size) & ((u64)queue->size - 1))); + + if (!is_power_of_2(queue->size)) { + dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", + queue->csi_index, queue->size); + return; + } + + kbasep_print( + kbpr, + "queue:GPU-%u-%u-%u size:%u cs_insert:%8llx cs_extract:%8llx dump_begin:%8llx dump_end:%8llx\n", + kctx->id, queue->group->handle, queue->csi_index, queue->size, cs_insert, + cs_extract, cursor, end_cursor); + + while ((cs_insert != cs_extract) && (cursor != end_cursor) && + (nr_calls < MAX_NR_GPU_CALLS)) { + bool instr_is_64_bit = false; + u32 cursor_ringbuff_offset = (u32)cursor; + + /* Find instruction that cursor is currently on */ + instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD64_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + instr_is_64_bit = true; + break; + default: + break; + } + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD_OPCODE: + case GPU_CSF_SYNC_ADD64_OPCODE: + kbasep_csf_print_gpu_sync_op(kbpr, kctx, queue, cursor_ringbuff_offset, + cursor, instr, CSF_GPU_SYNC_ADD, + instr_is_64_bit, follows_wait); + break; + case 
GPU_CSF_SYNC_SET_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + kbasep_csf_print_gpu_sync_op(kbpr, kctx, queue, cursor_ringbuff_offset, + cursor, instr, CSF_GPU_SYNC_SET, + instr_is_64_bit, follows_wait); + break; + case GPU_CSF_SYNC_WAIT_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + kbasep_csf_print_gpu_sync_op(kbpr, kctx, queue, cursor_ringbuff_offset, + cursor, instr, CSF_GPU_SYNC_WAIT, + instr_is_64_bit, follows_wait); + follows_wait = true; /* Future commands will follow at least one wait */ + break; + case GPU_CSF_CALL_OPCODE: + nr_calls++; + kbasep_print(kbpr, + "queue:GPU-%u-%u-%u exec:%c at:0x%.16llx cmd:0x%.16llx\n", + kctx->id, queue->group->handle, queue->csi_index, + queue->enabled && !follows_wait ? 'S' : 'P', cursor, instr); + break; + default: + /* NOP instructions without metadata are not printed. */ + if (instr) { + kbasep_print( + kbpr, + "queue:GPU-%u-%u-%u exec:%c at:0x%.16llx cmd:0x%.16llx\n", + kctx->id, queue->group->handle, queue->csi_index, + queue->enabled && !follows_wait ? 'S' : 'P', cursor, instr); + } + break; + } + + cursor = (cursor + sizeof(u64)) & ((u64)queue->size - 1); + } +} + +/** + * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of + * the provided queue group. + * + * @kctx: The kbase context + * @kbpr: Pointer to printer instance. + * @group: Address of a GPU command group to iterate through. + * + * This function will iterate through each queue in the provided GPU queue group and + * print its SYNC related commands. 
+ */ +static void kbasep_csf_dump_active_group_sync_state(struct kbase_context *kctx, + struct kbasep_printer *kbpr, + struct kbase_queue_group *const group) +{ + unsigned int i; + + kbasep_print(kbpr, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, + group->csg_nr, kctx->tgid, kctx->id); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) + kbasep_csf_dump_active_queue_sync_info(kbpr, group->bound_queues[i]); +} + +int kbasep_csf_sync_gpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr) +{ + u32 gr; + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return -EINVAL; + + kbdev = kctx->kbdev; + kbase_csf_scheduler_lock(kbdev); + kbase_csf_csg_update_status(kbdev); + + kbasep_print(kbpr, "CSF GPU queues sync info (version: v" __stringify( + MALI_CSF_SYNC_DUMP_VERSION) "):\n"); + + for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[gr].resident_group; + if (!group || group->kctx != kctx) + continue; + kbasep_csf_dump_active_group_sync_state(kctx, kbpr, group); + } + + kbase_csf_scheduler_unlock(kbdev); + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.h new file mode 100644 index 000000000000..e705e64b5962 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_SYNC_H_ +#define _KBASE_CSF_SYNC_H_ + +/* Forward declaration */ +struct kbase_context; +struct kbasep_printer; + +#define MALI_CSF_SYNC_DUMP_VERSION 0 + +/** + * kbasep_csf_sync_kcpu_dump_print() - Print CSF KCPU queue sync info + * + * @kctx: The kbase context. + * @kbpr: Pointer to printer instance. + * + * Return: Negative error code or 0 on success. + */ +int kbasep_csf_sync_kcpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr); + +/** + * kbasep_csf_sync_gpu_dump_print() - Print CSF GPU queue sync info + * + * @kctx: The kbase context + * @kbpr: Pointer to printer instance. + * + * Return: Negative error code or 0 on success. + */ +int kbasep_csf_sync_gpu_dump_print(struct kbase_context *kctx, struct kbasep_printer *kbpr); + +#endif /* _KBASE_CSF_SYNC_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c index a5e0ab5eaf17..e002ea40f61a 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,715 +18,15 @@ * http://www.gnu.org/licenses/gpl-2.0.html. 
* */ - #include "mali_kbase_csf_sync_debugfs.h" -#include "mali_kbase_csf_csg_debugfs.h" -#include -#include - -#if IS_ENABLED(CONFIG_SYNC_FILE) -#include "mali_kbase_sync.h" -#endif #if IS_ENABLED(CONFIG_DEBUG_FS) -#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" - -/* GPU queue related values */ -#define GPU_CSF_MOVE_OPCODE ((u64)0x1) -#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) -#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) -#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) -#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) -#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) -#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) -#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) -#define GPU_CSF_CALL_OPCODE ((u64)0x20) - -#define MAX_NR_GPU_CALLS (5) -#define INSTR_OPCODE_MASK ((u64)0xFF << 56) -#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) -#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) -#define MOVE_DEST_MASK ((u64)0xFF << 48) -#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) -#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) -#define SYNC_SRC0_MASK ((u64)0xFF << 40) -#define SYNC_SRC1_MASK ((u64)0xFF << 32) -#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) -#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) -#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) -#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) - -/* Enumeration for types of GPU queue sync events for - * the purpose of dumping them through debugfs. - */ -enum debugfs_gpu_sync_type { - DEBUGFS_GPU_SYNC_WAIT, - DEBUGFS_GPU_SYNC_SET, - DEBUGFS_GPU_SYNC_ADD, - NUM_DEBUGFS_GPU_SYNC_TYPES -}; - -/** - * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. - * - * @kctx: The context of the queue. - * @obj_addr: Pointer to the CQS live 32-bit value. - * @live_val: Pointer to the u32 that will be set to the CQS object's current, live - * value. - * - * Return: 0 if successful or a negative error code on failure. 
- */ -static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, - u32 *live_val) -{ - struct kbase_vmap_struct *mapping; - u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); - - if (!cpu_ptr) - return -1; - - *live_val = *cpu_ptr; - kbase_phy_alloc_mapping_put(kctx, mapping); - return 0; -} - -/** - * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. - * - * @kctx: The context of the queue. - * @obj_addr: Pointer to the CQS live value (32 or 64-bit). - * @live_val: Pointer to the u64 that will be set to the CQS object's current, live - * value. - * - * Return: 0 if successful or a negative error code on failure. - */ -static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, - u64 *live_val) -{ - struct kbase_vmap_struct *mapping; - u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); - - if (!cpu_ptr) - return -1; - - *live_val = *cpu_ptr; - kbase_phy_alloc_mapping_put(kctx, mapping); - return 0; -} - -/** - * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait - * or Fence Signal command, contained in a - * KCPU queue. - * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. - * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
- */ -static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, - struct kbase_kcpu_command *cmd, - const char *cmd_name) -{ -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence *fence = NULL; -#else - struct dma_fence *fence = NULL; -#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ - - struct kbase_sync_fence_info info; - const char *timeline_name = NULL; - bool is_signaled = false; - - fence = cmd->info.fence.fence; - if (WARN_ON(!fence)) - return; - - kbase_sync_fence_info_get(cmd->info.fence.fence, &info); - timeline_name = fence->ops->get_timeline_name(fence); - is_signaled = info.status > 0; - - seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, - is_signaled); - - /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ - seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", - timeline_name, fence->context, (u64)fence->seqno); -} - -/** - * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, - * contained in a KCPU queue. - * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. 
- */ -static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, - struct kbase_kcpu_command *cmd) -{ - struct kbase_context *kctx = file->private; - size_t i; - - for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { - struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; - - u32 live_val; - int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); - bool live_val_valid = (ret >= 0); - - seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); - - if (live_val_valid) - seq_printf(file, "0x%.16llx", (u64)live_val); - else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); - - seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); - } -} - -/** - * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS - * Set command, contained in a KCPU queue. - * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. - */ -static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, - struct kbase_kcpu_command *cmd) -{ - struct kbase_context *kctx = file->private; - size_t i; - - for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { - struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; - - u32 live_val; - int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); - bool live_val_valid = (ret >= 0); - - seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); - - if (live_val_valid) - seq_printf(file, "0x%.16llx", (u64)live_val); - else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); - - seq_printf(file, " | op:add arg_value:0x%.8x", 1); - } -} - -/** - * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation. - * - * @op: The numerical value of operation. - * - * Return: const static pointer to the command name, or '??' if unknown. 
- */ -static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) -{ - const char *string; - - switch (op) { - case BASEP_CQS_WAIT_OPERATION_LE: - string = "le"; - break; - case BASEP_CQS_WAIT_OPERATION_GT: - string = "gt"; - break; - default: - string = "??"; - break; - } - return string; -} - -/** - * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation. - * - * @op: The numerical value of operation. - * - * Return: const static pointer to the command name, or '??' if unknown. - */ -static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) -{ - const char *string; - - switch (op) { - case BASEP_CQS_SET_OPERATION_ADD: - string = "add"; - break; - case BASEP_CQS_SET_OPERATION_SET: - string = "set"; - break; - default: - string = "???"; - break; - } - return string; -} - -/** - * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS - * Wait Operation command, contained - * in a KCPU queue. - * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. 
- */ -static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, - struct kbase_kcpu_command *cmd) -{ - size_t i; - struct kbase_context *kctx = file->private; - - for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { - struct base_cqs_wait_operation_info *wait_op = - &cmd->info.cqs_wait_operation.objs[i]; - const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); - - u64 live_val; - int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val); - - bool live_val_valid = (ret >= 0); - - seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); - - if (live_val_valid) - seq_printf(file, "0x%.16llx", live_val); - else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); - - seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); - } -} - -/** - * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS - * Set Operation command, contained - * in a KCPU queue. - * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. 
- */ -static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, - struct kbase_kcpu_command *cmd) -{ - size_t i; - struct kbase_context *kctx = file->private; - - for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { - struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; - const char *op_name = kbasep_csf_sync_get_set_op_name( - (basep_cqs_set_operation_op)set_op->operation); - - u64 live_val; - int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val); - - bool live_val_valid = (ret >= 0); - - seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); - - if (live_val_valid) - seq_printf(file, "0x%.16llx", live_val); - else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); - - seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); - } -} - -/** - * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue - * - * @file: The seq_file to print to. - * @queue: Pointer to the KCPU queue. - */ -static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, - struct kbase_kcpu_command_queue *queue) -{ - char started_or_pending; - struct kbase_kcpu_command *cmd; - struct kbase_context *kctx = file->private; - size_t i; - - if (WARN_ON(!queue)) - return; - - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); - mutex_lock(&queue->lock); - - for (i = 0; i != queue->num_pending_cmds; ++i) { - started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; - seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, - started_or_pending); - - cmd = &queue->commands[queue->start_offset + i]; - switch (cmd->type) { -#if IS_ENABLED(CONFIG_SYNC_FILE) - case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: - kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); - break; - case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: - kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); - break; -#endif - case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: - kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); - break; - case BASE_KCPU_COMMAND_TYPE_CQS_SET: - kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); - break; - case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: - kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); - break; - case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: - kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); - break; - default: - seq_puts(file, ", U, Unknown blocking command"); - break; - } - - seq_puts(file, "\n"); - } - - mutex_unlock(&queue->lock); -} - -/** - * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info - * - * @file: The seq_file for printing to. - * - * Return: Negative error code or 0 on success. - */ -static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file) -{ - struct kbase_context *kctx = file->private; - unsigned long queue_idx; - - mutex_lock(&kctx->csf.kcpu_queues.lock); - seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id); - - queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); - - while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { - kbasep_csf_sync_kcpu_debugfs_print_queue(file, - kctx->csf.kcpu_queues.array[queue_idx]); - - queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, - queue_idx + 1); - } - - mutex_unlock(&kctx->csf.kcpu_queues.lock); - return 0; -} - -/** - * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations - * from a MOVE instruction. 
- * - * @move_cmd: Raw MOVE instruction. - * @sync_addr_reg: Register identifier from SYNC_* instruction. - * @compare_val_reg: Register identifier from SYNC_* instruction. - * @sync_val: Pointer to store CQS object address for sync operation. - * @compare_val: Pointer to store compare value for sync operation. - * - * Return: True if value is obtained by checking for correct register identifier, - * or false otherwise. - */ -static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, - u64 compare_val_reg, u64 *sync_val, - u64 *compare_val) -{ - u64 imm_mask; - - /* Verify MOVE instruction and get immediate mask */ - if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) - imm_mask = MOVE32_IMM_MASK; - else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) - imm_mask = MOVE_IMM_MASK; - else - /* Error return */ - return false; - - /* Verify value from MOVE instruction and assign to variable */ - if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) - *sync_val = move_cmd & imm_mask; - else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) - *compare_val = move_cmd & imm_mask; - else - /* Error return */ - return false; - - return true; -} - -/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided - * offset. - * - * @queue: Pointer to the queue. - * @ringbuff_offset: Ringbuffer offset. - * - * Return: the u64 in the ringbuffer at the desired offset. - */ -static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) -{ - u64 page_off = ringbuff_offset >> PAGE_SHIFT; - u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; - struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); - u64 *ringbuffer = kmap_atomic(page); - u64 value = ringbuffer[offset_within_page / sizeof(u64)]; - - kunmap_atomic(ringbuffer); - return value; -} - -/** - * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. 
- * - * @file: Pointer to debugfs seq_file file struct for writing output. - * @kctx: Pointer to kbase context. - * @queue: Pointer to the GPU command queue. - * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command. - * (Useful for finding preceding MOVE commands) - * @sync_cmd: Entire u64 of the sync command, which has both sync address and - * comparison-value encoded in it. - * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT). - * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false). - * @follows_wait: Bool to indicate if the operation follows at least one wait - * operation. Used to determine whether it's pending or started. - */ -static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx, - struct kbase_queue *queue, u32 ringbuff_offset, - u64 sync_cmd, enum debugfs_gpu_sync_type type, - bool is_64bit, bool follows_wait) -{ - u64 sync_addr = 0, compare_val = 0, live_val = 0; - u64 move_cmd; - u8 sync_addr_reg, compare_val_reg, wait_condition = 0; - int err; - - static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" }; - static const char *const gpu_sync_type_op[] = { - "wait", /* This should never be printed, only included to simplify indexing */ - "set", "add" - }; - - if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) { - dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!"); - return; - } - - /* We expect there to be at least 2 preceding MOVE instructions, and - * Base will always arrange for the 2 MOVE + SYNC instructions to be - * contiguously located, and is therefore never expected to be wrapped - * around the ringbuffer boundary. - */ - if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) { - dev_warn(kctx->kbdev->dev, - "Unexpected wraparound detected between %s & MOVE instruction", - gpu_sync_type_name[type]); - return; - } - - /* 1. 
Get Register identifiers from SYNC_* instruction */ - sync_addr_reg = SYNC_SRC0_GET(sync_cmd); - compare_val_reg = SYNC_SRC1_GET(sync_cmd); - - /* 2. Get values from first MOVE command */ - ringbuff_offset -= sizeof(u64); - move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); - if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, - &sync_addr, &compare_val)) - return; - - /* 3. Get values from next MOVE command */ - ringbuff_offset -= sizeof(u64); - move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); - if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, - &sync_addr, &compare_val)) - return; - - /* 4. Get CQS object value */ - if (is_64bit) - err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val); - else - err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); - - if (err) - return; - - /* 5. Print info */ - seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle, - queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P', - gpu_sync_type_name[type]); - - if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) - seq_puts(file, "slot:-"); - else - seq_printf(file, "slot:%d", (int)queue->group->csg_nr); - - seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); - - if (type == DEBUGFS_GPU_SYNC_WAIT) { - wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); - seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); - } else - seq_printf(file, "op:%s ", gpu_sync_type_op[type]); - - seq_printf(file, "arg_value:0x%.16llx\n", compare_val); -} - -/** - * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. - * - * @file: seq_file for printing to. - * @queue: Address of a GPU command queue to examine. 
- * - * This function will iterate through each command in the ring buffer of the given GPU queue from - * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and - * print relevant information to the debugfs file. - * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. - * when there are no more commands to view) or a number of consumed GPU CALL commands have - * been observed. - */ -static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue) -{ - struct kbase_context *kctx; - u32 *addr; - u64 cs_extract, cs_insert, instr, cursor; - bool follows_wait = false; - int nr_calls = 0; - - if (!queue) - return; - - kctx = queue->kctx; - - addr = (u32 *)queue->user_io_addr; - cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32); - - addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); - cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32); - - cursor = cs_extract; - - if (!is_power_of_2(queue->size)) { - dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", - queue->csi_index, queue->size); - return; - } - - while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) { - bool instr_is_64_bit = false; - /* Calculate offset into ringbuffer from the absolute cursor, - * by finding the remainder of the cursor divided by the - * ringbuffer size. The ringbuffer size is guaranteed to be - * a power of 2, so the remainder can be calculated without an - * explicit modulo. queue->size - 1 is the ringbuffer mask. 
- */ - u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1)); - - /* Find instruction that cursor is currently on */ - instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); - - switch (INSTR_OPCODE_GET(instr)) { - case GPU_CSF_SYNC_ADD64_OPCODE: - case GPU_CSF_SYNC_SET64_OPCODE: - case GPU_CSF_SYNC_WAIT64_OPCODE: - instr_is_64_bit = true; - default: - break; - } - - switch (INSTR_OPCODE_GET(instr)) { - case GPU_CSF_SYNC_ADD_OPCODE: - case GPU_CSF_SYNC_ADD64_OPCODE: - kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, - instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit, - follows_wait); - break; - case GPU_CSF_SYNC_SET_OPCODE: - case GPU_CSF_SYNC_SET64_OPCODE: - kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, - instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit, - follows_wait); - break; - case GPU_CSF_SYNC_WAIT_OPCODE: - case GPU_CSF_SYNC_WAIT64_OPCODE: - kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, - instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit, - follows_wait); - follows_wait = true; /* Future commands will follow at least one wait */ - break; - case GPU_CSF_CALL_OPCODE: - nr_calls++; - /* Fallthrough */ - default: - /* Unrecognized command, skip past it */ - break; - } - - cursor += sizeof(u64); - } -} - -/** - * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of - * the provided queue group. - * - * @file: seq_file for printing to. - * @group: Address of a GPU command group to iterate through. - * - * This function will iterate through each queue in the provided GPU queue group and - * print its SYNC related commands. 
- */ -static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, - struct kbase_queue_group *const group) -{ - struct kbase_context *kctx = file->private; - unsigned int i; - - seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, - group->csg_nr, kctx->tgid, kctx->id); - - for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) - kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); -} - -/** - * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info - * - * @file: The seq_file for printing to. - * - * Return: Negative error code or 0 on success. - */ -static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) -{ - u32 gr; - struct kbase_context *kctx = file->private; - struct kbase_device *kbdev; - - if (WARN_ON(!kctx)) - return -EINVAL; - - kbdev = kctx->kbdev; - kbase_csf_scheduler_lock(kbdev); - kbase_csf_debugfs_update_active_groups_status(kbdev); - - for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { - struct kbase_queue_group *const group = - kbdev->csf.scheduler.csg_slots[gr].resident_group; - if (!group || group->kctx != kctx) - continue; - kbasep_csf_dump_active_group_sync_state(file, group); - } - - kbase_csf_scheduler_unlock(kbdev); - return 0; -} +#include "mali_kbase_csf_sync.h" +#include "mali_kbase_csf_util.h" +#include +#include +#include /** * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information @@ -738,10 +38,17 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) */ static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) { - seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + struct kbasep_printer *kbpr; + struct kbase_context *kctx = file->private; + CSTD_UNUSED(data); + + kbpr = kbasep_printer_file_init(file); + if (kbpr != NULL) { + kbasep_csf_sync_kcpu_dump_print(kctx, kbpr); + kbasep_csf_sync_gpu_dump_print(kctx, kbpr); + kbasep_printer_term(kbpr); + } - 
kbasep_csf_sync_kcpu_debugfs_show(file); - kbasep_csf_sync_gpu_debugfs_show(file); return 0; } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h index 177e15d85341..f67d31ed0a06 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,6 @@ /* Forward declaration */ struct kbase_context; -#define MALI_CSF_SYNC_DEBUGFS_VERSION 0 - /** * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info * diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 8072a8bd2c32..f898535004c5 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -21,6 +21,7 @@ #include +#include "mali_kbase_reg_track.h" #include "mali_kbase_csf_tiler_heap.h" #include "mali_kbase_csf_tiler_heap_def.h" #include "mali_kbase_csf_heap_context_alloc.h" @@ -64,16 +65,13 @@ static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr) WARN_ON(chunk_size & ~CHUNK_SIZE_MASK); WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK); - encoded_size = - (u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) << - CHUNK_HDR_NEXT_SIZE_POS; + encoded_size = (u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) + << CHUNK_HDR_NEXT_SIZE_POS; - encoded_addr = - (chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) << - CHUNK_HDR_NEXT_ADDR_POS; + encoded_addr = (chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) << CHUNK_HDR_NEXT_ADDR_POS; 
return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) | - (encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK); + (encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK); } /** @@ -83,14 +81,12 @@ static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr) * * Return: The address of the most recently-linked chunk, or NULL if none. */ -static struct kbase_csf_tiler_heap_chunk *get_last_chunk( - struct kbase_csf_tiler_heap *const heap) +static struct kbase_csf_tiler_heap_chunk *get_last_chunk(struct kbase_csf_tiler_heap *const heap) { if (list_empty(&heap->chunks_list)) return NULL; - return list_last_entry(&heap->chunks_list, - struct kbase_csf_tiler_heap_chunk, link); + return list_last_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); } /** @@ -136,7 +132,7 @@ static void remove_external_chunk_mappings(struct kbase_context *const kctx, * Return: 0 if successful or a negative error code on failure. */ static int link_chunk(struct kbase_csf_tiler_heap *const heap, - struct kbase_csf_tiler_heap_chunk *const chunk) + struct kbase_csf_tiler_heap_chunk *const chunk) { struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap); @@ -149,8 +145,7 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap, *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); - dev_dbg(kctx->kbdev->dev, - "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", + dev_dbg(kctx->kbdev->dev, "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", prev->gpu_va, chunk->gpu_va); } @@ -172,7 +167,7 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap, * Return: 0 if successful or a negative error code on failure. 
*/ static int init_chunk(struct kbase_csf_tiler_heap *const heap, - struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) + struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) { int err = 0; u64 *chunk_hdr; @@ -181,8 +176,7 @@ static int init_chunk(struct kbase_csf_tiler_heap *const heap, lockdep_assert_held(&kctx->csf.tiler_heaps.lock); if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) { - dev_err(kctx->kbdev->dev, - "Tiler heap chunk address is unusable\n"); + dev_err(kctx->kbdev->dev, "Tiler heap chunk address is unusable\n"); return -EINVAL; } @@ -283,8 +277,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); if (unlikely(!chunk)) { - dev_err(kctx->kbdev->dev, - "No kernel memory for a new tiler heap chunk\n"); + dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap chunk\n"); return NULL; } @@ -362,7 +355,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * /* If page migration is enabled, we don't want to migrate tiler heap pages. * This does not change if the constituent pages are already marked as isolated. 
*/ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); return chunk; @@ -445,8 +438,8 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller"); list_for_each_safe(entry, tmp, &heap->chunks_list) { - struct kbase_csf_tiler_heap_chunk *chunk = list_entry( - entry, struct kbase_csf_tiler_heap_chunk, link); + struct kbase_csf_tiler_heap_chunk *chunk = + list_entry(entry, struct kbase_csf_tiler_heap_chunk, link); list_del_init(&chunk->link); heap->chunk_count--; @@ -466,8 +459,7 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) * * Return: 0 if successful or a negative error code on failure. */ -static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, - u32 const nchunks) +static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, u32 const nchunks) { int err = 0; u32 i; @@ -520,13 +512,12 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) * may be overwritten with new data, meaning heap->gpu_va should not * be used past this point. */ - kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, - heap->gpu_va); + kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, heap->gpu_va); WARN_ON(heap->chunk_count); - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, - heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, - heap->target_in_flight, 0); + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, 0, 0, + heap->max_chunks, heap->chunk_size, 0, + heap->target_in_flight, 0); if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); @@ -552,8 +543,8 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) * * Return: pointer to the tiler heap object, or NULL if not found. 
*/ -static struct kbase_csf_tiler_heap *find_tiler_heap( - struct kbase_context *const kctx, u64 const heap_gpu_va) +static struct kbase_csf_tiler_heap *find_tiler_heap(struct kbase_context *const kctx, + u64 const heap_gpu_va) { struct kbase_csf_tiler_heap *heap = NULL; @@ -564,8 +555,7 @@ static struct kbase_csf_tiler_heap *find_tiler_heap( return heap; } - dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n", - heap_gpu_va); + dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n", heap_gpu_va); return NULL; } @@ -589,8 +579,7 @@ static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) { - int err = kbase_csf_heap_context_allocator_init( - &kctx->csf.tiler_heaps.ctx_alloc, kctx); + int err = kbase_csf_heap_context_allocator_init(&kctx->csf.tiler_heaps.ctx_alloc, kctx); if (unlikely(err)) return err; @@ -615,8 +604,8 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) mutex_unlock(&kctx->csf.tiler_heaps.lock); list_for_each_safe(entry, tmp, &local_heaps_list) { - struct kbase_csf_tiler_heap *heap = list_entry( - entry, struct kbase_csf_tiler_heap, link); + struct kbase_csf_tiler_heap *heap = + list_entry(entry, struct kbase_csf_tiler_heap, link); list_del_init(&heap->link); delete_heap(heap); @@ -678,8 +667,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ { int err = 0; struct kbase_csf_tiler_heap *heap = NULL; - struct kbase_csf_heap_context_allocator *const ctx_alloc = - &kctx->csf.tiler_heaps.ctx_alloc; + struct kbase_csf_heap_context_allocator *const ctx_alloc = &kctx->csf.tiler_heaps.ctx_alloc; struct kbase_csf_tiler_heap_chunk *chunk = NULL; struct kbase_va_region *gpu_va_reg = NULL; void *vmap_ptr = NULL; @@ -748,7 +736,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ KBASE_REG_CPU_RD, &heap->buf_desc_map, KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); - 
if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); kbase_gpu_vm_unlock(kctx); @@ -843,8 +831,7 @@ buf_desc_not_suitable: return err; } -int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, - u64 const heap_gpu_va) +int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, u64 const heap_gpu_va) { int err = 0; struct kbase_csf_tiler_heap *heap = NULL; @@ -868,13 +855,11 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) kctx->running_total_tiler_heap_memory -= heap_size; else - dev_warn(kctx->kbdev->dev, - "Running total tiler heap memory lower than expected!"); + dev_warn(kctx->kbdev->dev, "Running total tiler heap memory lower than expected!"); if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count)) kctx->running_total_tiler_heap_nr_chunks -= chunk_count; else - dev_warn(kctx->kbdev->dev, - "Running total tiler chunk count lower than expected!"); + dev_warn(kctx->kbdev->dev, "Running total tiler chunk count lower than expected!"); if (!err) dev_dbg(kctx->kbdev->dev, "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", @@ -937,8 +922,9 @@ static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr return -ENOMEM; } -int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, - u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) +int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, u64 gpu_heap_va, + u32 nr_in_flight, u32 pending_frag_count, + u64 *new_chunk_ptr) { struct kbase_csf_tiler_heap *heap; struct kbase_csf_tiler_heap_chunk *chunk; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h index 1b5cb560894f..6b875e381c4f 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h +++ 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -114,8 +114,9 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); * available upon completion of a render pass and -EINVAL when * invalid value was passed for one of the argument). */ -int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, - u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr); +int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, u64 gpu_heap_va, + u32 nr_in_flight, u32 pending_frag_count, + u64 *new_chunk_ptr); /** * kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker calim's scan diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c index 96e0f2829854..dd3b82932da5 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,7 +41,10 @@ static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) struct kbase_csf_tiler_heap *heap; struct kbase_csf_tiler_heap_chunk *chunk; - seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); + CSTD_UNUSED(data); + + seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", + MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); mutex_lock(&tiler_heaps_p->lock); @@ -56,8 +59,7 @@ static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) seq_printf(file, "\ttarget_in_flight = %u\n", heap->target_in_flight); list_for_each_entry(chunk, &heap->chunks_list, link) - seq_printf(file, "\t\tchunk gpu_va = 0x%llx\n", - chunk->gpu_va); + seq_printf(file, "\t\tchunk gpu_va = 0x%llx\n", chunk->gpu_va); } mutex_unlock(&tiler_heaps_p->lock); @@ -78,6 +80,8 @@ static int kbasep_csf_tiler_heap_total_debugfs_show(struct seq_file *file, void { struct kbase_context *kctx = file->private; + CSTD_UNUSED(data); + seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); seq_printf(file, "Total number of chunks of all heaps in the context: %lu\n", @@ -121,12 +125,11 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; - file = debugfs_create_file("tiler_heaps", 0444, kctx->kctx_dentry, - kctx, &kbasep_csf_tiler_heap_debugfs_fops); + file = debugfs_create_file("tiler_heaps", 0444, kctx->kctx_dentry, kctx, + &kbasep_csf_tiler_heap_debugfs_fops); if (IS_ERR_OR_NULL(file)) { - dev_warn(kctx->kbdev->dev, - "Unable to create tiler heap debugfs entry"); + dev_warn(kctx->kbdev->dev, "Unable to create tiler heap debugfs entry"); } } @@ -137,12 +140,12 @@ void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) 
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; - file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry, - kctx, &kbasep_csf_tiler_heap_total_debugfs_fops); + file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry, kctx, + &kbasep_csf_tiler_heap_total_debugfs_fops); if (IS_ERR_OR_NULL(file)) { dev_warn(kctx->kbdev->dev, - "Unable to create total tiler heap allocated memory debugfs entry"); + "Unable to create total tiler heap allocated memory debugfs entry"); } } @@ -159,4 +162,3 @@ void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) } #endif /* CONFIG_DEBUG_FS */ - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h index 96f2b03d2d31..6a424e71628d 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,13 +48,11 @@ /* Bitmask of valid chunk sizes. This is also the maximum chunk size, in bytes. */ #define CHUNK_SIZE_MASK \ - ((CHUNK_HDR_NEXT_SIZE_MASK >> CHUNK_HDR_NEXT_SIZE_POS) << \ - CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) + ((CHUNK_HDR_NEXT_SIZE_MASK >> CHUNK_HDR_NEXT_SIZE_POS) << CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) /* Bitmask of valid chunk addresses. This is also the highest address. 
*/ #define CHUNK_ADDR_MASK \ - ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ - CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) + ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) /* The size of the area needed to be vmapped prior to handing the tiler heap * over to the tiler, so that the shrinker could be invoked. diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c index 6357e3518d87..788cfb2ad601 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,7 @@ */ #include +#include "backend/gpu/mali_kbase_pm_internal.h" #include "mali_kbase_csf.h" #include "mali_kbase_csf_tiler_heap.h" #include "mali_kbase_csf_tiler_heap_reclaim.h" @@ -63,8 +64,8 @@ static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx) if (!list_empty(&info->mgr_link)) { u32 remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ? 
- info->nr_est_unused_pages - info->nr_freed_pages : - 0; + info->nr_est_unused_pages - info->nr_freed_pages : + 0; list_del_init(&info->mgr_link); if (remaining) @@ -193,7 +194,29 @@ static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev) unsigned long total_freed_pages = 0; int prio; - lockdep_assert_held(&kbdev->csf.scheduler.lock); + lockdep_assert_held(&scheduler->lock); + + if (scheduler->state != SCHED_SUSPENDED) { + /* Clean and invalidate the L2 cache before reading from the heap contexts, + * headers of the individual chunks and buffer descriptors. + */ + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); + if (kbase_gpu_wait_cache_clean_timeout(kbdev, + kbdev->mmu_or_gpu_cache_op_wait_time_ms)) + dev_warn( + kbdev->dev, + "[%llu] Timeout waiting for CACHE_CLN_INV_L2 to complete before Tiler heap reclaim", + kbase_backend_get_cycle_cnt(kbdev)); + + } else { + /* Make sure power down transitions have completed, i.e. L2 has been + * powered off as that would ensure its contents are flushed to memory. + * This is needed as Scheduler doesn't wait for the power down to finish. 
+ */ + if (kbase_pm_wait_for_desired_state(kbdev)) + dev_warn(kbdev->dev, + "Wait for power down transition failed before Tiler heap reclaim"); + } for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW; total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE && @@ -257,6 +280,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; unsigned long page_cnt = atomic_read(&mgr->unused_pages); + CSTD_UNUSED(sc); + dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", page_cnt); return page_cnt; @@ -324,7 +349,6 @@ static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker * void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) { /* Per-kctx heap_info object initialization */ - memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info)); INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); } @@ -338,8 +362,6 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) prio++) INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]); - atomic_set(&scheduler->reclaim_mgr.unused_pages, 0); - reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects; reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects; reclaim->seeks = HEAP_SHRINKER_SEEKS; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h index b6e580e48df6..7880de04c84f 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,17 +55,17 @@ void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_gr /** * kbase_csf_tiler_heap_reclaim_ctx_init - Initializer on per context data fields for use * with the tiler heap reclaim manager. - * * @kctx: Pointer to the kbase_context. * + * This function must be called only when a kbase context is instantiated. */ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx); /** * kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manger. - * * @kbdev: Pointer to the device. * + * This function must be called only when a kbase device is initialized. */ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c index ea6c11624157..3b2e5ae6ae79 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,7 @@ static int set_timeout(struct kbase_device *const kbdev, u64 const timeout) dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout); atomic64_set(&kbdev->csf.progress_timeout, timeout); + kbase_device_set_timeout(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, timeout, 1); return 0; } @@ -71,14 +72,15 @@ static int set_timeout(struct kbase_device *const kbdev, u64 const timeout) * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t progress_timeout_store(struct device * const dev, - struct device_attribute * const attr, const char * const buf, - size_t const count) +static ssize_t progress_timeout_store(struct device *const dev, struct device_attribute *const attr, + const char *const buf, size_t const count) { struct kbase_device *const kbdev = dev_get_drvdata(dev); int err; u64 timeout; + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; @@ -91,16 +93,15 @@ static ssize_t progress_timeout_store(struct device * const dev, err = kstrtou64(buf, 0, &timeout); if (err) - dev_err(kbdev->dev, - "Couldn't process progress_timeout write operation.\n" - "Use format \n"); + dev_err(kbdev->dev, "Couldn't process progress_timeout write operation.\n" + "Use format \n"); else err = set_timeout(kbdev, timeout); if (!err) { kbase_csf_scheduler_pm_active(kbdev); - err = kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); if (!err) err = kbase_csf_firmware_set_timeout(kbdev, timeout); @@ -124,19 +125,20 @@ static ssize_t progress_timeout_store(struct device * const dev, * * Return: The number of bytes output to @buf. */ -static ssize_t progress_timeout_show(struct device * const dev, - struct device_attribute * const attr, char * const buf) +static ssize_t progress_timeout_show(struct device *const dev, struct device_attribute *const attr, + char *const buf) { struct kbase_device *const kbdev = dev_get_drvdata(dev); int err; + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; err = scnprintf(buf, PAGE_SIZE, "%llu\n", kbase_csf_timeout_get(kbdev)); return err; - } static DEVICE_ATTR_RW(progress_timeout); @@ -147,26 +149,30 @@ int kbase_csf_timeout_init(struct kbase_device *const kbdev) int err; #if IS_ENABLED(CONFIG_OF) - err = of_property_read_u64(kbdev->dev->of_node, - "progress_timeout", &timeout); + /* Read "progress-timeout" property and fallback to "progress_timeout" + * if not found. 
+ */ + err = of_property_read_u64(kbdev->dev->of_node, "progress-timeout", &timeout); + + if (err == -EINVAL) + err = of_property_read_u64(kbdev->dev->of_node, "progress_timeout", &timeout); + if (!err) - dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", - timeout); + dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", timeout); #endif err = set_timeout(kbdev, timeout); if (err) return err; - err = sysfs_create_file(&kbdev->dev->kobj, - &dev_attr_progress_timeout.attr); + err = sysfs_create_file(&kbdev->dev->kobj, &dev_attr_progress_timeout.attr); if (err) dev_err(kbdev->dev, "SysFS file creation failed\n"); return err; } -void kbase_csf_timeout_term(struct kbase_device * const kbdev) +void kbase_csf_timeout_term(struct kbase_device *const kbdev) { sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_progress_timeout.attr); } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c index 6859d65295c4..54054661f7a9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c @@ -38,8 +38,6 @@ #include #endif -/* Name of the CSFFW timeline tracebuffer. 
*/ -#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline" /* Name of the timeline header metatadata */ #define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header" @@ -82,15 +80,13 @@ static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val) return 0; } -DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, - kbase_csf_tl_debugfs_poll_interval_read, - kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, kbase_csf_tl_debugfs_poll_interval_read, + kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_file("csf_tl_poll_interval_in_ms", 0644, - kbdev->debugfs_instr_directory, kbdev, - &kbase_csf_tl_poll_interval_fops); + debugfs_create_file("csf_tl_poll_interval_in_ms", 0644, kbdev->debugfs_instr_directory, + kbdev, &kbase_csf_tl_poll_interval_fops); } #endif @@ -101,20 +97,18 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) * @msg_buf_start: Start of the message. * @msg_buf_end: End of the message buffer. 
*/ -static void tl_reader_overflow_notify( - const struct kbase_csf_tl_reader *self, - u8 *const msg_buf_start, - u8 *const msg_buf_end) +static void tl_reader_overflow_notify(const struct kbase_csf_tl_reader *self, + u8 *const msg_buf_start, u8 *const msg_buf_end) { struct kbase_device *kbdev = self->kbdev; - struct kbase_csffw_tl_message message = {0}; + struct kbase_csffw_tl_message message = { 0 }; /* Reuse the timestamp and cycle count from current event if possible */ if (msg_buf_start + sizeof(message) <= msg_buf_end) memcpy(&message, msg_buf_start, sizeof(message)); - KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( - kbdev, message.timestamp, message.cycle_counter); + KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW(kbdev, message.timestamp, + message.cycle_counter); } /** @@ -125,9 +119,7 @@ static void tl_reader_overflow_notify( * * Return: True, if an overflow has happened, False otherwise. */ -static bool tl_reader_overflow_check( - struct kbase_csf_tl_reader *self, - u16 event_id) +static bool tl_reader_overflow_check(struct kbase_csf_tl_reader *self, u16 event_id) { struct kbase_device *kbdev = self->kbdev; bool has_overflow = false; @@ -136,13 +128,11 @@ static bool tl_reader_overflow_check( * after reset, we should skip overflow check when reset happened. 
*/ if (event_id != 0) { - has_overflow = self->got_first_event - && self->expected_event_id != event_id; + has_overflow = self->got_first_event && self->expected_event_id != event_id; if (has_overflow) - dev_warn(kbdev->dev, - "CSFFW overflow, event_id: %u, expected: %u.", - event_id, self->expected_event_id); + dev_warn(kbdev->dev, "CSFFW overflow, event_id: %u, expected: %u.", + event_id, self->expected_event_id); } self->got_first_event = true; @@ -176,7 +166,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) struct kbase_device *kbdev = self->kbdev; struct kbase_tlstream *stream = self->stream; - u8 *read_buffer = self->read_buffer; + u8 *read_buffer = self->read_buffer; const size_t read_buffer_size = sizeof(self->read_buffer); u32 bytes_read; @@ -197,13 +187,12 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) /* Copying the whole buffer in a single shot. We assume * that the buffer will not contain partially written messages. */ - bytes_read = kbase_csf_firmware_trace_buffer_read_data( - self->trace_buffer, read_buffer, read_buffer_size); + bytes_read = kbase_csf_firmware_trace_buffer_read_data(self->trace_buffer, read_buffer, + read_buffer_size); csffw_data_begin = read_buffer; - csffw_data_end = read_buffer + bytes_read; + csffw_data_end = read_buffer + bytes_read; - for (csffw_data_it = csffw_data_begin; - csffw_data_it < csffw_data_end;) { + for (csffw_data_it = csffw_data_begin; csffw_data_it < csffw_data_end;) { u32 event_header; u16 event_id; u16 event_size; @@ -212,30 +201,25 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) /* Can we safely read event_id? */ if (csffw_data_it + sizeof(event_header) > csffw_data_end) { - dev_warn( - kbdev->dev, - "Unable to parse CSFFW tracebuffer event header."); + dev_warn(kbdev->dev, "Unable to parse CSFFW tracebuffer event header."); ret = -EBUSY; break; } /* Read and parse the event header. 
*/ memcpy(&event_header, csffw_data_it, sizeof(event_header)); - event_id = (event_header >> 0) & 0xFFFF; + event_id = (event_header >> 0) & 0xFFFF; event_size = (event_header >> 16) & 0xFFFF; csffw_data_it += sizeof(event_header); /* Detect if an overflow has happened. */ if (tl_reader_overflow_check(self, event_id)) - tl_reader_overflow_notify(self, - csffw_data_it, - csffw_data_end); + tl_reader_overflow_notify(self, csffw_data_it, csffw_data_end); /* Can we safely read the message body? */ if (csffw_data_it + event_size > csffw_data_end) { - dev_warn(kbdev->dev, - "event_id: %u, can't read with event_size: %u.", - event_id, event_size); + dev_warn(kbdev->dev, "event_id: %u, can't read with event_size: %u.", + event_id, event_size); ret = -EBUSY; break; } @@ -243,14 +227,13 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) /* Convert GPU timestamp to CPU timestamp. */ { struct kbase_csffw_tl_message *msg = - (struct kbase_csffw_tl_message *) csffw_data_it; + (struct kbase_csffw_tl_message *)csffw_data_it; msg->timestamp = kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp); } /* Copy the message out to the tl_stream. */ - buffer = kbase_tlstream_msgbuf_acquire( - stream, event_size, &acq_flags); + buffer = kbase_tlstream_msgbuf_acquire(stream, event_size, &acq_flags); kbasep_serialize_bytes(buffer, 0, csffw_data_it, event_size); kbase_tlstream_msgbuf_release(stream, acq_flags); csffw_data_it += event_size; @@ -269,8 +252,7 @@ static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer) kbase_csf_tl_reader_flush_buffer(self); - rcode = mod_timer(&self->read_timer, - jiffies + msecs_to_jiffies(self->timer_interval)); + rcode = mod_timer(&self->read_timer, jiffies + msecs_to_jiffies(self->timer_interval)); CSTD_UNUSED(rcode); } @@ -287,9 +269,7 @@ static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer) * * Return: Zero on success, -1 otherwise. 
*/ -static int tl_reader_init_late( - struct kbase_csf_tl_reader *self, - struct kbase_device *kbdev) +static int tl_reader_init_late(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev) { struct firmware_trace_buffer *tb; size_t hdr_size = 0; @@ -298,24 +278,19 @@ static int tl_reader_init_late( if (self->kbdev) return 0; - tb = kbase_csf_firmware_get_trace_buffer( - kbdev, KBASE_CSFFW_TRACEBUFFER_NAME); - hdr = kbase_csf_firmware_get_timeline_metadata( - kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size); + tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_TIMELINE_BUF_NAME); + hdr = kbase_csf_firmware_get_timeline_metadata(kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, + &hdr_size); if (!tb) { - dev_warn( - kbdev->dev, - "'%s' tracebuffer is not present in the firmware image.", - KBASE_CSFFW_TRACEBUFFER_NAME); + dev_warn(kbdev->dev, "'%s' tracebuffer is not present in the firmware image.", + KBASE_CSFFW_TIMELINE_BUF_NAME); return -1; } if (!hdr) { - dev_warn( - kbdev->dev, - "'%s' timeline metadata is not present in the firmware image.", - KBASE_CSFFW_TIMELINE_HEADER_NAME); + dev_warn(kbdev->dev, "'%s' timeline metadata is not present in the firmware image.", + KBASE_CSFFW_TIMELINE_HEADER_NAME); return -1; } @@ -338,25 +313,20 @@ static int tl_reader_init_late( * * Return: 0 on success, or negative error code for failure. 
*/ -static int tl_reader_update_enable_bit( - struct kbase_csf_tl_reader *self, - bool value) +static int tl_reader_update_enable_bit(struct kbase_csf_tl_reader *self, bool value) { int err = 0; - err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit( - self->trace_buffer, 0, value); + err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit(self->trace_buffer, 0, value); return err; } -void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, - struct kbase_tlstream *stream) +void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream) { self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; - kbase_timer_setup(&self->read_timer, - kbasep_csf_tl_reader_read_callback); + kbase_timer_setup(&self->read_timer, kbasep_csf_tl_reader_read_callback); self->stream = stream; @@ -376,8 +346,7 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) del_timer_sync(&self->read_timer); } -int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, - struct kbase_device *kbdev) +int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev) { int rcode; @@ -387,9 +356,7 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, if (tl_reader_init_late(self, kbdev)) { #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - dev_warn( - kbdev->dev, - "CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!"); + dev_warn(kbdev->dev, "CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!"); return 0; #else return -EINVAL; @@ -409,8 +376,7 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, if (rcode != 0) return rcode; - rcode = mod_timer(&self->read_timer, - jiffies + msecs_to_jiffies(self->timer_interval)); + rcode = mod_timer(&self->read_timer, jiffies + msecs_to_jiffies(self->timer_interval)); return 0; } diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h index 
12b285fd2929..2f8eb1dd4168 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h @@ -33,7 +33,7 @@ /* CSFFW Timeline read polling default period in milliseconds. */ #define KBASE_CSF_TL_READ_INTERVAL_DEFAULT 200 /* CSFFW Timeline read polling maximum period in milliseconds. */ -#define KBASE_CSF_TL_READ_INTERVAL_MAX (60*1000) +#define KBASE_CSF_TL_READ_INTERVAL_MAX (60 * 1000) struct firmware_trace_buffer; struct kbase_tlstream; @@ -90,8 +90,7 @@ struct kbase_csf_tl_reader { * @self: CSFFW TL Reader instance. * @stream: Destination timeline stream. */ -void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, - struct kbase_tlstream *stream); +void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream); /** * kbase_csf_tl_reader_term() - Terminate CSFFW Timelime Stream Reader. @@ -117,8 +116,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self); * * Return: zero on success, a negative error code otherwise. */ -int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, - struct kbase_device *kbdev); +int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev); /** * kbase_csf_tl_reader_stop() - Stop asynchronous copying of CSFFW timeline stream. diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c index 46872f937dbf..8ed7c91553a6 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -88,7 +88,7 @@ struct firmware_trace_buffer { } cpu_va; u32 num_pages; u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; - char name[1]; /* this field must be last */ + char name[]; /* this field must be last */ }; /** @@ -117,11 +117,14 @@ struct firmware_trace_buffer_data { */ static const struct firmware_trace_buffer_data trace_buffer_data[] = { #if MALI_UNIT_TEST - { "fwutf", { 0 }, 1 }, + { KBASE_CSFFW_UTF_BUF_NAME, { 0 }, 1 }, #endif - { FIRMWARE_LOG_BUF_NAME, { 0 }, 4 }, - { "benchmark", { 0 }, 2 }, - { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, + { KBASE_CSFFW_LOG_BUF_NAME, { 0 }, 4 }, + { KBASE_CSFFW_BENCHMARK_BUF_NAME, { 0 }, 2 }, + { KBASE_CSFFW_TIMELINE_BUF_NAME, { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { KBASE_CSFFW_GPU_METRICS_BUF_NAME, { 0 }, 8 }, +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ }; int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) @@ -137,10 +140,9 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) } /* GPU-readable,writable memory used for Extract variables */ - ret = kbase_csf_firmware_mcu_shared_mapping_init( - kbdev, 1, PROT_WRITE, - KBASE_REG_GPU_RD | KBASE_REG_GPU_WR, - &kbdev->csf.firmware_trace_buffers.mcu_rw); + ret = kbase_csf_firmware_mcu_shared_mapping_init(kbdev, 1, PROT_WRITE, + KBASE_REG_GPU_RD | KBASE_REG_GPU_WR, + &kbdev->csf.firmware_trace_buffers.mcu_rw); if (ret != 0) { dev_err(kbdev->dev, "Failed to map GPU-rw MCU shared memory\n"); goto out; @@ -148,42 +150,39 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) /* GPU-writable memory used for Insert variables */ ret = kbase_csf_firmware_mcu_shared_mapping_init( - kbdev, 1, PROT_READ, KBASE_REG_GPU_WR, - &kbdev->csf.firmware_trace_buffers.mcu_write); + kbdev, 
1, PROT_READ, KBASE_REG_GPU_WR, + &kbdev->csf.firmware_trace_buffers.mcu_write); if (ret != 0) { dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory\n"); goto out; } list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { - u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, - trace_enable_size_dwords; + u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, trace_enable_size_dwords; u32 *extract_cpu_va, *insert_cpu_va; unsigned int i; /* GPU-writable data buffer for the individual trace buffer */ - ret = kbase_csf_firmware_mcu_shared_mapping_init( - kbdev, trace_buffer->num_pages, PROT_READ, KBASE_REG_GPU_WR, - &trace_buffer->data_mapping); + ret = kbase_csf_firmware_mcu_shared_mapping_init(kbdev, trace_buffer->num_pages, + PROT_READ, KBASE_REG_GPU_WR, + &trace_buffer->data_mapping); if (ret) { - dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory for a trace buffer\n"); + dev_err(kbdev->dev, + "Failed to map GPU-writable MCU shared memory for a trace buffer\n"); goto out; } extract_gpu_va = (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + mcu_rw_offset; - extract_cpu_va = (u32 *)( - kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + - mcu_rw_offset); - insert_gpu_va = - (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + - mcu_write_offset; - insert_cpu_va = (u32 *)( - kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + - mcu_write_offset); - data_buffer_gpu_va = - (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); + extract_cpu_va = + (u32 *)(kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + mcu_rw_offset); + insert_gpu_va = (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn + << PAGE_SHIFT) + + mcu_write_offset; + insert_cpu_va = (u32 *)(kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + + mcu_write_offset); + data_buffer_gpu_va = (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); /* Initialize the Extract 
variable */ *extract_cpu_va = 0; @@ -191,23 +190,21 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) /* Each FW address shall be mapped and set individually, as we can't * assume anything about their location in the memory address space. */ - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.size_address, - trace_buffer->num_pages << PAGE_SHIFT); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.data_address, + data_buffer_gpu_va); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.insert_address, + insert_gpu_va); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.extract_address, + extract_gpu_va); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.size_address, + trace_buffer->num_pages << PAGE_SHIFT); - trace_enable_size_dwords = - (trace_buffer->trace_enable_entry_count + 31) >> 5; + trace_enable_size_dwords = (trace_buffer->trace_enable_entry_count + 31) >> 5; for (i = 0; i < trace_enable_size_dwords; i++) { - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.trace_enable + i*4, - trace_buffer->trace_enable_init_mask[i]); + kbase_csf_update_firmware_memory(kbdev, + trace_buffer->gpu_va.trace_enable + i * 4, + trace_buffer->trace_enable_init_mask[i]); } /* Store CPU virtual addresses for permanently mapped variables */ @@ -232,23 +229,21 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev) struct firmware_trace_buffer *trace_buffer; trace_buffer = list_first_entry(&kbdev->csf.firmware_trace_buffers.list, - struct firmware_trace_buffer, node); + struct firmware_trace_buffer, node); kbase_csf_firmware_mcu_shared_mapping_term(kbdev, 
&trace_buffer->data_mapping); list_del(&trace_buffer->node); kfree(trace_buffer); } - kbase_csf_firmware_mcu_shared_mapping_term( - kbdev, &kbdev->csf.firmware_trace_buffers.mcu_rw); - kbase_csf_firmware_mcu_shared_mapping_term( - kbdev, &kbdev->csf.firmware_trace_buffers.mcu_write); + kbase_csf_firmware_mcu_shared_mapping_term(kbdev, + &kbdev->csf.firmware_trace_buffers.mcu_rw); + kbase_csf_firmware_mcu_shared_mapping_term(kbdev, + &kbdev->csf.firmware_trace_buffers.mcu_write); } -int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, - const u32 *entry, - unsigned int size, - bool updatable) +int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, const u32 *entry, + unsigned int size, bool updatable) { const char *name = (char *)&entry[7]; const unsigned int name_len = size - TRACE_BUFFER_ENTRY_NAME_OFFSET; @@ -258,8 +253,7 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, /* Allocate enough space for struct firmware_trace_buffer and the * trace buffer name (with NULL termination). 
*/ - trace_buffer = - kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); + trace_buffer = kmalloc(struct_size(trace_buffer, name, name_len + 1), GFP_KERNEL); if (!trace_buffer) return -ENOMEM; @@ -308,8 +302,7 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { - u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, - trace_enable_size_dwords; + u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, trace_enable_size_dwords; u32 *extract_cpu_va, *insert_cpu_va; unsigned int i; @@ -317,17 +310,14 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) extract_gpu_va = (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + mcu_rw_offset; - extract_cpu_va = (u32 *)( - kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + - mcu_rw_offset); - insert_gpu_va = - (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + - mcu_write_offset; - insert_cpu_va = (u32 *)( - kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + - mcu_write_offset); - data_buffer_gpu_va = - (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); + extract_cpu_va = + (u32 *)(kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + mcu_rw_offset); + insert_gpu_va = (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn + << PAGE_SHIFT) + + mcu_write_offset; + insert_cpu_va = (u32 *)(kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + + mcu_write_offset); + data_buffer_gpu_va = (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); /* Notice that the function only re-updates firmware memory locations * with information that allows access to the trace buffers without @@ -339,23 +329,21 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) /* Each FW address shall be mapped and set individually, as we 
can't * assume anything about their location in the memory address space. */ - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.size_address, - trace_buffer->num_pages << PAGE_SHIFT); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.data_address, + data_buffer_gpu_va); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.insert_address, + insert_gpu_va); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.extract_address, + extract_gpu_va); + kbase_csf_update_firmware_memory(kbdev, trace_buffer->gpu_va.size_address, + trace_buffer->num_pages << PAGE_SHIFT); - trace_enable_size_dwords = - (trace_buffer->trace_enable_entry_count + 31) >> 5; + trace_enable_size_dwords = (trace_buffer->trace_enable_entry_count + 31) >> 5; for (i = 0; i < trace_enable_size_dwords; i++) { - kbase_csf_update_firmware_memory( - kbdev, trace_buffer->gpu_va.trace_enable + i*4, - trace_buffer->trace_enable_init_mask[i]); + kbase_csf_update_firmware_memory(kbdev, + trace_buffer->gpu_va.trace_enable + i * 4, + trace_buffer->trace_enable_init_mask[i]); } /* Store CPU virtual addresses for permanently mapped variables, @@ -370,8 +358,8 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) } } -struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( - struct kbase_device *kbdev, const char *name) +struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer(struct kbase_device *kbdev, + const char *name) { struct firmware_trace_buffer *trace_buffer; @@ -391,8 +379,9 @@ unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( } 
EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count); -static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( - struct firmware_trace_buffer *tb, unsigned int bit, bool value) +static void +kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(struct firmware_trace_buffer *tb, + unsigned int bit, bool value) { struct kbase_device *kbdev = tb->kbdev; @@ -418,14 +407,13 @@ static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( * value of bitmask it cached after the boot. */ kbase_csf_update_firmware_memory( - kbdev, - tb->gpu_va.trace_enable + trace_enable_reg_offset * 4, + kbdev, tb->gpu_va.trace_enable + trace_enable_reg_offset * 4, tb->trace_enable_init_mask[trace_enable_reg_offset]); } } -int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( - struct firmware_trace_buffer *tb, unsigned int bit, bool value) +int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(struct firmware_trace_buffer *tb, + unsigned int bit, bool value) { struct kbase_device *kbdev = tb->kbdev; int err = 0; @@ -442,16 +430,14 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( * the User to retry the update. 
*/ if (kbase_reset_gpu_silent(kbdev)) { - dev_warn( - kbdev->dev, - "GPU reset already in progress when enabling firmware timeline."); + dev_warn(kbdev->dev, + "GPU reset already in progress when enabling firmware timeline."); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return -EAGAIN; } } - kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit, - value); + kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit, value); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (tb->updatable) @@ -461,16 +447,14 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_update_trace_enable_bit); -bool kbase_csf_firmware_trace_buffer_is_empty( - const struct firmware_trace_buffer *trace_buffer) +bool kbase_csf_firmware_trace_buffer_is_empty(const struct firmware_trace_buffer *trace_buffer) { - return *(trace_buffer->cpu_va.insert_cpu_va) == - *(trace_buffer->cpu_va.extract_cpu_va); + return *(trace_buffer->cpu_va.insert_cpu_va) == *(trace_buffer->cpu_va.extract_cpu_va); } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_is_empty); -unsigned int kbase_csf_firmware_trace_buffer_read_data( - struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes) +unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buffer *trace_buffer, + u8 *data, unsigned int num_bytes) { unsigned int bytes_copied; u8 *data_cpu_va = trace_buffer->data_mapping.cpu_addr; @@ -479,19 +463,17 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; if (insert_offset >= extract_offset) { - bytes_copied = min_t(unsigned int, num_bytes, - (insert_offset - extract_offset)); + bytes_copied = min_t(unsigned int, num_bytes, (insert_offset - extract_offset)); memcpy(data, &data_cpu_va[extract_offset], bytes_copied); extract_offset += bytes_copied; } else { unsigned int bytes_copied_head, bytes_copied_tail; - bytes_copied_tail = 
min_t(unsigned int, num_bytes, - (buffer_size - extract_offset)); + bytes_copied_tail = min_t(unsigned int, num_bytes, (buffer_size - extract_offset)); memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail); - bytes_copied_head = min_t(unsigned int, - (num_bytes - bytes_copied_tail), insert_offset); + bytes_copied_head = + min_t(unsigned int, (num_bytes - bytes_copied_tail), insert_offset); memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head); bytes_copied = bytes_copied_head + bytes_copied_tail; @@ -506,6 +488,37 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); +void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer) +{ + unsigned int bytes_discarded; + u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; + u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); + u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va); + unsigned int trace_size; + + if (insert_offset >= extract_offset) { + trace_size = insert_offset - extract_offset; + if (trace_size > buffer_size / 2) { + bytes_discarded = trace_size - buffer_size / 2; + extract_offset += bytes_discarded; + *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + } + } else { + unsigned int bytes_tail; + + bytes_tail = buffer_size - extract_offset; + trace_size = bytes_tail + insert_offset; + if (trace_size > buffer_size / 2) { + bytes_discarded = trace_size - buffer_size / 2; + extract_offset += bytes_discarded; + if (extract_offset >= buffer_size) + extract_offset = extract_offset - buffer_size; + *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + } + } +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard); + static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { unsigned int i; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h index 
0389d093a904..90dfcb2699bc 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,15 @@ #include #define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) -#define FIRMWARE_LOG_BUF_NAME "fwlog" +#if MALI_UNIT_TEST +#define KBASE_CSFFW_UTF_BUF_NAME "fwutf" +#endif +#define KBASE_CSFFW_LOG_BUF_NAME "fwlog" +#define KBASE_CSFFW_BENCHMARK_BUF_NAME "benchmark" +#define KBASE_CSFFW_TIMELINE_BUF_NAME "timeline" +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#define KBASE_CSFFW_GPU_METRICS_BUF_NAME "gpu_metrics" +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Forward declarations */ struct firmware_trace_buffer; @@ -80,10 +88,8 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); * * Return: 0 if successful, negative error code on failure. */ -int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, - const u32 *entry, - unsigned int size, - bool updatable); +int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, const u32 *entry, + unsigned int size, bool updatable); /** * kbase_csf_firmware_reload_trace_buffers_data - Reload trace buffers data for firmware reboot @@ -112,11 +118,12 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev); * Return: handle to a trace buffer, given the name, or NULL if a trace buffer * with that name couldn't be found. 
*/ -struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( - struct kbase_device *kbdev, const char *name); +struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer(struct kbase_device *kbdev, + const char *name); /** - * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer + * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits + * for a trace buffer * * @trace_buffer: Trace buffer handle * @@ -137,8 +144,7 @@ unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( * Return: 0 if successful, negative error code on failure. */ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( - struct firmware_trace_buffer *trace_buffer, unsigned int bit, - bool value); + struct firmware_trace_buffer *trace_buffer, unsigned int bit, bool value); /** * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate @@ -147,8 +153,7 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( * * Return: True if the trace buffer is empty, or false otherwise. */ -bool kbase_csf_firmware_trace_buffer_is_empty( - const struct firmware_trace_buffer *trace_buffer); +bool kbase_csf_firmware_trace_buffer_is_empty(const struct firmware_trace_buffer *trace_buffer); /** * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer @@ -162,8 +167,17 @@ bool kbase_csf_firmware_trace_buffer_is_empty( * * Return: Number of bytes read from the trace buffer. 
*/ -unsigned int kbase_csf_firmware_trace_buffer_read_data( - struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); +unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buffer *trace_buffer, + u8 *data, unsigned int num_bytes); + +/** + * kbase_csf_firmware_trace_buffer_discard - Discard data from a trace buffer + * + * @trace_buffer: Trace buffer handle + * + * Discard part of the data in the trace buffer to reduce its utilization to half of its size. + */ +void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer); /** * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c new file mode 100644 index 000000000000..7dc32a11bb29 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_util.h" +#include +#include +#include +#include +#include +#include + +#define KBASEP_PRINTER_BUFFER_MAX_SIZE (2 * PAGE_SIZE) + +#define KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE 256 + +/** + * struct kbasep_printer - Object representing a logical printer device. + * + * @type: The print output type. + * @fifo: address of the fifo to be used. + * @kbdev: The kbase device. + * @file: The seq_file for printing to. This is NULL if printing to dmesg. + */ +struct kbasep_printer { + enum kbasep_printer_type type; + struct kbase_device *kbdev; + struct seq_file *file; + DECLARE_KFIFO_PTR(fifo, char); +}; + +/** + * kbasep_printer_alloc() - Allocate a kbasep_printer instance. + * + * @type: The type of kbasep_printer to be allocated. + * + * Return: The kbasep_printer instance pointer or NULL on error. + */ +static inline struct kbasep_printer *kbasep_printer_alloc(enum kbasep_printer_type type) +{ + struct kbasep_printer *kbpr = NULL; + + if (type == KBASEP_PRINT_TYPE_INVALID || type >= KBASEP_PRINT_TYPE_CNT) { + pr_err("printer type not supported"); + return NULL; + } + + kbpr = vzalloc(sizeof(struct kbasep_printer)); + if (kbpr) { + kbpr->type = type; + kbpr->file = NULL; + } + + return kbpr; +} + +/** + * kbasep_printer_validate() - Validate kbasep_printer instance. + * + * @kbpr: The kbasep_printer instance to be validated. + * + * Return: true if the instance is correctly configured else false. 
+ */ +static inline bool kbasep_printer_validate(const struct kbasep_printer *kbpr) +{ + if (!kbpr || kbpr->type == KBASEP_PRINT_TYPE_INVALID || kbpr->type >= KBASEP_PRINT_TYPE_CNT) + return false; + + switch (kbpr->type) { + case KBASEP_PRINT_TYPE_DEV_INFO: + case KBASEP_PRINT_TYPE_DEV_WARN: + case KBASEP_PRINT_TYPE_DEV_ERR: + if (kbpr->kbdev == NULL || !kfifo_initialized(&kbpr->fifo)) + return false; + break; + case KBASEP_PRINT_TYPE_SEQ_FILE: + if (kbpr->file == NULL) + return false; + break; + default: + return false; + } + + return true; +} + +struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev, + enum kbasep_printer_type type) +{ + struct kbasep_printer *kbpr = NULL; + + if (WARN_ON_ONCE((kbdev == NULL || !(type == KBASEP_PRINT_TYPE_DEV_INFO || + type == KBASEP_PRINT_TYPE_DEV_WARN || + type == KBASEP_PRINT_TYPE_DEV_ERR)))) + return NULL; + + kbpr = kbasep_printer_alloc(type); + + if (kbpr) { + if (kfifo_alloc(&kbpr->fifo, KBASEP_PRINTER_BUFFER_MAX_SIZE, GFP_KERNEL)) { + kfree(kbpr); + return NULL; + } + kbpr->kbdev = kbdev; + } + + return kbpr; +} + +struct kbasep_printer *kbasep_printer_file_init(struct seq_file *file) +{ + struct kbasep_printer *kbpr = NULL; + + if (WARN_ON_ONCE(file == NULL)) + return NULL; + + kbpr = kbasep_printer_alloc(KBASEP_PRINT_TYPE_SEQ_FILE); + + if (kbpr) + kbpr->file = file; + + return kbpr; +} + +void kbasep_printer_term(struct kbasep_printer *kbpr) +{ + if (kbpr) { + if (kfifo_initialized(&kbpr->fifo)) + kfifo_free(&kbpr->fifo); + vfree(kbpr); + } +} + +void kbasep_printer_buffer_flush(struct kbasep_printer *kbpr) +{ + char buffer[KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE]; + unsigned int i; + + if (WARN_ON_ONCE(!kbasep_printer_validate(kbpr))) + return; + + if (kfifo_is_empty(&kbpr->fifo)) + return; + + while (!kfifo_is_empty(&kbpr->fifo)) { + /* copy elements to fill the local string buffer */ + size_t copied = kfifo_out_peek(&kbpr->fifo, buffer, + KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE - 1); + buffer[copied] 
= '\0'; + /* pop all fifo copied elements until the first new-line char or + * the last copied element + */ + for (i = 0; i < copied; i++) { + kfifo_skip(&kbpr->fifo); + if (buffer[i] == '\n') { + buffer[i + 1] = '\0'; + break; + } + } + + switch (kbpr->type) { + case KBASEP_PRINT_TYPE_DEV_INFO: + dev_info(kbpr->kbdev->dev, buffer); + break; + case KBASEP_PRINT_TYPE_DEV_WARN: + dev_warn(kbpr->kbdev->dev, buffer); + break; + case KBASEP_PRINT_TYPE_DEV_ERR: + dev_err(kbpr->kbdev->dev, buffer); + break; + default: + pr_err("printer not supported"); + } + } +} + +void kbasep_puts(struct kbasep_printer *kbpr, const char *str) +{ + int len = 0; + + if (WARN_ON_ONCE(!kbasep_printer_validate(kbpr))) { + pr_warn("%s", str); + return; + } + + switch (kbpr->type) { + case KBASEP_PRINT_TYPE_DEV_INFO: + case KBASEP_PRINT_TYPE_DEV_WARN: + case KBASEP_PRINT_TYPE_DEV_ERR: + len = strlen(str); + if (len <= kfifo_avail(&kbpr->fifo)) + kfifo_in(&kbpr->fifo, str, len); + break; + case KBASEP_PRINT_TYPE_SEQ_FILE: + seq_printf(kbpr->file, str); + break; + default: + pr_err("printer not supported"); + } +} + +__attribute__((format(__printf__, 2, 3))) void kbasep_print(struct kbasep_printer *kbpr, + const char *fmt, ...) 
+{ + int len = 0; + char buffer[KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE]; + va_list arglist; + + va_start(arglist, fmt); + len = vsnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist); + if (len <= 0) { + pr_err("message write to the buffer failed"); + goto exit; + } + + if (WARN_ON_ONCE(!kbasep_printer_validate(kbpr))) + pr_warn("%s", buffer); + + if (kbpr->type == KBASEP_PRINT_TYPE_SEQ_FILE) + seq_printf(kbpr->file, buffer); + else if (len <= kfifo_avail(&kbpr->fifo)) + kfifo_in(&kbpr->fifo, buffer, len); +exit: + va_end(arglist); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.h new file mode 100644 index 000000000000..b17e7ae2d8f5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_UTIL_H_ +#define _KBASE_CSF_UTIL_H_ + +/* Forward declaration */ +struct kbase_device; +struct kbasep_printer; +struct seq_file; + +/** + * enum kbasep_printer_type - Enumeration representing the different printing output types + * + * @KBASEP_PRINT_TYPE_INVALID: Invalid printing output (default). 
+ * @KBASEP_PRINT_TYPE_DEV_INFO: Print to dmesg at info level. + * @KBASEP_PRINT_TYPE_DEV_WARN: Print to dmesg at warning level. + * @KBASEP_PRINT_TYPE_DEV_ERR: Print to dmesg at error level. + * @KBASEP_PRINT_TYPE_SEQ_FILE: Print to file. + * @KBASEP_PRINT_TYPE_CNT: Never set explicitly. + */ +enum kbasep_printer_type { + KBASEP_PRINT_TYPE_INVALID = 0, + KBASEP_PRINT_TYPE_DEV_INFO, + KBASEP_PRINT_TYPE_DEV_WARN, + KBASEP_PRINT_TYPE_DEV_ERR, + KBASEP_PRINT_TYPE_SEQ_FILE, + KBASEP_PRINT_TYPE_CNT, +}; + +/** + * kbasep_printer_buffer_init() - Helper function to initialise a printer to a buffer. + * + * @kbdev: Pointer to the device. + * @type: Printing output type. Only the following types are supported: + * @KBASEP_PRINT_TYPE_DEV_INFO, @KBASEP_PRINT_TYPE_DEV_WARN and + * @KBASEP_PRINT_TYPE_DEV_ERR. + * + * Return: The kbasep_printer instance pointer or NULL on error. + */ +struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev, + enum kbasep_printer_type type); +/** + * kbasep_printer_file_init() - Helper function to initialise a printer to a file. + * + * @file: The seq_file for printing to. + * + * Return: The kbasep_printer instance pointer or NULL on error. + */ +struct kbasep_printer *kbasep_printer_file_init(struct seq_file *file); + +/** + * kbasep_printer_term() - Helper function to terminate printer. + * + * @kbpr: The print output device. + */ +void kbasep_printer_term(struct kbasep_printer *kbpr); + +/** + * kbasep_printer_buffer_flush() - Helper function to flush printer buffer to dmesg. + * + * @kbpr: The print output device. + */ +void kbasep_printer_buffer_flush(struct kbasep_printer *kbpr); + +/** + * kbasep_puts() - Print string using kbasep_printer instance. + * + * @kbpr: The kbasep_printer instance. + * @str: The string to print. + */ +void kbasep_puts(struct kbasep_printer *kbpr, const char *str); + +/** + * kbasep_print() - Helper function to print to either debugfs file or a dmesg buffer. 
+ * + * @kbpr: The print output device. + * @fmt: The message to print. + * @...: Arguments to format the message. + */ +void kbasep_print(struct kbasep_printer *kbpr, const char *fmt, ...); + +#endif /* _KBASE_CSF_UTIL_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c index 185779c16815..2e87c08ded0e 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,8 +63,8 @@ static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev) wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq); } -bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, - struct kbase_context *kctx, enum dumpfault_error_type error) +bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, struct kbase_context *kctx, + enum dumpfault_error_type error) { unsigned long flags; @@ -75,8 +75,8 @@ bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, return false; if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) { - dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported", - kctx->tgid, kctx->id, error); + dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported", kctx->tgid, + kctx->id, error); kctx = NULL; } @@ -176,6 +176,9 @@ static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, struct kbase_device *kbdev; unsigned long flags; + CSTD_UNUSED(ubuf); + CSTD_UNUSED(ppos); + if (unlikely(!file)) { pr_warn("%s: file is NULL", __func__); return -EINVAL; @@ -202,6 +205,8 @@ static int debug_csf_fault_release(struct inode 
*in, struct file *file) struct kbase_device *kbdev; unsigned long flags; + CSTD_UNUSED(file); + if (unlikely(!in)) { pr_warn("%s: inode is NULL", __func__); return -EINVAL; @@ -267,5 +272,6 @@ int kbase_debug_csf_fault_init(struct kbase_device *kbdev) void kbase_debug_csf_fault_term(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h index 6e9b1a9d51de..73976ffc50b5 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,8 +67,8 @@ void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev); * Return: true if a dump on fault was initiated or was is in progress and * so caller can opt to wait for the dumping to complete. */ -bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, - struct kbase_context *kctx, enum dumpfault_error_type error); +bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, struct kbase_context *kctx, + enum dumpfault_error_type error); /** * kbase_debug_csf_fault_dump_enabled - Check if dump on fault is enabled. 
@@ -118,7 +118,8 @@ static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kb } static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, - struct kbase_context *kctx, enum dumpfault_error_type error) + struct kbase_context *kctx, + enum dumpfault_error_type error) { return false; } diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c index ff5f947e2da5..46ad63692a34 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -219,10 +219,16 @@ static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_c static int coresight_config_enable(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_config *config) { + bool glb_init_request_pending; + unsigned long flags; int ret = 0; int i; - if (!config) + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + glb_init_request_pending = kbdev->csf.glb_init_request_pending; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!config || glb_init_request_pending) return -EINVAL; if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED) @@ -249,10 +255,16 @@ static int coresight_config_enable(struct kbase_device *kbdev, static int coresight_config_disable(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_config *config) { + bool glb_init_request_pending; + unsigned long flags; int ret = 0; int i; - if (!config) + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + glb_init_request_pending = 
kbdev->csf.glb_init_request_pending; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!config || glb_init_request_pending) return -EINVAL; if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED) @@ -776,7 +788,7 @@ bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, - link) { + link) { const enum kbase_debug_coresight_csf_state prev_state = config_entry->state; long remaining; @@ -836,13 +848,13 @@ void kbase_debug_coresight_csf_term(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, - link) { + link) { list_del_init(&config_entry->link); kfree(config_entry); } list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients, - link) { + link) { list_del_init(&client_entry->link); kfree(client_entry); } diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h index 41b2b00f18c8..18520db15502 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,11 +39,11 @@ #if 0 /* Dummy section to avoid breaking formatting */ int dummy_array[] = { #endif - /* +/* * Generic CSF events */ - /* info_val = 0 */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), +/* info_val = 0 */ +KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), /* info_val == number of CSGs supported */ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END), /* info_val[0:7] == fw version_minor @@ -209,13 +209,11 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), /* info_val = scheduler state */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), - KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE), - KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), - KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING), + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING), - /* info_val = mcu state */ -#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n), +/* info_val = mcu state */ +#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_##n), #include "backend/gpu/mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE @@ -237,8 +235,7 @@ int dummy_array[] = { /* info_val == queue->enabled */ KBASE_KTRACE_CODE_MAKE_CODE(CSI_START), /* info_val == queue->enabled before stop */ - KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), - KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ), /* info_val == CS_REQ ^ CS_ACK that were not processed due to the group * being suspended */ @@ -252,8 +249,7 @@ int dummy_array[] = { /* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK), /* info_val == group->run_State (for group the queue is bound to) 
*/ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), /* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_START), /* info_val == bool for result of the evaluation */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h index 6ba98b7c4a6c..14dcc265bc90 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,11 +40,11 @@ int dummy_array[] = { #endif - /* +/* * Job Slot management events */ - /* info_val==irq rawstat at start */ - KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), +/* info_val==irq rawstat at start */ +KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), /* info_val==jobs processed */ KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ_END), /* In the following: @@ -73,8 +73,7 @@ int dummy_array[] = { * JS_HEAD on submit * - otherwise gpu_addr==0 */ - KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), - KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), /* gpu_addr==JS_HEAD read */ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP), @@ -89,14 +88,12 @@ int dummy_array[] = { * - otherwise gpu_addr==0 */ /* gpu_addr==JS_HEAD read */ - KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), - KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), + 
KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), /* info_val == is_scheduled */ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), /* info_val == is_scheduled */ - KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), - KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), /* info_val == nr jobs submitted */ KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), /* gpu_addr==JS_HEAD_NEXT last written */ @@ -203,4 +200,4 @@ int dummy_array[] = { }; #endif -/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + /* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c index cff6f8959c35..46eb6db36c82 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,14 +27,14 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) { - *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), - "group,slot,prio,csi,kcpu"), 0); + *written += MAX( + snprintf(buffer + *written, MAX(sz - *written, 0), "group,slot,prio,csi,kcpu"), 0); } -void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, - char *buffer, int sz, s32 *written) +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char *buffer, int sz, + s32 *written) { - const union kbase_ktrace_backend * const be_msg = &trace_msg->backend; + const union kbase_ktrace_backend *const be_msg = &trace_msg->backend; /* At present, no need to check for KBASE_KTRACE_FLAG_BACKEND, as the * other backend-specific flags currently imply this anyway */ @@ -43,45 +43,38 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { const s8 slot = be_msg->gpu.csg_nr; /* group,slot, */ - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "%u,%d,", be_msg->gpu.group_handle, slot), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "%u,%d,", + be_msg->gpu.group_handle, slot), + 0); /* prio */ if (slot >= 0) - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "%u", be_msg->gpu.slot_prio), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "%u", + be_msg->gpu.slot_prio), + 0); /* , */ - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - ","), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ","), 0); } else { /* No group,slot,prio fields, but ensure ending with "," */ - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - ",,,"), 0); + *written += 
MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ",,,"), 0); } /* queue parts: csi */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "%d", be_msg->gpu.csi_index), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "%d", + be_msg->gpu.csi_index), + 0); /* , */ - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - ","), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ","), 0); if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { /* kcpu data */ - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "kcpu %d (0x%llx)", - be_msg->kcpu.id, - be_msg->kcpu.extra_info_val), 0); + *written += + MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "kcpu %d (0x%llx)", + be_msg->kcpu.id, be_msg->kcpu.extra_info_val), + 0); } /* Don't end with a trailing "," - this is a 'standalone' formatted @@ -89,10 +82,9 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, */ } -void kbasep_ktrace_add_csf(struct kbase_device *kbdev, - enum kbase_ktrace_code code, struct kbase_queue_group *group, - struct kbase_queue *queue, kbase_ktrace_flag_t flags, - u64 info_val) +void kbasep_ktrace_add_csf(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_queue_group *group, struct kbase_queue *queue, + kbase_ktrace_flag_t flags, u64 info_val) { unsigned long irqflags; struct kbase_ktrace_msg *trace_msg; @@ -113,8 +105,7 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev, kctx = queue->kctx; /* Fill the common part of the message (including backend.gpu.flags) */ - kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, - info_val); + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, info_val); /* Indicate to the common code that backend-specific parts will be * valid @@ -140,11 +131,9 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev, 
trace_msg->backend.gpu.csg_nr = slot; if (slot >= 0) { - struct kbase_csf_csg_slot *csg_slot = - &kbdev->csf.scheduler.csg_slots[slot]; + struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; - trace_msg->backend.gpu.slot_prio = - csg_slot->priority; + trace_msg->backend.gpu.slot_prio = csg_slot->priority; } /* slot >=0 indicates whether slot_prio valid, so no need to * initialize in the case where it's invalid @@ -159,10 +148,9 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); } -void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, - enum kbase_ktrace_code code, - struct kbase_kcpu_command_queue *queue, - u64 info_val1, u64 info_val2) +void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_kcpu_command_queue *queue, u64 info_val1, + u64 info_val2) { unsigned long irqflags; struct kbase_ktrace_msg *trace_msg; @@ -177,8 +165,7 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); /* Fill the common part of the message */ - kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0, - info_val1); + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0, info_val1); /* Indicate to the common code that backend-specific parts will be * valid diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h index e3d03730734d..f3739e9d4191 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,10 +38,9 @@ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF() instead. */ -void kbasep_ktrace_add_csf(struct kbase_device *kbdev, - enum kbase_ktrace_code code, struct kbase_queue_group *group, - struct kbase_queue *queue, kbase_ktrace_flag_t flags, - u64 info_val); +void kbasep_ktrace_add_csf(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_queue_group *group, struct kbase_queue *queue, + kbase_ktrace_flag_t flags, u64 info_val); /** * kbasep_ktrace_add_csf_kcpu - internal function to add trace about the CSF @@ -58,39 +57,36 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev, * * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF_KCPU() instead. */ -void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, - enum kbase_ktrace_code code, - struct kbase_kcpu_command_queue *queue, - u64 info_val1, u64 info_val2); +void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_kcpu_command_queue *queue, u64 info_val1, + u64 info_val2); #define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ - kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, \ - flags, info_val) + kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, flags, info_val) -#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, \ - info_val2) kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), \ - queue, info_val1, info_val2) +#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ + kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), queue, info_val1, info_val2) #else /* KBASE_KTRACE_TARGET_RBUF */ #define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(group);\ - 
CSTD_UNUSED(queue);\ - CSTD_UNUSED(flags);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(group); \ + CSTD_UNUSED(queue); \ + CSTD_UNUSED(flags); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) #define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(queue);\ - CSTD_UNUSED(info_val1);\ - CSTD_UNUSED(info_val2);\ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(queue); \ + CSTD_UNUSED(info_val1); \ + CSTD_UNUSED(info_val2); \ } while (0) #endif /* KBASE_KTRACE_TARGET_RBUF */ @@ -112,21 +108,21 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, #else /* KBASE_KTRACE_TARGET_FTRACE */ #define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(group);\ - CSTD_UNUSED(queue);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(group); \ + CSTD_UNUSED(queue); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) #define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ - do {\ - CSTD_NOP(code);\ - CSTD_UNUSED(queue);\ - CSTD_UNUSED(info_val1);\ - CSTD_UNUSED(info_val2);\ + do { \ + CSTD_NOP(code); \ + CSTD_UNUSED(queue); \ + CSTD_UNUSED(info_val1); \ + CSTD_UNUSED(info_val2); \ } while (0) #endif /* KBASE_KTRACE_TARGET_FTRACE */ @@ -149,15 +145,13 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. 
*/ -#define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \ - do { \ - /* capture values that could come from non-pure fn calls */ \ - struct kbase_queue_group *__group = group; \ - u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, \ - __info_val); \ - KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, NULL, \ - __info_val); \ +#define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \ + do { \ + /* capture values that could come from non-pure fn calls */ \ + struct kbase_queue_group *__group = group; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, NULL, __info_val); \ } while (0) /** @@ -175,29 +169,24 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. */ -#define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \ - do { \ - /* capture values that could come from non-pure fn calls */ \ - struct kbase_queue_group *__group = group; \ - struct kbase_queue *__queue = queue; \ - u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, __queue, 0u, \ - __info_val); \ - KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, \ - __queue, __info_val); \ +#define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \ + do { \ + /* capture values that could come from non-pure fn calls */ \ + struct kbase_queue_group *__group = group; \ + struct kbase_queue *__queue = queue; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, __queue, 0u, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, __queue, __info_val); \ } while (0) - -#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ - do { \ - /* capture values that could come from non-pure fn calls */ \ - struct kbase_kcpu_command_queue 
*__queue = queue; \ - u64 __info_val1 = info_val1; \ - u64 __info_val2 = info_val2; \ - KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, \ - __info_val1, __info_val2); \ - KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, \ - __info_val1, __info_val2); \ +#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ + do { \ + /* capture values that could come from non-pure fn calls */ \ + struct kbase_kcpu_command_queue *__queue = queue; \ + u64 __info_val1 = info_val1; \ + u64 __info_val2 = info_val2; \ + KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, __info_val1, __info_val2); \ + KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, __info_val1, __info_val2); \ } while (0) #endif /* _KBASE_DEBUG_KTRACE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h index 1896e10ed4ab..710f120c5671 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,18 +53,17 @@ #define KBASE_KTRACE_VERSION_MINOR 3 /* indicates if the trace message has valid queue-group related info. */ -#define KBASE_KTRACE_FLAG_CSF_GROUP (((kbase_ktrace_flag_t)1) << 0) +#define KBASE_KTRACE_FLAG_CSF_GROUP (((kbase_ktrace_flag_t)1) << 0) /* indicates if the trace message has valid queue related info. */ -#define KBASE_KTRACE_FLAG_CSF_QUEUE (((kbase_ktrace_flag_t)1) << 1) +#define KBASE_KTRACE_FLAG_CSF_QUEUE (((kbase_ktrace_flag_t)1) << 1) /* indicates if the trace message has valid KCPU-queue related info. 
*/ -#define KBASE_KTRACE_FLAG_CSF_KCPU (((kbase_ktrace_flag_t)1) << 2) +#define KBASE_KTRACE_FLAG_CSF_KCPU (((kbase_ktrace_flag_t)1) << 2) /* Collect all the flags together for debug checking */ #define KBASE_KTRACE_FLAG_BACKEND_ALL \ - (KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | \ - KBASE_KTRACE_FLAG_CSF_KCPU) + (KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | KBASE_KTRACE_FLAG_CSF_KCPU) /** * union kbase_ktrace_backend - backend specific part of a trace message diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h index efa8ab05b1cd..c1f60dd16981 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -62,15 +62,14 @@ /* indicates if the trace message has a valid refcount member */ #define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) /* indicates if the trace message has a valid jobslot member */ -#define KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) +#define KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) /* indicates if the trace message has valid atom related info. 
*/ -#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) +#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) #if KBASE_KTRACE_TARGET_RBUF /* Collect all the flags together for debug checking */ #define KBASE_KTRACE_FLAG_BACKEND_ALL \ - (KBASE_KTRACE_FLAG_JM_REFCOUNT | KBASE_KTRACE_FLAG_JM_JOBSLOT \ - | KBASE_KTRACE_FLAG_JM_ATOM) + (KBASE_KTRACE_FLAG_JM_REFCOUNT | KBASE_KTRACE_FLAG_JM_JOBSLOT | KBASE_KTRACE_FLAG_JM_ATOM) /** * union kbase_ktrace_backend - backend specific part of a trace message diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c index 6597a15e5000..8f95ca67f4d6 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,53 +28,48 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) { *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), - "katom,gpu_addr,jobslot,refcount"), 0); + "katom,gpu_addr,jobslot,refcount"), + 0); } -void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, - char *buffer, int sz, s32 *written) +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char *buffer, int sz, + s32 *written) { /* katom */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "atom %d (ud: 0x%llx 0x%llx)", - trace_msg->backend.gpu.atom_number, - trace_msg->backend.gpu.atom_udata[0], - trace_msg->backend.gpu.atom_udata[1]), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + "atom %d (ud: 0x%llx 0x%llx)", + trace_msg->backend.gpu.atom_number, + trace_msg->backend.gpu.atom_udata[0], + trace_msg->backend.gpu.atom_udata[1]), + 0); /* gpu_addr */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - ",%.8llx,", trace_msg->backend.gpu.gpu_addr), + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ",%.8llx,", + trace_msg->backend.gpu.gpu_addr), 0); else - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - ",,"), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ",,"), 0); /* jobslot */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "%d", trace_msg->backend.gpu.jobslot), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "%d", + trace_msg->backend.gpu.jobslot), + 0); - *written += MAX(snprintf(buffer + 
*written, MAX(sz - *written, 0), - ","), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ","), 0); /* refcount */ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) - *written += MAX(snprintf(buffer + *written, - MAX(sz - *written, 0), - "%d", trace_msg->backend.gpu.refcount), 0); + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "%d", + trace_msg->backend.gpu.refcount), + 0); } -void kbasep_ktrace_add_jm(struct kbase_device *kbdev, - enum kbase_ktrace_code code, - struct kbase_context *kctx, - const struct kbase_jd_atom *katom, u64 gpu_addr, - kbase_ktrace_flag_t flags, int refcount, int jobslot, +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, const struct kbase_jd_atom *katom, + u64 gpu_addr, kbase_ktrace_flag_t flags, int refcount, int jobslot, u64 info_val) { unsigned long irqflags; @@ -89,8 +84,7 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); /* Fill the common part of the message (including backend.gpu.flags) */ - kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, - info_val); + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, info_val); /* Indicate to the common code that backend-specific parts will be * valid @@ -101,8 +95,7 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, if (katom) { trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_JM_ATOM; - trace_msg->backend.gpu.atom_number = - kbase_jd_atom_id(katom->kctx, katom); + trace_msg->backend.gpu.atom_number = kbase_jd_atom_id(katom->kctx, katom); trace_msg->backend.gpu.atom_udata[0] = katom->udata.blob[0]; trace_msg->backend.gpu.atom_udata[1] = katom->udata.blob[1]; } diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h index 41be7a120dcc..b91176deac26 100644 --- 
a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,33 +40,31 @@ * * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead. */ -void kbasep_ktrace_add_jm(struct kbase_device *kbdev, - enum kbase_ktrace_code code, - struct kbase_context *kctx, - const struct kbase_jd_atom *katom, u64 gpu_addr, - kbase_ktrace_flag_t flags, int refcount, int jobslot, +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, const struct kbase_jd_atom *katom, + u64 gpu_addr, kbase_ktrace_flag_t flags, int refcount, int jobslot, u64 info_val); -#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ - refcount, jobslot, info_val) \ - kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ - gpu_addr, flags, refcount, jobslot, info_val) +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, refcount, jobslot, \ + info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, gpu_addr, flags, \ + refcount, jobslot, info_val) #else /* KBASE_KTRACE_TARGET_RBUF */ -#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ - refcount, jobslot, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(flags);\ - CSTD_UNUSED(refcount);\ - CSTD_UNUSED(jobslot);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, refcount, jobslot, \ + 
info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(katom); \ + CSTD_UNUSED(gpu_addr); \ + CSTD_UNUSED(flags); \ + CSTD_UNUSED(refcount); \ + CSTD_UNUSED(jobslot); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) #endif /* KBASE_KTRACE_TARGET_RBUF */ @@ -77,85 +75,79 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, * included by the parent header file */ #if KBASE_KTRACE_TARGET_FTRACE -#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ - jobslot) \ +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, jobslot) \ trace_mali_##code(kctx, jobslot, 0) -#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, jobslot, info_val) \ +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, jobslot, \ + info_val) \ trace_mali_##code(kctx, jobslot, info_val) -#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ - gpu_addr, refcount) \ +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, refcount) \ trace_mali_##code(kctx, refcount, 0) -#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, refcount, info_val) \ +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, gpu_addr, refcount, \ + info_val) \ trace_mali_##code(kctx, refcount, info_val) -#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ - info_val) \ +#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ trace_mali_##code(kctx, gpu_addr, info_val) #else /* KBASE_KTRACE_TARGET_FTRACE */ -#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ - jobslot) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - CSTD_NOP(0);\ +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, 
gpu_addr, jobslot) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(katom); \ + CSTD_UNUSED(gpu_addr); \ + CSTD_UNUSED(jobslot); \ + CSTD_NOP(0); \ } while (0) -#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, jobslot, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, jobslot, \ + info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(katom); \ + CSTD_UNUSED(gpu_addr); \ + CSTD_UNUSED(jobslot); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) -#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ - gpu_addr, refcount) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(refcount);\ - CSTD_NOP(0);\ +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, refcount) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(katom); \ + CSTD_UNUSED(gpu_addr); \ + CSTD_UNUSED(refcount); \ + CSTD_NOP(0); \ } while (0) -#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, refcount, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, gpu_addr, refcount, \ + info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(katom); \ + CSTD_UNUSED(gpu_addr); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) -#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ - 
info_val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ +#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(katom); \ + CSTD_UNUSED(gpu_addr); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) #endif /* KBASE_KTRACE_TARGET_FTRACE */ @@ -179,15 +171,13 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. */ -#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ - jobslot) \ - do { \ - /* capture values that could come from non-pure function calls */ \ - u64 __gpu_addr = gpu_addr; \ - int __jobslot = jobslot; \ - KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ - KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ - 0); \ +#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, jobslot) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, 0); \ KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ } while (0) @@ -208,17 +198,16 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. 
*/ -#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ - jobslot, info_val) \ - do { \ - /* capture values that could come from non-pure function calls */ \ - u64 __gpu_addr = gpu_addr; \ - int __jobslot = jobslot; \ - u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ - KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ - __info_val); \ - KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ +#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, jobslot, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, \ + __jobslot, __info_val); \ } while (0) /** @@ -237,16 +226,15 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. 
*/ -#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ - refcount) \ - do { \ - /* capture values that could come from non-pure function calls */ \ - u64 __gpu_addr = gpu_addr; \ - int __refcount = refcount; \ - KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ - KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ - 0u); \ - KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ +#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, refcount) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, 0u); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, \ + __refcount); \ } while (0) /** @@ -267,17 +255,17 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. 
*/ -#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, refcount, info_val) \ - do { \ - /* capture values that could come from non-pure function calls */ \ - u64 __gpu_addr = gpu_addr; \ - int __refcount = refcount; \ - u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ - KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ - __info_val); \ - KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ +#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, gpu_addr, refcount, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ + __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, \ + __refcount, __info_val); \ } while (0) /** @@ -296,13 +284,13 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. 
*/ -#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ - do { \ - /* capture values that could come from non-pure function calls */ \ - u64 __gpu_addr = gpu_addr; \ - u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ - 0u, 0, 0, __info_val); \ +#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, 0u, 0, 0, \ + __info_val); \ KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ } while (0) diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h index ddcac906c492..0b0de2385f85 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,77 +69,69 @@ DEFINE_MALI_ADD_EVENT(SCHED_BUSY); DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE); DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED); DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING); -#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n); +#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_##n); #include "backend/gpu/mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE -DECLARE_EVENT_CLASS(mali_csf_grp_q_template, +DECLARE_EVENT_CLASS( + mali_csf_grp_q_template, TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, - struct kbase_queue *queue, u64 info_val), + struct kbase_queue *queue, u64 info_val), TP_ARGS(kbdev, group, queue, info_val), - TP_STRUCT__entry( - __field(u64, info_val) - __field(pid_t, kctx_tgid) - __field(u32, kctx_id) - __field(u8, group_handle) - __field(s8, csg_nr) - __field(u8, slot_prio) - __field(s8, csi_index) - ), - TP_fast_assign( - { - struct kbase_context *kctx = NULL; + TP_STRUCT__entry(__field(u64, info_val) __field(pid_t, kctx_tgid) __field(u32, kctx_id) + __field(u8, group_handle) __field(s8, csg_nr) + __field(u8, slot_prio) __field(s8, csi_index)), + TP_fast_assign({ + struct kbase_context *kctx = NULL; - __entry->info_val = info_val; - /* Note: if required in future, we could record some + __entry->info_val = info_val; + /* Note: if required in future, we could record some * flags in __entry about whether the group/queue parts * are valid, and add that to the trace message e.g. 
* by using __print_flags()/__print_symbolic() */ - if (queue) { - /* Note: kctx overridden by group->kctx later if group is valid */ - kctx = queue->kctx; - __entry->csi_index = queue->csi_index; - } else { - __entry->csi_index = -1; - } - - if (group) { - kctx = group->kctx; - __entry->group_handle = group->handle; - __entry->csg_nr = group->csg_nr; - if (group->csg_nr >= 0) - __entry->slot_prio = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; - else - __entry->slot_prio = 0u; - } else { - __entry->group_handle = 0u; - __entry->csg_nr = -1; - __entry->slot_prio = 0u; - } - __entry->kctx_id = (kctx) ? kctx->id : 0u; - __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; + if (queue) { + /* Note: kctx overridden by group->kctx later if group is valid */ + kctx = queue->kctx; + __entry->csi_index = queue->csi_index; + } else { + __entry->csi_index = -1; } - ), - TP_printk("kctx=%d_%u group=%u slot=%d prio=%u csi=%d info=0x%llx", - __entry->kctx_tgid, __entry->kctx_id, - __entry->group_handle, __entry->csg_nr, - __entry->slot_prio, __entry->csi_index, - __entry->info_val) -); + if (group) { + kctx = group->kctx; + __entry->group_handle = group->handle; + __entry->csg_nr = group->csg_nr; + if (group->csg_nr >= 0) + __entry->slot_prio = + kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; + else + __entry->slot_prio = 0u; + } else { + __entry->group_handle = 0u; + __entry->csg_nr = -1; + __entry->slot_prio = 0u; + } + __entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + } + + ), + TP_printk("kctx=%d_%u group=%u slot=%d prio=%u csi=%d info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->group_handle, __entry->csg_nr, __entry->slot_prio, + __entry->csi_index, __entry->info_val)); /* * Group events */ -#define DEFINE_MALI_CSF_GRP_EVENT(name) \ - DEFINE_EVENT_PRINT(mali_csf_grp_q_template, mali_##name, \ - TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ - struct kbase_queue *queue, u64 info_val), \ - TP_ARGS(kbdev, group, queue, info_val), \ - TP_printk("kctx=%d_%u group=%u slot=%d prio=%u info=0x%llx", \ - __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ - __entry->csg_nr, __entry->slot_prio, __entry->info_val)) +#define DEFINE_MALI_CSF_GRP_EVENT(name) \ + DEFINE_EVENT_PRINT(mali_csf_grp_q_template, mali_##name, \ + TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ + struct kbase_queue *queue, u64 info_val), \ + TP_ARGS(kbdev, group, queue, info_val), \ + TP_printk("kctx=%d_%u group=%u slot=%d prio=%u info=0x%llx", \ + __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ + __entry->csg_nr, __entry->slot_prio, __entry->info_val)) DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START_REQ); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP_REQ); @@ -191,11 +183,11 @@ DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED); /* * Group + Queue events */ -#define DEFINE_MALI_CSF_GRP_Q_EVENT(name) \ - DEFINE_EVENT(mali_csf_grp_q_template, mali_##name, \ - TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ - struct kbase_queue *queue, u64 info_val), \ - TP_ARGS(kbdev, group, queue, info_val)) +#define DEFINE_MALI_CSF_GRP_Q_EVENT(name) \ + DEFINE_EVENT(mali_csf_grp_q_template, mali_##name, \ + TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ + struct kbase_queue *queue, u64 info_val), \ + TP_ARGS(kbdev, group, queue, info_val)) DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START); DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP); @@ -222,36 +214,29 @@ 
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_CLEAR); * KCPU queue events */ DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template, - TP_PROTO(struct kbase_kcpu_command_queue *queue, - u64 info_val1, u64 info_val2), - TP_ARGS(queue, info_val1, info_val2), - TP_STRUCT__entry( - __field(u64, info_val1) - __field(u64, info_val2) - __field(pid_t, kctx_tgid) - __field(u32, kctx_id) - __field(u8, id) - ), - TP_fast_assign( - { - __entry->info_val1 = info_val1; - __entry->info_val2 = info_val2; - __entry->kctx_id = queue->kctx->id; - __entry->kctx_tgid = queue->kctx->tgid; - __entry->id = queue->id; - } + TP_PROTO(struct kbase_kcpu_command_queue *queue, u64 info_val1, u64 info_val2), + TP_ARGS(queue, info_val1, info_val2), + TP_STRUCT__entry(__field(u64, info_val1) __field(u64, info_val2) + __field(pid_t, kctx_tgid) __field(u32, kctx_id) + __field(u8, id)), + TP_fast_assign({ + __entry->info_val1 = info_val1; + __entry->info_val2 = info_val2; + __entry->kctx_id = queue->kctx->id; + __entry->kctx_tgid = queue->kctx->tgid; + __entry->id = queue->id; + } - ), - TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx", - __entry->kctx_tgid, __entry->kctx_id, __entry->id, - __entry->info_val1, __entry->info_val2) -); + ), + TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx", + __entry->kctx_tgid, __entry->kctx_id, __entry->id, __entry->info_val1, + __entry->info_val2)); -#define DEFINE_MALI_CSF_KCPU_EVENT(name) \ - DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \ - TP_PROTO(struct kbase_kcpu_command_queue *queue, \ - u64 info_val1, u64 info_val2), \ - TP_ARGS(queue, info_val1, info_val2)) +#define DEFINE_MALI_CSF_KCPU_EVENT(name) \ + DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \ + TP_PROTO(struct kbase_kcpu_command_queue *queue, u64 info_val1, \ + u64 info_val2), \ + TP_ARGS(queue, info_val1, info_val2)) DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_CREATE); DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DELETE); diff --git 
a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h index 8fa4e2a7c10b..fd62bae9e0a9 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,28 +28,20 @@ #define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ DECLARE_EVENT_CLASS(mali_jm_slot_template, - TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), - TP_ARGS(kctx, jobslot, info_val), - TP_STRUCT__entry( - __field(pid_t, kctx_tgid) - __field(u32, kctx_id) - __field(unsigned int, jobslot) - __field(u64, info_val) - ), - TP_fast_assign( - __entry->kctx_id = (kctx) ? kctx->id : 0u; - __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; - __entry->jobslot = jobslot; - __entry->info_val = info_val; - ), - TP_printk("kctx=%d_%u jobslot=%u info=0x%llx", __entry->kctx_tgid, - __entry->kctx_id, __entry->jobslot, __entry->info_val) -); + TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), + TP_ARGS(kctx, jobslot, info_val), + TP_STRUCT__entry(__field(pid_t, kctx_tgid) __field(u32, kctx_id) + __field(unsigned int, jobslot) __field(u64, info_val)), + TP_fast_assign(__entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + __entry->jobslot = jobslot; __entry->info_val = info_val;), + TP_printk("kctx=%d_%u jobslot=%u info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->jobslot, __entry->info_val)); -#define DEFINE_MALI_JM_SLOT_EVENT(name) \ -DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ - TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \ - TP_ARGS(kctx, jobslot, info_val)) +#define DEFINE_MALI_JM_SLOT_EVENT(name) \ + DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ + TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \ + TP_ARGS(kctx, jobslot, info_val)) DEFINE_MALI_JM_SLOT_EVENT(JM_RETURN_ATOM_TO_JS); DEFINE_MALI_JM_SLOT_EVENT(JM_MARK_FOR_RETURN_TO_JS); DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); @@ -86,28 +78,21 @@ DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_IS_BLOCKED); #undef DEFINE_MALI_JM_SLOT_EVENT DECLARE_EVENT_CLASS(mali_jm_refcount_template, - TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), - TP_ARGS(kctx, refcount, info_val), - TP_STRUCT__entry( - __field(pid_t, kctx_tgid) - __field(u32, kctx_id) - __field(unsigned int, refcount) - __field(u64, info_val) - ), - TP_fast_assign( - __entry->kctx_id = (kctx) ? kctx->id : 0u; - __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; - __entry->refcount = refcount; - __entry->info_val = info_val; - ), - TP_printk("kctx=%d_%u refcount=%u info=0x%llx", __entry->kctx_tgid, - __entry->kctx_id, __entry->refcount, __entry->info_val) -); + TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), + TP_ARGS(kctx, refcount, info_val), + TP_STRUCT__entry(__field(pid_t, kctx_tgid) __field(u32, kctx_id) + __field(unsigned int, refcount) + __field(u64, info_val)), + TP_fast_assign(__entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + __entry->refcount = refcount; __entry->info_val = info_val;), + TP_printk("kctx=%d_%u refcount=%u info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->refcount, __entry->info_val)); -#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ -DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ - TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), \ - TP_ARGS(kctx, refcount, info_val)) +#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ + DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ + TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), \ + TP_ARGS(kctx, refcount, info_val)) DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); @@ -122,28 +107,20 @@ DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); #undef DEFINE_MALI_JM_REFCOUNT_EVENT DECLARE_EVENT_CLASS(mali_jm_add_template, - TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), - TP_ARGS(kctx, gpu_addr, info_val), - TP_STRUCT__entry( - __field(pid_t, kctx_tgid) - __field(u32, kctx_id) - __field(u64, gpu_addr) - __field(u64, info_val) - ), - TP_fast_assign( - __entry->kctx_id = (kctx) ? kctx->id : 0u; - __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; - __entry->gpu_addr = gpu_addr; - __entry->info_val = info_val; - ), - TP_printk("kctx=%d_%u gpu_addr=0x%llx info=0x%llx", __entry->kctx_tgid, - __entry->kctx_id, __entry->gpu_addr, __entry->info_val) -); + TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), + TP_ARGS(kctx, gpu_addr, info_val), + TP_STRUCT__entry(__field(pid_t, kctx_tgid) __field(u32, kctx_id) + __field(u64, gpu_addr) __field(u64, info_val)), + TP_fast_assign(__entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + __entry->gpu_addr = gpu_addr; __entry->info_val = info_val;), + TP_printk("kctx=%d_%u gpu_addr=0x%llx info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->gpu_addr, __entry->info_val)); -#define DEFINE_MALI_JM_ADD_EVENT(name) \ -DEFINE_EVENT(mali_jm_add_template, mali_##name, \ - TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), \ - TP_ARGS(kctx, gpu_addr, info_val)) +#define DEFINE_MALI_JM_ADD_EVENT(name) \ + DEFINE_EVENT(mali_jm_add_template, mali_##name, \ + TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), \ + TP_ARGS(kctx, gpu_addr, info_val)) DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c index f521b47120fb..12a722765c5e 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,8 @@ int kbase_ktrace_init(struct kbase_device *kbdev) return -EINVAL; kbdev->ktrace.rbuf = rbuf; +#else + CSTD_UNUSED(kbdev); #endif /* KBASE_KTRACE_TARGET_RBUF */ return 0; } @@ -43,6 +45,8 @@ void kbase_ktrace_term(struct kbase_device *kbdev) #if KBASE_KTRACE_TARGET_RBUF kfree(kbdev->ktrace.rbuf); kbdev->ktrace.rbuf = NULL; +#else + CSTD_UNUSED(kbdev); #endif /* KBASE_KTRACE_TARGET_RBUF */ } @@ -55,33 +59,33 @@ void kbase_ktrace_hook_wrapper(void *param) #if KBASE_KTRACE_TARGET_RBUF -static const char * const kbasep_ktrace_code_string[] = { - /* +static const char *const kbasep_ktrace_code_string[] = { +/* * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE * THIS MUST BE USED AT THE START OF THE ARRAY */ -#define KBASE_KTRACE_CODE_MAKE_CODE(X) # X +#define KBASE_KTRACE_CODE_MAKE_CODE(X) #X #include "debug/mali_kbase_debug_ktrace_codes.h" -#undef KBASE_KTRACE_CODE_MAKE_CODE +#undef KBASE_KTRACE_CODE_MAKE_CODE }; static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) { written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - "secs,thread_id,cpu,code,kctx,"), 0); + "secs,thread_id,cpu,code,kctx,"), + 0); kbasep_ktrace_backend_format_header(buffer, sz, &written); written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - ",info_val,ktrace_version=%u.%u", - KBASE_KTRACE_VERSION_MAJOR, - KBASE_KTRACE_VERSION_MINOR), 0); + ",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR, + KBASE_KTRACE_VERSION_MINOR), + 0); buffer[sz - 1] = 0; } -static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, - char *buffer, int sz) +static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, char *buffer, int sz) { s32 written = 0; @@ -89,27 +93,24 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, * * 
secs,thread_id,cpu,code, */ - written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - "%d.%.6d,%d,%d,%s,", - (int)trace_msg->timestamp.tv_sec, - (int)(trace_msg->timestamp.tv_nsec / 1000), - trace_msg->thread_id, trace_msg->cpu, - kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), - 0); + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), "%d.%.6d,%d,%d,%s,", + (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, + trace_msg->cpu, + kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), + 0); /* kctx part: */ if (trace_msg->kctx_tgid) { - written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - "%d_%u", - trace_msg->kctx_tgid, trace_msg->kctx_id), 0); + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), "%d_%u", + trace_msg->kctx_tgid, trace_msg->kctx_id), + 0); } /* Trailing comma */ - written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - ","), 0); + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ","), 0); /* Backend parts */ - kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, - &written); + kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, &written); /* Rest of message: * @@ -118,14 +119,13 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, * Note that the last column is empty, it's simply to hold the ktrace * version in the header */ - written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - ",0x%.16llx", - (unsigned long long)trace_msg->info_val), 0); + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ",0x%.16llx", + (unsigned long long)trace_msg->info_val), + 0); buffer[sz - 1] = 0; } -static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, - struct kbase_ktrace_msg *trace_msg) +static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, struct kbase_ktrace_msg *trace_msg) { char buffer[KTRACE_DUMP_MESSAGE_SIZE]; @@ -150,10 +150,9 @@ struct kbase_ktrace_msg 
*kbasep_ktrace_reserve(struct kbase_ktrace *ktrace) return trace_msg; } -void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, - struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, - struct kbase_context *kctx, kbase_ktrace_flag_t flags, - u64 info_val) +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, struct kbase_ktrace_msg *trace_msg, + enum kbase_ktrace_code code, struct kbase_context *kctx, + kbase_ktrace_flag_t flags, u64 info_val) { lockdep_assert_held(&ktrace->lock); @@ -178,8 +177,7 @@ void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, } void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, - struct kbase_context *kctx, kbase_ktrace_flag_t flags, - u64 info_val) + struct kbase_context *kctx, kbase_ktrace_flag_t flags, u64 info_val) { unsigned long irqflags; struct kbase_ktrace_msg *trace_msg; @@ -195,8 +193,7 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); /* Fill the common part of the message (including backend.gpu.flags) */ - kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, - info_val); + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, info_val); /* Done */ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); @@ -254,7 +251,7 @@ struct trace_seq_state { static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) { struct trace_seq_state *state = s->private; - int i; + unsigned int i; if (*pos == 0) /* See Documentation/filesystems/seq_file.txt */ @@ -263,8 +260,7 @@ static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) if (*pos > KBASE_KTRACE_SIZE) return NULL; i = state->start + *pos; - if ((state->end >= state->start && i >= state->end) || - i >= state->end + KBASE_KTRACE_SIZE) + if ((state->end >= state->start && i >= state->end) || i >= state->end + KBASE_KTRACE_SIZE) return NULL; i &= KBASE_KTRACE_MASK; @@ -274,12 +270,14 @@ static void 
*kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) static void kbasep_ktrace_seq_stop(struct seq_file *s, void *data) { + CSTD_UNUSED(s); + CSTD_UNUSED(data); } static void *kbasep_ktrace_seq_next(struct seq_file *s, void *data, loff_t *pos) { struct trace_seq_state *state = s->private; - int i; + unsigned int i; if (data != SEQ_START_TOKEN) (*pos)++; @@ -320,8 +318,7 @@ static int kbasep_ktrace_debugfs_open(struct inode *inode, struct file *file) struct trace_seq_state *state; - state = __seq_open_private(file, &kbasep_ktrace_seq_ops, - sizeof(*state)); + state = __seq_open_private(file, &kbasep_ktrace_seq_ops, sizeof(*state)); if (!state) return -ENOMEM; @@ -344,9 +341,8 @@ static const struct file_operations kbasep_ktrace_debugfs_fops = { void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_file("mali_trace", 0444, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_ktrace_debugfs_fops); + debugfs_create_file("mali_trace", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_ktrace_debugfs_fops); } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h index 11f0b5c42c89..688436ec98ec 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -104,8 +104,7 @@ static inline bool kbasep_ktrace_initialized(struct kbase_ktrace *ktrace) * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD() instead. 
*/ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, - struct kbase_context *kctx, kbase_ktrace_flag_t flags, - u64 info_val); + struct kbase_context *kctx, kbase_ktrace_flag_t flags, u64 info_val); /** * kbasep_ktrace_clear - clear the trace ringbuffer @@ -123,36 +122,33 @@ void kbasep_ktrace_clear(struct kbase_device *kbdev); */ void kbasep_ktrace_dump(struct kbase_device *kbdev); -#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ - kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, \ - info_val) \ +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, info_val) -#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ - kbasep_ktrace_clear(kbdev) +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) kbasep_ktrace_clear(kbdev) -#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ - kbasep_ktrace_dump(kbdev) +#define KBASE_KTRACE_RBUF_DUMP(kbdev) kbasep_ktrace_dump(kbdev) #else /* KBASE_KTRACE_TARGET_RBUF */ #define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ - do { \ - CSTD_UNUSED(kbdev); \ - CSTD_NOP(code); \ - CSTD_UNUSED(kctx); \ - CSTD_UNUSED(info_val); \ - CSTD_NOP(0); \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) #define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ - do { \ - CSTD_UNUSED(kbdev); \ - CSTD_NOP(0); \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ } while (0) #define KBASE_KTRACE_RBUF_DUMP(kbdev) \ - do { \ - CSTD_UNUSED(kbdev); \ - CSTD_NOP(0); \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ } while (0) #endif /* KBASE_KTRACE_TARGET_RBUF */ @@ -161,32 +157,31 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev); */ #if KBASE_KTRACE_TARGET_FTRACE -#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ - trace_mali_##code(kctx, info_val) +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) trace_mali_##code(kctx, info_val) #else /* KBASE_KTRACE_TARGET_FTRACE */ 
#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ - do { \ - CSTD_UNUSED(kbdev); \ - CSTD_NOP(code); \ - CSTD_UNUSED(kctx); \ - CSTD_UNUSED(info_val); \ - CSTD_NOP(0); \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ } while (0) #endif /* KBASE_KTRACE_TARGET_FTRACE */ /* No 'clear' implementation for ftrace yet */ #define KBASE_KTRACE_FTRACE_CLEAR(kbdev) \ - do { \ - CSTD_UNUSED(kbdev); \ - CSTD_NOP(0); \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ } while (0) /* No 'dump' implementation for ftrace yet */ #define KBASE_KTRACE_FTRACE_DUMP(kbdev) \ - do { \ - CSTD_UNUSED(kbdev); \ - CSTD_NOP(0); \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ } while (0) /* @@ -207,21 +202,21 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev); * a) be static or static inline, and * b) just return 0 and have no other statements present in the body. */ -#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ - do { \ +#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ /* capture values that could come from non-pure function calls */ \ - u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ - KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ + KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ } while (0) /** * KBASE_KTRACE_CLEAR - Clear the trace, if applicable to the target(s) * @kbdev: kbase device */ -#define KBASE_KTRACE_CLEAR(kbdev) \ - do { \ - KBASE_KTRACE_RBUF_CLEAR(kbdev); \ +#define KBASE_KTRACE_CLEAR(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_CLEAR(kbdev); \ KBASE_KTRACE_FTRACE_CLEAR(kbdev); \ } while (0) @@ -229,9 +224,9 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev); * KBASE_KTRACE_DUMP - Dump the trace, if applicable to the target(s) * @kbdev: kbase device */ -#define KBASE_KTRACE_DUMP(kbdev) \ - do { \ - 
KBASE_KTRACE_RBUF_DUMP(kbdev); \ +#define KBASE_KTRACE_DUMP(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_DUMP(kbdev); \ KBASE_KTRACE_FTRACE_DUMP(kbdev); \ } while (0) diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h index 6103c3ee04a8..991f70fe8540 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,7 +42,7 @@ * strings * * Before #including, the includer MUST #define KBASE_KTRACE_CODE_MAKE_CODE. - * After #including, the includer MUST #under KBASE_KTRACE_CODE_MAKE_CODE. + * After #including, the includer MUST #undef KBASE_KTRACE_CODE_MAKE_CODE. 
* * e.g.: * #define KBASE_KTRACE_CODE( X ) KBASE_KTRACE_CODE_ ## X @@ -76,11 +76,11 @@ int dummy_array[] = { #endif - /* +/* * Core events */ - /* no info_val */ - KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), +/* no info_val */ +KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), /* no info_val */ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), /* info_val == GPU_IRQ_STATUS register */ @@ -101,12 +101,9 @@ int dummy_array[] = { */ KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), - KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), - KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), - KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), - KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), - KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), - KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), @@ -127,12 +124,10 @@ int dummy_array[] = { /* info_val == kbdev->pm.active_count*/ KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), /* info_val == kbdev->pm.active_count*/ - KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), - KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_OFF), /* info_val == policy number, or -1 for "Already changing" */ - KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), - KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), + KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), /* info_val == policy number */ KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), /* info_val == policy number */ @@ 
-142,8 +137,8 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK), KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK), - /* info_val = l2 state */ -#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n), +/* info_val = l2 state */ +#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_##n), #include "backend/gpu/mali_kbase_pm_l2_states.h" #undef KBASEP_L2_STATE @@ -158,8 +153,7 @@ int dummy_array[] = { /* * Arbitration events */ - KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), - KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT), #endif @@ -178,4 +172,4 @@ int dummy_array[] = { }; #endif -/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + /* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h index a0fc9e51d274..682ffe40c7e2 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -105,32 +105,30 @@ union kbase_ktrace_backend; * - code * - flags */ -#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) +#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) /* Collect all the common flags together for debug checking */ -#define KBASE_KTRACE_FLAG_COMMON_ALL \ - (KBASE_KTRACE_FLAG_BACKEND) +#define KBASE_KTRACE_FLAG_COMMON_ALL (KBASE_KTRACE_FLAG_BACKEND) -#define KBASE_KTRACE_FLAG_ALL \ - (KBASE_KTRACE_FLAG_COMMON_ALL | KBASE_KTRACE_FLAG_BACKEND_ALL) +#define KBASE_KTRACE_FLAG_ALL (KBASE_KTRACE_FLAG_COMMON_ALL | KBASE_KTRACE_FLAG_BACKEND_ALL) #define KBASE_KTRACE_SHIFT (9) /* 512 entries */ #define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) -#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) +#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT) - 1) -#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_ ## X +#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_##X /* Note: compiletime_assert() about this against kbase_ktrace_code_t is in * kbase_ktrace_init() */ enum kbase_ktrace_code { - /* +/* * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE * THIS MUST BE USED AT THE START OF THE ENUM */ #define KBASE_KTRACE_CODE_MAKE_CODE(X) KBASE_KTRACE_CODE(X) #include -#undef KBASE_KTRACE_CODE_MAKE_CODE +#undef KBASE_KTRACE_CODE_MAKE_CODE /* Comma on its own, to extend the list */ , /* Must be the last in the enum */ @@ -165,22 +163,21 @@ struct kbase_ktrace_msg { }; struct kbase_ktrace { - spinlock_t lock; - u16 first_out; - u16 next_in; + spinlock_t lock; + u16 first_out; + u16 next_in; struct kbase_ktrace_msg *rbuf; }; - static inline void kbase_ktrace_compiletime_asserts(void) { /* See also documentation of enum kbase_ktrace_code */ compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || - KBASE_KTRACE_CODE_COUNT <= (1ull << 
(sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), - "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); + KBASE_KTRACE_CODE_COUNT <= + (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), + "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); compiletime_assert((KBASE_KTRACE_FLAG_BACKEND_ALL & KBASE_KTRACE_FLAG_COMMON_ALL) == 0, - "KTrace backend flags intersect with KTrace common flags"); - + "KTrace backend flags intersect with KTrace common flags"); } #endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h index ba93f29fe5e6..6c8e0abe64c8 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,9 +53,8 @@ void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written); * %KBASE_KTRACE_FLAG_BACKEND clear. The backend must handle that setting * appropriately. 
*/ -void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, - char *buffer, int sz, s32 *written); - +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char *buffer, int sz, + s32 *written); /** * kbasep_ktrace_reserve - internal function to reserve space for a ktrace @@ -80,10 +79,9 @@ struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); * * The common part includes the mandatory parts of the backend part */ -void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, - struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, - struct kbase_context *kctx, kbase_ktrace_flag_t flags, - u64 info_val); +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, struct kbase_ktrace_msg *trace_msg, + enum kbase_ktrace_code code, struct kbase_context *kctx, + kbase_ktrace_flag_t flags, u64 info_val); #endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h index 6d96647161b4..1ebddfa3f44f 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,30 +29,22 @@ #if KBASE_KTRACE_TARGET_FTRACE -DECLARE_EVENT_CLASS(mali_add_template, - TP_PROTO(struct kbase_context *kctx, u64 info_val), - TP_ARGS(kctx, info_val), - TP_STRUCT__entry( - __field(pid_t, kctx_tgid) - __field(u32, kctx_id) - __field(u64, info_val) - ), - TP_fast_assign( - __entry->kctx_id = (kctx) ? kctx->id : 0u; - __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; - __entry->info_val = info_val; - ), - TP_printk("kctx=%d_%u info=0x%llx", __entry->kctx_tgid, - __entry->kctx_id, __entry->info_val) -); +DECLARE_EVENT_CLASS(mali_add_template, TP_PROTO(struct kbase_context *kctx, u64 info_val), + TP_ARGS(kctx, info_val), + TP_STRUCT__entry(__field(pid_t, kctx_tgid) __field(u32, kctx_id) + __field(u64, info_val)), + TP_fast_assign(__entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; + __entry->info_val = info_val;), + TP_printk("kctx=%d_%u info=0x%llx", __entry->kctx_tgid, __entry->kctx_id, + __entry->info_val)); /* DEFINE_MALI_ADD_EVENT is available also to backends for backend-specific * simple trace codes */ -#define DEFINE_MALI_ADD_EVENT(name) \ -DEFINE_EVENT(mali_add_template, mali_##name, \ - TP_PROTO(struct kbase_context *kctx, u64 info_val), \ - TP_ARGS(kctx, info_val)) +#define DEFINE_MALI_ADD_EVENT(name) \ + DEFINE_EVENT(mali_add_template, mali_##name, \ + TP_PROTO(struct kbase_context *kctx, u64 info_val), TP_ARGS(kctx, info_val)) DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); @@ -98,7 +90,7 @@ DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ); DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK); DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK); -#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_ ## n); +#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_##n); #include "backend/gpu/mali_kbase_pm_l2_states.h" #undef KBASEP_L2_STATE DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index f7054f5b0090..36778923f364 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -34,13 +34,15 @@ #include #include #include -#include 
#include #include +#include #include #include -#include #include +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include +#endif /** * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC @@ -55,7 +57,6 @@ static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) { if (kbdev->csf.firmware_inited) { kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx); - kbase_vinstr_term(kbdev->vinstr_ctx); kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); kbase_csf_firmware_unload_term(kbdev); @@ -84,10 +85,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_pm_powerup; - err = kbase_backend_timer_init(kbdev); - if (err) - goto fail_timer; - #ifdef CONFIG_MALI_BIFROST_DEBUG #if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { @@ -98,35 +95,33 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) #endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ - kbase_ipa_control_init(kbdev); + { + kbase_ipa_control_init(kbdev); - /* Initialise the metrics subsystem, it couldn't be initialized earlier - * due to dependency on kbase_ipa_control. - */ - err = kbasep_pm_metrics_init(kbdev); - if (err) - goto fail_pm_metrics_init; + /* Initialise the metrics subsystem, it couldn't be initialized earlier + * due to dependency on kbase_ipa_control. + */ + err = kbasep_pm_metrics_init(kbdev); + if (err) + goto fail_pm_metrics_init; - /* Do the initialisation of devfreq. - * Devfreq needs backend_timer_init() for completion of its - * initialisation and it also needs to catch the first callback - * occurrence of the runtime_suspend event for maintaining state - * coherence with the backend power management, hence needs to be - * placed before the kbase_pm_context_idle(). 
- */ - err = kbase_backend_devfreq_init(kbdev); - if (err) - goto fail_devfreq_init; + /* Do the initialisation of devfreq. + * Devfreq needs backend_timer_init() for completion of its + * initialisation and it also needs to catch the first callback + * occurrence of the runtime_suspend event for maintaining state + * coherence with the backend power management, hence needs to be + * placed before the kbase_pm_context_idle(). + */ + err = kbase_backend_devfreq_init(kbdev); + if (err) + goto fail_devfreq_init; + } /* Update gpuprops with L2_FEATURES if applicable */ err = kbase_gpuprops_update_l2_features(kbdev); if (err) goto fail_update_l2_features; - err = kbase_backend_time_init(kbdev); - if (err) - goto fail_update_l2_features; - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); kbase_pm_context_idle(kbdev); @@ -148,8 +143,6 @@ fail_interrupt_test: #endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ - kbase_backend_timer_term(kbdev); -fail_timer: kbase_pm_context_idle(kbdev); kbase_hwaccess_pm_halt(kbdev); fail_pm_powerup: @@ -224,8 +217,7 @@ static int kbase_csf_late_init(struct kbase_device *kbdev) */ static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev) { - return kbase_hwcnt_watchdog_if_timer_create( - &kbdev->hwcnt_watchdog_timer); + return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer); } /** @@ -246,8 +238,7 @@ static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev) */ static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) { - return kbase_hwcnt_backend_csf_if_fw_create( - kbdev, &kbdev->hwcnt_backend_csf_if_fw); + return kbase_hwcnt_backend_csf_if_fw_create(kbdev, &kbdev->hwcnt_backend_csf_if_fw); } /** @@ -268,10 +259,10 @@ static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) */ static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) { - return kbase_hwcnt_backend_csf_create( - 
&kbdev->hwcnt_backend_csf_if_fw, - KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, - &kbdev->hwcnt_watchdog_timer, &kbdev->hwcnt_gpu_iface); + return kbase_hwcnt_backend_csf_create(&kbdev->hwcnt_backend_csf_if_fw, + KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, + &kbdev->hwcnt_watchdog_timer, + &kbdev->hwcnt_gpu_iface); } /** @@ -285,20 +276,21 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_REAL_HW) - { kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed" }, + { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -330,6 +322,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_debug_csf_fault_init, 
kbase_debug_csf_fault_term, "CSF fault debug initialization failed" }, { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, + { kbase_csf_fence_timer_debugfs_init, kbase_csf_fence_timer_debugfs_term, + "Fence timeout DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with * misc_register(), otherwise it causes a race condition between * registering the device and a uevent event being generated for @@ -354,8 +348,7 @@ static const struct kbase_device_init dev_init[] = { #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; -static void kbase_device_term_partial(struct kbase_device *kbdev, - unsigned int i) +static void kbase_device_term_partial(struct kbase_device *kbdev, unsigned int i) { while (i-- > 0) { if (dev_init[i].term) @@ -383,8 +376,7 @@ int kbase_device_init(struct kbase_device *kbdev) if (dev_init[i].init) { err = dev_init[i].init(kbdev); if (err) { - dev_err(kbdev->dev, "%s error = %d\n", - dev_init[i].err_mes, err); + dev_err(kbdev->dev, "%s error = %d\n", dev_init[i].err_mes, err); kbase_device_term_partial(kbdev, i); break; } @@ -417,42 +409,27 @@ static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev) */ ret = kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); if (ret) { - dev_err(kbdev->dev, - "GPU hwcnt backend metadata creation failed"); + dev_err(kbdev->dev, "GPU hwcnt backend metadata creation failed"); return ret; } - ret = kbase_hwcnt_virtualizer_init( - kbdev->hwcnt_gpu_ctx, - KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, - &kbdev->hwcnt_gpu_virt); + ret = kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx, + KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, + &kbdev->hwcnt_gpu_virt); if (ret) { - dev_err(kbdev->dev, - "GPU hwcnt virtualizer initialization failed"); + dev_err(kbdev->dev, "GPU hwcnt virtualizer initialization failed"); goto virt_fail; } - ret = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); + ret = 
kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, &kbdev->kinstr_prfcnt_ctx); if (ret) { - dev_err(kbdev->dev, - "Virtual instrumentation initialization failed"); - goto vinstr_fail; - } - - ret = kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, - &kbdev->kinstr_prfcnt_ctx); - if (ret) { - dev_err(kbdev->dev, - "Performance counter instrumentation initialization failed"); + dev_err(kbdev->dev, "Performance counter instrumentation initialization failed"); goto kinstr_prfcnt_fail; } return ret; kinstr_prfcnt_fail: - kbase_vinstr_term(kbdev->vinstr_ctx); - -vinstr_fail: kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); virt_fail: diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c index 2abd62aaa8b1..c7f34bc78137 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,11 +20,11 @@ */ #include +#include #include #include #include #include -#include #include #include #include @@ -39,14 +39,9 @@ * * This function is called from the interrupt handler when a GPU fault occurs. 
*/ -static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, - u32 as_nr, bool as_valid) +static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, u32 as_nr, bool as_valid) { - u64 address = (u64) kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; - - address |= kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + u64 address = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(GPU_FAULTADDRESS)); /* Report GPU fault for all contexts in case either * the address space is invalid or it's MCU address space. @@ -56,13 +51,11 @@ static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) { - const u32 status = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTSTATUS)); - const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; - const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> - GPU_FAULTSTATUS_JASID_SHIFT; + const u32 status = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_FAULTSTATUS)); + const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK; + const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT; bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == - GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT; + GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT; if (bus_fault) { /* If as_valid, reset gpu when ASID is for MCU. 
*/ @@ -70,14 +63,12 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else { /* Handle Bus fault */ if (kbase_mmu_bus_fault_interrupt(kbdev, status, as_nr)) - dev_warn(kbdev->dev, - "fail to handle GPU bus fault ...\n"); + dev_warn(kbdev->dev, "fail to handle GPU bus fault ...\n"); } } else kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); @@ -86,6 +77,9 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) { + u32 power_changed_mask = (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ); + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); if (val & GPU_FAULT) kbase_gpu_fault_interrupt(kbdev); @@ -100,16 +94,14 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * deluge of such interrupts. It will be unmasked on GPU reset. 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - GPU_IRQ_REG_ALL & ~GPU_PROTECTED_FAULT); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + GPU_IRQ_REG_ALL & ~GPU_PROTECTED_FAULT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_csf_scheduler_spin_lock(kbdev, &flags); - if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use( - kbdev))) { + if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev))) { struct base_gpu_queue_group_error const - err_payload = { .error_type = - BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, .payload = { .fatal_group = { .status = @@ -120,14 +112,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) DF_GPU_PROTECTED_FAULT); scheduler->active_protm_grp->faulted = true; - kbase_csf_add_group_fatal_error( - scheduler->active_protm_grp, &err_payload); + kbase_csf_add_group_fatal_error(scheduler->active_protm_grp, &err_payload); kbase_event_wakeup(scheduler->active_protm_grp->kctx); } kbase_csf_scheduler_spin_unlock(kbdev, flags); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); /* Defer the clearing to the GPU reset sequence */ @@ -142,7 +132,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * kbase_gpu_cache_flush_and_busy_wait */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); #ifdef KBASE_PM_RUNTIME if (val & DOORBELL_MIRROR) { @@ -169,7 +159,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) if (val & CLEAN_CACHES_COMPLETED) kbase_clean_caches_done(kbdev); - if (val & (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)) 
{ + if (val & power_changed_mask) { kbase_pm_power_changed(kbdev); } else if (val & CLEAN_CACHES_COMPLETED) { /* If cache line evict messages can be lost when shader cores @@ -179,74 +169,10 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * cores. */ if (kbdev->pm.backend.l2_always_on || - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) kbase_pm_power_changed(kbdev); } KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -bool kbase_is_register_accessible(u32 offset) -{ -#ifdef CONFIG_MALI_BIFROST_DEBUG - if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) || - ((offset >= GPU_CONTROL_MCU_BASE) && (offset < USER_BASE))) { - WARN(1, "Invalid register offset 0x%x", offset); - return false; - } -#endif - - return true; -} -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ - -#if IS_ENABLED(CONFIG_MALI_REAL_HW) -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) -{ - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) - return; - - if (WARN_ON(kbdev->dev == NULL)) - return; - - if (!kbase_is_register_accessible(offset)) - return; - - writel(value, kbdev->reg + offset); - -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - value, 1); -#endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); -} -KBASE_EXPORT_TEST_API(kbase_reg_write); - -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) -{ - u32 val; - - if (WARN_ON(!kbdev->pm.backend.gpu_powered)) - return 0; - - if (WARN_ON(kbdev->dev == NULL)) - return 0; - - if (!kbase_is_register_accessible(offset)) - return 0; - - val = readl(kbdev->reg + offset); - -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - val, 0); -#endif /* CONFIG_DEBUG_FS */ - 
dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); - - return val; -} -KBASE_EXPORT_TEST_API(kbase_reg_read); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c index 38223af213d1..d530010c096e 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -38,19 +39,14 @@ */ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) { - u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); - u64 address = (u64) kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; + u32 status = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_FAULTSTATUS)); + uintptr_t phys_addr = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(GPU_FAULTADDRESS)); - address |= kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); - - dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", - status, - kbase_gpu_exception_name(status & 0xFF), - address); + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at PA 0x%pK", status, + kbase_gpu_exception_name(status & 0xFF), (void *)phys_addr); if (multiple) - dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); + dev_warn(kbdev->dev, + "There were multiple GPU faults - some have not been reported\n"); } @@ -68,7 +64,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * kbase_gpu_cache_flush_and_busy_wait 
*/ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); /* kbase_instr_hwcnt_sample_done frees the HWCNT pipeline to request another * sample. Therefore this must be called after clearing the IRQ to avoid a @@ -99,45 +95,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) * cores. */ if (kbdev->pm.backend.l2_always_on || - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) kbase_pm_power_changed(kbdev); } KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } - -#if IS_ENABLED(CONFIG_MALI_REAL_HW) -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) -{ - WARN_ON(!kbdev->pm.backend.gpu_powered); - - writel(value, kbdev->reg + offset); - -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - value, 1); -#endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); -} -KBASE_EXPORT_TEST_API(kbase_reg_write); - -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) -{ - u32 val; - - WARN_ON(!kbdev->pm.backend.gpu_powered); - - val = readl(kbdev->reg + offset); - -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - val, 0); -#endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); - - return val; -} -KBASE_EXPORT_TEST_API(kbase_reg_read); -#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index 2d3672383630..556e388e11bd 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ 
b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -43,6 +43,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include +#endif /** * kbase_backend_late_init - Perform any backend-specific initialization. @@ -100,10 +103,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; - err = kbase_backend_time_init(kbdev); - if (err) - goto fail_update_l2_features; - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); /* Idle the GPU and/or cores, if the policy wants it to */ @@ -222,12 +221,14 @@ static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -255,8 +256,6 @@ static const struct kbase_device_init dev_init[] = { "GPU hwcnt context initialization failed" }, { kbase_device_hwcnt_virtualizer_init, kbase_device_hwcnt_virtualizer_term, "GPU hwcnt virtualizer initialization failed" }, - { kbase_device_vinstr_init, 
kbase_device_vinstr_term, - "Virtual instrumentation initialization failed" }, { kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term, "Performance counter instrumentation initialization failed" }, { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, @@ -284,8 +283,7 @@ static const struct kbase_device_init dev_init[] = { { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, }; -static void kbase_device_term_partial(struct kbase_device *kbdev, - unsigned int i) +static void kbase_device_term_partial(struct kbase_device *kbdev, unsigned int i) { while (i-- > 0) { if (dev_init[i].term) @@ -315,8 +313,8 @@ int kbase_device_init(struct kbase_device *kbdev) err = dev_init[i].init(kbdev); if (err) { if (err != -EPROBE_DEFER) - dev_err(kbdev->dev, "%s error = %d\n", - dev_init[i].err_mes, err); + dev_err(kbdev->dev, "%s error = %d\n", dev_init[i].err_mes, + err); kbase_device_term_partial(kbdev, i); break; } diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index b2b0cfd6cc97..89b3dbb37692 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -42,7 +42,6 @@ #include #include "mali_kbase_kinstr_prfcnt.h" -#include "mali_kbase_vinstr.h" #include "hwcnt/mali_kbase_hwcnt_context.h" #include "hwcnt/mali_kbase_hwcnt_virtualizer.h" @@ -72,7 +71,7 @@ static int kbase_dev_nr; struct kbase_device *kbase_device_alloc(void) { - return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); + return vzalloc(sizeof(struct kbase_device)); } /** @@ -118,29 +117,28 @@ int kbase_device_pcm_dev_init(struct kbase_device *const kbdev) /* Check to see whether or not a platform specific priority control manager * is available. 
*/ - prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node, - "priority-control-manager", 0); + prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node, "priority-control-manager", 0); if (!prio_ctrl_node) { - dev_info(kbdev->dev, - "No priority control manager is configured"); + dev_info(kbdev->dev, "No priority control manager is configured"); } else { - struct platform_device *const pdev = - of_find_device_by_node(prio_ctrl_node); + struct platform_device *const pdev = of_find_device_by_node(prio_ctrl_node); if (!pdev) { dev_err(kbdev->dev, "The configured priority control manager was not found"); } else { struct priority_control_manager_device *pcm_dev = - platform_get_drvdata(pdev); + platform_get_drvdata(pdev); if (!pcm_dev) { dev_info(kbdev->dev, "Priority control manager is not ready"); err = -EPROBE_DEFER; } else if (!try_module_get(pcm_dev->owner)) { - dev_err(kbdev->dev, "Failed to get priority control manager module"); + dev_err(kbdev->dev, + "Failed to get priority control manager module"); err = -ENODEV; } else { - dev_info(kbdev->dev, "Priority control manager successfully loaded"); + dev_info(kbdev->dev, + "Priority control manager successfully loaded"); kbdev->pcm_dev = pcm_dev; } } @@ -171,48 +169,46 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev) * * Return: NOTIFY_OK on success, NOTIFY_BAD otherwise. 
*/ -static int mali_oom_notifier_handler(struct notifier_block *nb, - unsigned long action, void *data) +static int mali_oom_notifier_handler(struct notifier_block *nb, unsigned long action, void *data) { struct kbase_device *kbdev; struct kbase_context *kctx = NULL; unsigned long kbdev_alloc_total; + CSTD_UNUSED(action); + CSTD_UNUSED(data); + if (WARN_ON(nb == NULL)) return NOTIFY_BAD; kbdev = container_of(nb, struct kbase_device, oom_notifier_block); - kbdev_alloc_total = - KBASE_PAGES_TO_KIB(atomic_read(&(kbdev->memdev.used_pages))); + kbdev_alloc_total = KBASE_PAGES_TO_KIB(atomic_read(&(kbdev->memdev.used_pages))); - dev_err(kbdev->dev, "OOM notifier: dev %s %lu kB\n", kbdev->devname, - kbdev_alloc_total); + dev_err(kbdev->dev, "OOM notifier: dev %s %lu kB\n", kbdev->devname, kbdev_alloc_total); mutex_lock(&kbdev->kctx_list_lock); list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { - struct pid *pid_struct; - struct task_struct *task; + struct task_struct *task = kctx->task; struct pid *tgid_struct; struct task_struct *tgid_task; - unsigned long task_alloc_total = KBASE_PAGES_TO_KIB(atomic_read(&(kctx->used_pages))); rcu_read_lock(); - pid_struct = find_get_pid(kctx->pid); - task = pid_task(pid_struct, PIDTYPE_PID); tgid_struct = find_get_pid(kctx->tgid); tgid_task = pid_task(tgid_struct, PIDTYPE_PID); dev_err(kbdev->dev, "OOM notifier: tsk %s:%s tgid (%u) pid (%u) %lu kB\n", tgid_task ? tgid_task->comm : "[null task]", - task ? task->comm : "[null comm]", kctx->tgid, - kctx->pid, task_alloc_total); + task ? 
task->comm : "[null task]", + kctx->tgid, + kctx->pid, + task_alloc_total); - put_pid(pid_struct); + put_pid(tgid_struct); rcu_read_unlock(); } @@ -220,7 +216,7 @@ static int mali_oom_notifier_handler(struct notifier_block *nb, return NOTIFY_OK; } -int kbase_device_misc_init(struct kbase_device * const kbdev) +int kbase_device_misc_init(struct kbase_device *const kbdev) { int err; #if IS_ENABLED(CONFIG_ARM64) @@ -230,56 +226,37 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) spin_lock_init(&kbdev->mmu_mask_change); mutex_init(&kbdev->mmu_hw_mutex); #if IS_ENABLED(CONFIG_ARM64) - kbdev->cci_snoop_enabled = false; np = kbdev->dev->of_node; if (np != NULL) { - if (of_property_read_u32(np, "snoop_enable_smc", - &kbdev->snoop_enable_smc)) + /* Read "-" versions of the properties and fallback to "_" + * if these are not found + */ + if (of_property_read_u32(np, "snoop-enable-smc", &kbdev->snoop_enable_smc) && + of_property_read_u32(np, "snoop_enable_smc", &kbdev->snoop_enable_smc)) kbdev->snoop_enable_smc = 0; - if (of_property_read_u32(np, "snoop_disable_smc", - &kbdev->snoop_disable_smc)) + if (of_property_read_u32(np, "snoop-disable-smc", &kbdev->snoop_disable_smc) && + of_property_read_u32(np, "snoop_disable_smc", &kbdev->snoop_disable_smc)) kbdev->snoop_disable_smc = 0; /* Either both or none of the calls should be provided. 
*/ - if (!((kbdev->snoop_disable_smc == 0 - && kbdev->snoop_enable_smc == 0) - || (kbdev->snoop_disable_smc != 0 - && kbdev->snoop_enable_smc != 0))) { + if (!((kbdev->snoop_disable_smc == 0 && kbdev->snoop_enable_smc == 0) || + (kbdev->snoop_disable_smc != 0 && kbdev->snoop_enable_smc != 0))) { WARN_ON(1); - err = -EINVAL; - goto fail; + return -EINVAL; } } #endif /* CONFIG_ARM64 */ - /* Get the list of workarounds for issues on the current HW - * (identified by the GPU_ID register) - */ - err = kbase_hw_set_issues_mask(kbdev); - if (err) - goto fail; - - /* Set the list of features available on the current HW - * (identified by the GPU_ID register) - */ - kbase_hw_set_features_mask(kbdev); - - err = kbase_gpuprops_set_features(kbdev); - if (err) - goto fail; - /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our * device structure was created by device-tree */ if (!kbdev->dev->dma_mask) kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; - err = dma_set_mask(kbdev->dev, - DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + err = dma_set_mask(kbdev->dev, DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); if (err) goto dma_set_mask_failed; - err = dma_set_coherent_mask(kbdev->dev, - DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + err = dma_set_coherent_mask(kbdev->dev, DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); if (err) goto dma_set_mask_failed; @@ -304,18 +281,16 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) kbase_debug_assert_register_hook(&kbase_ktrace_hook_wrapper, kbdev); - atomic_set(&kbdev->ctx_num, 0); - kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; #if MALI_USE_CSF - kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); -#else + kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT); +#else /* MALI_USE_CSF */ kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS; -#endif /* MALI_USE_CSF */ +#endif /* !MALI_USE_CSF */ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - 
kbdev->mmu_as_inactive_wait_time_ms = + kbdev->mmu_or_gpu_cache_op_wait_time_ms = kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT); mutex_init(&kbdev->kctx_list_lock); INIT_LIST_HEAD(&kbdev->kctx_list); @@ -336,15 +311,15 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) kbdev->num_of_atoms_hw_completed = 0; #endif -#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) - atomic_set(&kbdev->live_fence_metadata, 0); +#if MALI_USE_CSF + atomic_set(&kbdev->fence_signal_timeout_enabled, 1); #endif + return 0; term_as: kbase_device_all_as_term(kbdev); dma_set_mask_failed: -fail: return err; } @@ -398,13 +373,12 @@ bool kbase_is_quick_reset_enabled(struct kbase_device *kbdev) void kbase_device_free(struct kbase_device *kbdev) { - kfree(kbdev); + vfree(kbdev); } void kbase_device_id_init(struct kbase_device *kbdev) { - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr); + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", KBASE_DRV_NAME, kbase_dev_nr); kbdev->id = kbase_dev_nr; } @@ -415,8 +389,7 @@ void kbase_increment_device_id(void) int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) { - return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, - &kbdev->hwcnt_gpu_ctx); + return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, &kbdev->hwcnt_gpu_ctx); } void kbase_device_hwcnt_context_term(struct kbase_device *kbdev) @@ -427,8 +400,8 @@ void kbase_device_hwcnt_context_term(struct kbase_device *kbdev) int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev) { return kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx, - KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, - &kbdev->hwcnt_gpu_virt); + KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, + &kbdev->hwcnt_gpu_virt); } void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev) @@ -438,7 +411,6 @@ void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev) int kbase_device_timeline_init(struct kbase_device *kbdev) { - 
atomic_set(&kbdev->timeline_flags, 0); return kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_flags); } @@ -447,20 +419,9 @@ void kbase_device_timeline_term(struct kbase_device *kbdev) kbase_timeline_term(kbdev->timeline); } -int kbase_device_vinstr_init(struct kbase_device *kbdev) -{ - return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); -} - -void kbase_device_vinstr_term(struct kbase_device *kbdev) -{ - kbase_vinstr_term(kbdev->vinstr_ctx); -} - int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev) { - return kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, - &kbdev->kinstr_prfcnt_ctx); + return kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, &kbdev->kinstr_prfcnt_ctx); } void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev) @@ -470,8 +431,7 @@ void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev) int kbase_device_io_history_init(struct kbase_device *kbdev) { - return kbase_io_history_init(&kbdev->io_history, - KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); + return kbase_io_history_init(&kbdev->io_history, KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); } void kbase_device_io_history_term(struct kbase_device *kbdev) @@ -518,6 +478,7 @@ KBASE_EXPORT_TEST_API(kbase_device_get_list); void kbase_device_put_list(const struct list_head *dev_list) { + CSTD_UNUSED(dev_list); mutex_unlock(&kbase_dev_list_lock); } KBASE_EXPORT_TEST_API(kbase_device_put_list); @@ -530,14 +491,13 @@ int kbase_device_early_init(struct kbase_device *kbdev) if (err) return err; - err = kbasep_platform_device_init(kbdev); if (err) goto ktrace_term; err = kbase_pm_runtime_init(kbdev); if (err) - goto fail_runtime_pm; + goto platform_device_term; /* This spinlock is initialized before doing the first access to GPU * registers and installing interrupt handlers. 
@@ -547,13 +507,30 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* Ensure we can access the GPU registers */ kbase_pm_register_access_enable(kbdev); - /* - * Find out GPU properties based on the GPU feature registers. - * Note that this does not populate the few properties that depend on - * hw_features being initialized. Those are set by kbase_gpuprops_set_features - * soon after this in the init process. + /* Initialize GPU_ID props */ + kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev)); + + /* Initialize register mapping LUTs */ + err = kbase_regmap_init(kbdev); + if (err) + goto pm_runtime_term; + + /* Set the list of features available on the current HW + * (identified by the GPU_ID register) */ - kbase_gpuprops_set(kbdev); + kbase_hw_set_features_mask(kbdev); + + /* Find out GPU properties based on the GPU feature registers. */ + err = kbase_gpuprops_init(kbdev); + if (err) + goto regmap_term; + + /* Get the list of workarounds for issues on the current HW + * (identified by the GPU_ID register and impl_tech in THREAD_FEATURES) + */ + err = kbase_hw_set_issues_mask(kbdev); + if (err) + goto gpuprops_term; /* We're done accessing the GPU registers for now. 
*/ kbase_pm_register_access_disable(kbdev); @@ -567,13 +544,17 @@ int kbase_device_early_init(struct kbase_device *kbdev) err = kbase_install_interrupts(kbdev); #endif if (err) - goto fail_interrupts; + goto gpuprops_term; return 0; -fail_interrupts: +gpuprops_term: + kbase_gpuprops_term(kbdev); +regmap_term: + kbase_regmap_term(kbdev); +pm_runtime_term: kbase_pm_runtime_term(kbdev); -fail_runtime_pm: +platform_device_term: kbasep_platform_device_term(kbdev); ktrace_term: kbase_ktrace_term(kbdev); @@ -591,6 +572,7 @@ void kbase_device_early_term(struct kbase_device *kbdev) #else kbase_release_interrupts(kbdev); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_gpuprops_term(kbdev); kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); kbase_ktrace_term(kbdev); diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h index f025011009d5..9cca6aff4554 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,7 +19,11 @@ * */ +#ifndef _MALI_KBASE_DEVICE_H_ +#define _MALI_KBASE_DEVICE_H_ + #include +#include /** * kbase_device_get_list - get device list. @@ -80,27 +84,6 @@ int kbase_device_init(struct kbase_device *kbdev); */ void kbase_device_term(struct kbase_device *kbdev); -/** - * kbase_reg_write - write to GPU register - * @kbdev: Kbase device pointer - * @offset: Offset of register - * @value: Value to write - * - * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). 
- */ -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); - -/** - * kbase_reg_read - read from GPU register - * @kbdev: Kbase device pointer - * @offset: Offset of register - * - * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). - * - * Return: Value in desired register - */ -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); - /** * kbase_is_gpu_removed() - Has the GPU been removed. * @kbdev: Kbase device pointer @@ -149,8 +132,7 @@ int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phy * * Return: 0 if successful or a negative error code on failure. */ -int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, - u32 flush_op); +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, u32 flush_op); /** * kbase_gpu_start_cache_clean - Start a cache clean @@ -170,8 +152,7 @@ void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op); * Issue a given cache flush command to hardware. * hwaccess_lock must be held by the caller. */ -void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, - u32 flush_op); +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, u32 flush_op); /** * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish @@ -191,11 +172,11 @@ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); * called from paths (like GPU reset) where an indefinite wait for the * completion of cache clean operation can cause deadlock, as the operation may * never complete. + * If cache clean times out, reset GPU to recover. * * Return: 0 if successful or a negative error code on failure. 
*/ -int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, - unsigned int wait_timeout_ms); +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, unsigned int wait_timeout_ms); /** * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is @@ -225,3 +206,5 @@ void kbase_clean_caches_done(struct kbase_device *kbdev); * handled. */ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + +#endif /* _MALI_KBASE_DEVICE_H_ */ diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c index d55495045892..3b507c4fec63 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,34 +27,65 @@ #include #include -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) bool kbase_is_gpu_removed(struct kbase_device *kbdev) { - u32 val; + if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) + return false; - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); - - return val == 0; + return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); } -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) +/** + * busy_wait_cache_operation - Wait for a pending cache flush to complete + * + * @kbdev: Pointer of kbase device. + * @irq_bit: IRQ bit cache flush operation to wait on. + * + * It will reset GPU if the wait fails. + * + * Return: 0 on success, error code otherwise. 
+ */ +static int busy_wait_cache_operation(struct kbase_device *kbdev, u32 irq_bit) { - char *irq_flag_name; - /* Previously MMU-AS command was used for L2 cache flush on page-table update. - * And we're using the same max-loops count for GPU command, because amount of - * L2 cache flush overhead are same between them. - */ - unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + const ktime_t wait_loop_start = ktime_get_raw(); + const u32 wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms; + bool completed = false; + s64 diff; + u32 irq_bits_to_check = irq_bit; - /* Wait for the GPU cache clean operation to complete */ - while (--max_loops && - !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) { - ; + /* hwaccess_lock must be held to prevent concurrent threads from + * cleaning the IRQ bits, otherwise it could be possible for this thread + * to lose the event it is waiting for. In particular, concurrent attempts + * to reset the GPU could go undetected and this thread would miss + * the completion of the cache flush operation it is waiting for. + */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Add the RESET_COMPLETED bit. If this bit is set, then the GPU has + * been reset which implies that any cache flush operation has been + * completed, too. 
+ */ + { + irq_bits_to_check |= RESET_COMPLETED; } - /* reset gpu if time-out occurred */ - if (max_loops == 0) { + do { + unsigned int i; + + for (i = 0; i < 1000; i++) { + if (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)) & + irq_bits_to_check) { + completed = true; + break; + } + } + + diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); + } while ((diff < wait_time_ms) && !completed); + + if (!completed) { + char *irq_flag_name; + switch (irq_bit) { case CLEAN_CACHES_COMPLETED: irq_flag_name = "CLEAN_CACHES_COMPLETED"; @@ -68,24 +99,22 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) } dev_err(kbdev->dev, - "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n", + "Stuck waiting on %s bit, might be due to unstable GPU clk/pwr or possible faulty FPGA connector\n", irq_flag_name); if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); + return -EBUSY; } - /* Clear the interrupt bit. */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), irq_bit); return 0; } #if MALI_USE_CSF -#define U64_LO_MASK ((1ULL << 32) - 1) -#define U64_HI_MASK (~U64_LO_MASK) int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes, u32 flush_op) @@ -96,28 +125,24 @@ int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phy lockdep_assert_held(&kbdev->hwaccess_lock); /* 1. Clear the interrupt FLUSH_PA_RANGE_COMPLETED bit. */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED); /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_PA_RANGE operation. 
*/ start_pa = phys; end_pa = start_pa + nr_bytes - 1; - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), start_pa & U64_LO_MASK); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI), - (start_pa & U64_HI_MASK) >> 32); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), end_pa & U64_LO_MASK); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI), (end_pa & U64_HI_MASK) >> 32); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND_ARG0), start_pa); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND_ARG1), end_pa); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), flush_op); /* 3. Busy-wait irq status to be enabled. */ - ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED); + ret = busy_wait_cache_operation(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED); return ret; } #endif /* MALI_USE_CSF */ -int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, - u32 flush_op) +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, u32 flush_op) { int need_to_wake_up = 0; int ret = 0; @@ -138,33 +163,32 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, */ if (kbdev->cache_clean_in_progress) { /* disable irq first */ - u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~CLEAN_CACHES_COMPLETED); + u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); /* busy wait irq status to be enabled */ - ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); + ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED); if (ret) return ret; /* merge pended command if there's any */ - flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE( - kbdev->cache_clean_queued, flush_op); + flush_op = 
GPU_COMMAND_FLUSH_CACHE_MERGE(kbdev->cache_clean_queued, flush_op); /* enable wake up notify flag */ need_to_wake_up = 1; } else { /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), - CLEAN_CACHES_COMPLETED); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED); } /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), flush_op); /* 3. Busy-wait irq status to be enabled. */ - ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); + ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED); if (ret) return ret; @@ -175,8 +199,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, return ret; } -void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, - u32 flush_op) +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, u32 flush_op) { u32 irq_mask; @@ -188,18 +211,17 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, * the cache. Instead, accumulate all cache clean operations * and trigger that immediately after this one finishes. 
*/ - kbdev->cache_clean_queued = GPU_COMMAND_FLUSH_CACHE_MERGE( - kbdev->cache_clean_queued, flush_op); + kbdev->cache_clean_queued = + GPU_COMMAND_FLUSH_CACHE_MERGE(kbdev->cache_clean_queued, flush_op); return; } /* Enable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask | CLEAN_CACHES_COMPLETED); + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED); KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), flush_op); kbdev->cache_clean_in_progress = true; } @@ -234,7 +256,7 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) * It might have already been done by kbase_gpu_cache_flush_and_busy_wait. */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED); if (kbdev->cache_clean_queued) { u32 pended_flush_op = kbdev->cache_clean_queued; @@ -242,12 +264,12 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) kbdev->cache_clean_queued = 0; KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), pended_flush_op); } else { /* Disable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~CLEAN_CACHES_COMPLETED); + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); 
kbase_gpu_cache_clean_wait_complete(kbdev); } @@ -271,21 +293,31 @@ static inline bool get_cache_clean_flag(struct kbase_device *kbdev) void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) { while (get_cache_clean_flag(kbdev)) { - wait_event_interruptible(kbdev->cache_clean_wait, - !kbdev->cache_clean_in_progress); + if (wait_event_interruptible(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress)) + dev_warn(kbdev->dev, "Wait for cache clean is interrupted"); } } -int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, - unsigned int wait_timeout_ms) +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, unsigned int wait_timeout_ms) { long remaining = msecs_to_jiffies(wait_timeout_ms); + int result = 0; while (remaining && get_cache_clean_flag(kbdev)) { remaining = wait_event_timeout(kbdev->cache_clean_wait, - !kbdev->cache_clean_in_progress, - remaining); + !kbdev->cache_clean_in_progress, remaining); } - return (remaining ? 0 : -ETIMEDOUT); + if (!remaining) { + dev_err(kbdev->dev, + "Cache clean timed out. Might be caused by unstable GPU clk/pwr or faulty system"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); + + result = -ETIMEDOUT; + } + + return result; } diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h index 6f77592dfff2..c900f57e1ff6 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,12 +36,17 @@ struct kbase_device_init { char *err_mes; }; -int kbase_device_vinstr_init(struct kbase_device *kbdev); -void kbase_device_vinstr_term(struct kbase_device *kbdev); - int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev); void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev); +/** + * kbase_device_timeline_init() - Initialize kbase device for timeline + * @kbdev: Pointer to the kbase device + * + * This function must be called only when a kbase device is initialized. + * + * Return: 0 on success + */ int kbase_device_timeline_init(struct kbase_device *kbdev); void kbase_device_timeline_term(struct kbase_device *kbdev); @@ -66,6 +71,8 @@ void kbase_device_id_init(struct kbase_device *kbdev); * kbase_device_early_init - Perform any device-specific initialization. * @kbdev: Device pointer * + * This function must be called only when a kbase device is initialized. + * * Return: 0 on success, or an error code on failure. */ int kbase_device_early_init(struct kbase_device *kbdev); @@ -89,13 +96,3 @@ int kbase_device_late_init(struct kbase_device *kbdev); * @kbdev: Device pointer */ void kbase_device_late_term(struct kbase_device *kbdev); - -#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -/** - * kbase_is_register_accessible - Checks if register is accessible - * @offset: Register offset - * - * Return: true if the register is accessible, false otherwise. 
- */ -bool kbase_is_register_accessible(u32 offset); -#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h deleted file mode 100644 index e7457ddb5534..000000000000 --- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ /dev/null @@ -1,381 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_GPU_REGMAP_CSF_H_ -#define _KBASE_GPU_REGMAP_CSF_H_ - -#include - -#if !MALI_USE_CSF && defined(__KERNEL__) -#error "Cannot be compiled with JM" -#endif - -/* GPU_CONTROL_MCU base address */ -#define GPU_CONTROL_MCU_BASE 0x3000 - -/* MCU_SUBSYSTEM base address */ -#define MCU_SUBSYSTEM_BASE 0x20000 - -/* IPA control registers */ -#define COMMAND 0x000 /* (WO) Command register */ -#define TIMER 0x008 /* (RW) Timer control register */ - -#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ -#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ -#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ -#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ -#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ -#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ -#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ -#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ - -/* Accumulated counter values for CS hardware */ -#define VALUE_CSHW_BASE 0x100 -#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -/* Accumulated counter values for memory system */ -#define VALUE_MEMSYS_BASE 0x140 -#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -#define VALUE_TILER_BASE 0x180 -#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -#define VALUE_SHADER_BASE 
0x1C0 -#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -#define AS_STATUS_AS_ACTIVE_INT 0x2 - -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -/* Set to inner non-cacheable, outer-non-cacheable - * Setting defined by the alloc bits is ignored, but set to a valid encoding: - * - no-alloc on read - * - no alloc on write - */ -#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull -/* Set to shared memory, that is inner cacheable on ACE and inner or outer - * shared, otherwise inner non-cacheable. - * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. - */ -#define AS_MEMATTR_AARCH64_SHARED 0x8ull - -/* Symbols for default MEMATTR to use - * Default is - HW implementation defined caching - */ -#define AS_MEMATTR_INDEX_DEFAULT 0 -#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 - -/* HW implementation defined caching */ -#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -/* Force cache on */ -#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -/* Write-alloc */ -#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -/* Outer coherent, inner implementation defined policy */ -#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -/* Outer coherent, write alloc inner */ -#define AS_MEMATTR_INDEX_OUTER_WA 4 -/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 -/* Normal memory, shared between MCU and Host */ -#define AS_MEMATTR_INDEX_SHARED 6 - -/* Configuration bits for the CSF. 
*/ -#define CSF_CONFIG 0xF00 - -/* CSF_CONFIG register */ -#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 - -/* GPU control registers */ -#define CORE_FEATURES 0x008 /* () Shader Core Features */ -#define MCU_CONTROL 0x700 -#define MCU_STATUS 0x704 - -#define MCU_CNTRL_ENABLE (1 << 0) -#define MCU_CNTRL_AUTO (1 << 1) -#define MCU_CNTRL_DISABLE (0) - -#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31) -#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT) - -#define MCU_STATUS_HALTED (1 << 1) - -#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) -#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) -#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ - (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) -#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ - (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ - (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) - -/* JOB IRQ flags */ -#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */ - -/* GPU_COMMAND codes */ -#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ -#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ -#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ -#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ -#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ -#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ -#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ -#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */ - -/* GPU_COMMAND_RESET payloads */ - -/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. - * Power domains will remain powered on. 
- */ -#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 - -/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and - * idle state. - */ -#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 - -/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave - * the system bus in an inconsistent state. Use only as a last resort when nothing else works. - */ -#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 - -/* GPU_COMMAND_TIME payloads */ -#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ -#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ - -/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */ - -/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */ - -/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */ -#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */ - -/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */ -#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */ -#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 0x01 /* CLN only */ -#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */ -#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */ - -/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */ -#define 
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */ -#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */ -#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */ - -/* GPU_COMMAND command + payload */ -#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ - ((__u32)opcode | ((__u32)payload << 8)) - -/* Final GPU_COMMAND form */ -/* No operation, nothing happens */ -#define GPU_COMMAND_NOP \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) - -/* Stop all external bus interfaces, and then reset the entire GPU. */ -#define GPU_COMMAND_SOFT_RESET \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) - -/* Immediately reset the entire GPU. */ -#define GPU_COMMAND_HARD_RESET \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) - -/* Starts the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_START \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) - -/* Stops the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_STOP \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) - -/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */ -#define GPU_COMMAND_CACHE_CLN_INV_L2 \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) - -/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */ -#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) - -/* Clean and invalidate L2, LSC, and Other caches */ -#define 
GPU_COMMAND_CACHE_CLN_INV_FULL \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE)) - -/* Clean and invalidate only LSC cache */ -#define GPU_COMMAND_CACHE_CLN_INV_LSC \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) - -/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */ -#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ - (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) - -/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */ -#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ - (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ - GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) - -/* Clean and invalidate physical range L2, LSC and Other caches */ -#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ - (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \ - GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ - GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) - -/* Merge cache flush commands */ -#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2)) - -/* Places the GPU in protected mode */ -#define GPU_COMMAND_SET_PROTECTED_MODE \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) - -/* Halt CSF */ -#define GPU_COMMAND_FINISH_HALT \ - 
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) - -/* Clear GPU faults */ -#define GPU_COMMAND_CLEAR_FAULT \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) - -/* End Command Values */ - -/* GPU_FAULTSTATUS register */ -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ - (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ - >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ - (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) - -#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 -#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) - -#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 -#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) - -#define GPU_FAULTSTATUS_JASID_SHIFT 12 -#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) -#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ - (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) -#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ - (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ - (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) - -#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 -#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ - (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) -/* End GPU_FAULTSTATUS register */ - -/* GPU_FAULTSTATUS_ACCESS_TYPE values */ -#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 -#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 -#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 -#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 -/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ - -/* Implementation-dependent exception codes used to indicate CSG - * and CS errors that are not specified in the specs. 
- */ -#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) -#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) -#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) - -/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 -#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A -/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ - -#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) -#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ - (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ - (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ - (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ -#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ -#define FLUSH_PA_RANGE_COMPLETED \ - (1 << 20) /* Set when a physical range cache clean operation has completed. 
*/ - -/* - * In Debug build, - * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interupts sources of GPU_IRQ - * by writing it onto GPU_IRQ_CLEAR/MASK registers. - * - * In Release build, - * GPU_IRQ_REG_COMMON is used. - * - * Note: - * CLEAN_CACHES_COMPLETED - Used separately for cache operation. - * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON - * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen - */ -#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ - | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) - -/* GPU_FEATURES register */ -#define GPU_FEATURES_RAY_TRACING_SHIFT GPU_U(2) -#define GPU_FEATURES_RAY_TRACING_MASK (GPU_U(0x1) << GPU_FEATURES_RAY_TRACING_SHIFT) -#define GPU_FEATURES_RAY_TRACING_GET(reg_val) \ - (((reg_val)&GPU_FEATURES_RAY_TRACING_MASK) >> GPU_FEATURES_RAY_TRACING_SHIFT) -/* End of GPU_FEATURES register */ - -#endif /* _KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h deleted file mode 100644 index f86f493c7f7e..000000000000 --- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ /dev/null @@ -1,276 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ -#ifndef _KBASE_GPU_REGMAP_JM_H_ -#define _KBASE_GPU_REGMAP_JM_H_ - -#if MALI_USE_CSF && defined(__KERNEL__) -#error "Cannot be compiled with CSF" -#endif - -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -/* Set to inner non-cacheable, outer-non-cacheable - * Setting defined by the alloc bits is ignored, but set to a valid encoding: - * - no-alloc on read - * - no alloc on write - */ -#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull - -/* Symbols for default MEMATTR to use - * Default is - HW implementation defined caching - */ -#define AS_MEMATTR_INDEX_DEFAULT 0 -#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 - -/* HW implementation defined caching */ -#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -/* Force cache on */ -#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -/* Write-alloc */ -#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -/* Outer coherent, inner implementation defined policy */ -#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -/* Outer coherent, write alloc inner */ -#define AS_MEMATTR_INDEX_OUTER_WA 4 -/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 - -/* GPU control registers */ - -#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ -#define JS_PRESENT 0x01C /* (RO) Job slots present */ - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory - * region base address, low word - */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory - * region base address, high word - */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter - * configuration - */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable - * flags for Job 
Manager - */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable - * flags for shader cores - */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable - * flags for tiler - */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable - * flags for MMU/L2 cache - */ - -#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ -#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ -#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ -#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ -#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ -#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ -#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ -#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ -#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ -#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ -#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ -#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ -#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ -#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ -#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ -#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ - -#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) - -#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ - -/* Job control registers */ - -#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ -#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ - -#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ -#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ -#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ -#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ -#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ -#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ -#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ -#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ -#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ -#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ -#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ -#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ -#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ -#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ -#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ - -#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ - -#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ - -#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ - -#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ - -/* No JM-specific MMU control registers */ -/* No JM-specific MMU address space control registers */ - -/* JS_COMMAND register commands */ -#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ -#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ - -#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ - -/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ -#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) -#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) -#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8) -#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) -#define JS_CONFIG_START_MMU (1u << 10) -#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) -#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION -#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) -#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) -#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) -#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) -#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) - -/* JS_XAFFINITY register values */ -#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) -#define JS_XAFFINITY_TILER_ENABLE (1u << 8) -#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) - -/* JS_STATUS register values */ - -/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. - * The values are separated to avoid dependency of userspace and kernel code. 
- */ - -/* Group of values representing the job status instead of a particular fault */ -#define JS_STATUS_NO_EXCEPTION_BASE 0x00 -#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ -#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ -#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ - -/* General fault values */ -#define JS_STATUS_FAULT_BASE 0x40 -#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ -#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ -#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ -#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ -#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ -#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ - -/* Instruction or data faults */ -#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 -#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ -#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ -#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ -#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ -#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ -#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ -#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ -/* NOTE: No fault with 0x57 code defined in spec. 
*/ -#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ -#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ -#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ - -/* Other faults */ -#define JS_STATUS_MEMORY_FAULT_BASE 0x60 -#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ -#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ - -/* JS_FEATURES register */ -#define JS_FEATURE_NULL_JOB (1u << 1) -#define JS_FEATURE_SET_VALUE_JOB (1u << 2) -#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) -#define JS_FEATURE_COMPUTE_JOB (1u << 4) -#define JS_FEATURE_VERTEX_JOB (1u << 5) -#define JS_FEATURE_GEOMETRY_JOB (1u << 6) -#define JS_FEATURE_TILER_JOB (1u << 7) -#define JS_FEATURE_FUSED_JOB (1u << 8) -#define JS_FEATURE_FRAGMENT_JOB (1u << 9) - -/* JM_CONFIG register */ -#define JM_TIMESTAMP_OVERRIDE (1ul << 0) -#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) -#define JM_JOB_THROTTLE_ENABLE (1ul << 2) -#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) -#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) -#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) - -/* GPU_COMMAND values */ -#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ -#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ -#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ -#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. 
*/ -#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ -#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ -#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ -#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ - -/* GPU_COMMAND cache flush alias to CSF command payload */ -#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES -#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES -#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES -#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES - -/* Merge cache flush commands */ -#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ - ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -#define FLUSH_PA_RANGE_COMPLETED \ - (1 << 20) /* Set when a physical range cache clean operation has completed. */ - -/* - * In Debug build, - * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interupts sources of GPU_IRQ - * by writing it onto GPU_IRQ_CLEAR/MASK registers. - * - * In Release build, - * GPU_IRQ_REG_COMMON is used. 
- * - * Note: - * CLEAN_CACHES_COMPLETED - Used separately for cache operation. - */ -#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ - | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) - -#endif /* _KBASE_GPU_REGMAP_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c index 8a84ef54100a..eee670f896bc 100644 --- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,7 @@ const char *kbase_gpu_access_type_name(u32 fault_status) return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: return "WRITE"; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: + case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: return "EXECUTE"; default: WARN_ON(1); diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h deleted file mode 100644 index 6cef2bdd11e0..000000000000 --- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h +++ /dev/null @@ -1,637 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_GPU_REGMAP_H_ -#define _KBASE_GPU_REGMAP_H_ - -#include -#include -#include -#if MALI_USE_CSF -#include "backend/mali_kbase_gpu_regmap_csf.h" -#else -#include "backend/mali_kbase_gpu_regmap_jm.h" -#endif - -/* GPU_U definition */ -#ifdef __ASSEMBLER__ -#define GPU_U(x) x -#define GPU_UL(x) x -#define GPU_ULL(x) x -#else -#define GPU_U(x) x##u -#define GPU_UL(x) x##ul -#define GPU_ULL(x) x##ull -#endif /* __ASSEMBLER__ */ - -/* Begin Register Offsets */ -/* GPU control registers */ - -#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -#define MMU_FEATURES 0x014 /* (RO) MMU features */ -#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_MASK 0x028 /* (RW) */ - -#define GPU_COMMAND 0x030 /* (WO) */ -#define GPU_STATUS 0x034 /* (RO) */ - -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ - -#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ - -#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ - -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core - * supergroup are l2 coherent - */ - -#define PWR_KEY 0x050 /* (WO) Power manager key register */ -#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ -#define GPU_FEATURES_LO 0x060 /* 
(RO) GPU features, low word */ -#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ -#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */ -#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */ -#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */ -#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ - -#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ -#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ - -#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ - -#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) - -#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ -#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ -#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ -#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ - -#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, 
high word */ - -#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ - -#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ - -#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ - -#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ -#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ - -#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ - -#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ -#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ - -#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ - -#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ - -#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ - -#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ - -#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ - -#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ - -#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ 
-#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ - -#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ -#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) -#define ASN_HASH_COUNT 3 - -#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */ -#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4) -#define SYSC_ALLOC_COUNT 8 - -#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ -#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ - -#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ - -#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ - -#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ - -#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ - -#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */ -#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */ - -#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ -#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ -#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ - -/* Job control registers */ - -#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ - -/* MMU control registers */ - -#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -#define MMU_AS4 0x500 /* Configuration 
registers for address space 4 */ -#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ - -/* MMU address space control registers */ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C - -/* End Register Offsets */ - -#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) - -/* - * MMU_IRQ_RAWSTAT register values. Values are valid also for - * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
- */ - -#define MMU_PAGE_FAULT_FLAGS 16 - -/* Macros returning a bitmask to retrieve page fault or bus error flags from - * MMU registers - */ -#define MMU_PAGE_FAULT(n) (1UL << (n)) -#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) - -/* - * Begin AARCH64 MMU TRANSTAB register values - */ -#define MMU_HW_OUTA_BITS 40 -#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) - -/* - * Begin MMU STATUS register values - */ -#define AS_STATUS_AS_ACTIVE 0x01 - -#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) - -#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 - -#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) -#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) - -#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) - -#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 -#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) -#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) 
- -#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0) -#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \ - ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) -#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \ - (((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \ - PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) - -/* - * Begin MMU TRANSCFG register values - */ -#define AS_TRANSCFG_ADRMODE_LEGACY 0 -#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 - -#define AS_TRANSCFG_ADRMODE_MASK 0xF - -/* - * Begin TRANSCFG register values - */ -#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) - -#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) -#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) -#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) -#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) - -/* - * Begin Command Values - */ - -/* AS_COMMAND register commands */ -#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -/* Flush all L2 caches then issue a flush region command to all MMUs */ -#define AS_COMMAND_FLUSH_PT 0x04 -/* Wait for memory accesses to complete, flush all the L1s cache then flush all - * L2 caches then issue a flush region command to all MMUs - */ -#define AS_COMMAND_FLUSH_MEM 0x05 - -/* AS_LOCKADDR register */ -#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) -#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \ - (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) -#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ - (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \ - AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) 
-#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \ - (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \ - (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \ - AS_LOCKADDR_LOCKADDR_SIZE_MASK)) -#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) -#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ - (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) -#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ - (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ - AS_LOCKADDR_LOCKADDR_BASE_SHIFT) -#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \ - (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ - (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ - AS_LOCKADDR_LOCKADDR_BASE_MASK)) -#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6) -#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) -#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \ - (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ - ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) - -/* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - -/* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ - -/* The performance counters are disabled. */ -#define PRFCNT_CONFIG_MODE_OFF 0 -/* The performance counters are enabled, but are only written out when a - * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. - */ -#define PRFCNT_CONFIG_MODE_MANUAL 1 -/* The performance counters are enabled, and are written out each time a tile - * finishes rendering. 
- */ -#define PRFCNT_CONFIG_MODE_TILE 2 - -/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_WRITE_ALLOC 0x8Dull - -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -/* There is no LPAE support for non-cacheable, since the memory type is always - * write-back. 
- * Marking this setting as reserved for LPAE - */ -#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED - -/* L2_MMU_CONFIG register */ -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) - -/* End L2_MMU_CONFIG register */ - -/* THREAD_* registers */ - -/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -#define IMPLEMENTATION_UNSPECIFIED 0 -#define IMPLEMENTATION_SILICON 1 -#define IMPLEMENTATION_FPGA 2 -#define IMPLEMENTATION_MODEL 3 - -/* Default values when registers are not supported by the implemented hardware */ -#define THREAD_MT_DEFAULT 256 -#define THREAD_MWS_DEFAULT 256 -#define THREAD_MBS_DEFAULT 256 -#define THREAD_MR_DEFAULT 1024 -#define THREAD_MTQ_DEFAULT 4 -#define THREAD_MTGS_DEFAULT 10 - -/* End THREAD_* registers */ - -/* SHADER_CONFIG register */ -#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -#define SC_TLS_HASH_ENABLE (1ul << 17) -#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) -#define SC_VAR_ALGORITHM (1ul << 29) -/* End SHADER_CONFIG register */ - -/* TILER_CONFIG register */ -#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) -/* End TILER_CONFIG register */ - -/* L2_CONFIG register */ -#define L2_CONFIG_SIZE_SHIFT 16 -#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) -#define L2_CONFIG_HASH_SHIFT 24 -#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) -#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 -#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) -/* End L2_CONFIG register */ - -/* AMBA_FEATURES register */ -#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0) -#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT) -#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \ - (((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \ - AMBA_FEATURES_ACE_LITE_SHIFT) -#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \ - (((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \ - (((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & 
\ - AMBA_FEATURES_ACE_LITE_MASK)) -#define AMBA_FEATURES_ACE_SHIFT GPU_U(1) -#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT) -#define AMBA_FEATURES_ACE_GET(reg_val) \ - (((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT) -#define AMBA_FEATURES_ACE_SET(reg_val, value) \ - (((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \ - (((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK)) -#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) -#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \ - (GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) -#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \ - (((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \ - AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) -#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \ - (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \ - AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK)) -#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) -#define AMBA_FEATURES_INVALIDATE_HINT_MASK \ - (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ - (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \ - AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ - (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \ - AMBA_FEATURES_INVALIDATE_HINT_MASK)) - -/* AMBA_ENABLE register */ -#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) -#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \ - (GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) -#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \ - (((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \ - AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) -#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \ - (((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \ - (((value) << 
AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \ - AMBA_ENABLE_COHERENCY_PROTOCOL_MASK)) -/* AMBA_ENABLE_coherency_protocol values */ -#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0 -#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1 -#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F -/* End of AMBA_ENABLE_coherency_protocol values */ -#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) -#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \ - (GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) -#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \ - (((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \ - AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) -#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \ - (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \ - AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK)) -#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) -#define AMBA_ENABLE_INVALIDATE_HINT_MASK \ - (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ - (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \ - AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ - (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \ - AMBA_ENABLE_INVALIDATE_HINT_MASK)) - -/* IDVS_GROUP register */ -#define IDVS_GROUP_SIZE_SHIFT (16) -#define IDVS_GROUP_MAX_SIZE (0x3F) - -/* SYSC_ALLOC read IDs */ -#define SYSC_ALLOC_ID_R_OTHER 0x00 -#define SYSC_ALLOC_ID_R_CSF 0x02 -#define SYSC_ALLOC_ID_R_MMU 0x04 -#define SYSC_ALLOC_ID_R_TILER_VERT 0x08 -#define SYSC_ALLOC_ID_R_TILER_PTR 0x09 -#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A -#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B -#define SYSC_ALLOC_ID_R_IC 0x10 -#define SYSC_ALLOC_ID_R_ATTR 0x11 -#define SYSC_ALLOC_ID_R_SCM 0x12 -#define SYSC_ALLOC_ID_R_FSDC 0x13 -#define SYSC_ALLOC_ID_R_VL 0x14 -#define SYSC_ALLOC_ID_R_PLR 0x15 -#define 
SYSC_ALLOC_ID_R_TEX 0x18 -#define SYSC_ALLOC_ID_R_LSC 0x1c - -/* SYSC_ALLOC write IDs */ -#define SYSC_ALLOC_ID_W_OTHER 0x00 -#define SYSC_ALLOC_ID_W_CSF 0x02 -#define SYSC_ALLOC_ID_W_PCB 0x07 -#define SYSC_ALLOC_ID_W_TILER_PTR 0x09 -#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A -#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B -#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C -#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D -#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10 -#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11 -#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12 -#define SYSC_ALLOC_ID_W_TIB_CRC 0x13 -#define SYSC_ALLOC_ID_W_TIB_DS 0x14 -#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15 -#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16 -#define SYSC_ALLOC_ID_W_LSC 0x1C - -/* SYSC_ALLOC values */ -#define SYSC_ALLOC_L2_ALLOC 0x0 -#define SYSC_ALLOC_NEVER_ALLOC 0x2 -#define SYSC_ALLOC_ALWAYS_ALLOC 0x3 -#define SYSC_ALLOC_PTL_ALLOC 0x4 -#define SYSC_ALLOC_L2_PTL_ALLOC 0x5 - -/* SYSC_ALLOC register */ -#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0) -#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \ - SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \ - (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \ - SYSC_ALLOC_R_SYSC_ALLOC0_MASK)) -/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */ -#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4) -#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \ - SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \ - (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \ - SYSC_ALLOC_W_SYSC_ALLOC0_MASK)) -/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */ -#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8) 
-#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \ - SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \ - (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \ - SYSC_ALLOC_R_SYSC_ALLOC1_MASK)) -/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */ -#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12) -#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \ - SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \ - (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \ - SYSC_ALLOC_W_SYSC_ALLOC1_MASK)) -/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */ -#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16) -#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \ - SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \ - (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \ - SYSC_ALLOC_R_SYSC_ALLOC2_MASK)) -/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */ -#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20) -#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \ - SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \ - (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \ - SYSC_ALLOC_W_SYSC_ALLOC2_MASK)) -/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */ -#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24) 
-#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \ - SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) -#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \ - (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \ - SYSC_ALLOC_R_SYSC_ALLOC3_MASK)) -/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */ -#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28) -#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \ - (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \ - SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) -#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \ - (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \ - (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \ - SYSC_ALLOC_W_SYSC_ALLOC3_MASK)) -/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */ - -/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ -#ifdef CONFIG_MALI_BIFROST_DEBUG -#undef GPU_IRQ_REG_ALL -#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - -#endif /* _KBASE_GPU_REGMAP_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/Kbuild b/drivers/gpu/arm/bifrost/hw_access/Kbuild new file mode 100644 index 000000000000..c4f83dd11369 --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/Kbuild @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2023 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +bifrost_kbase-y += hw_access/mali_kbase_hw_access.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + bifrost_kbase-y += hw_access/regmap/mali_kbase_regmap_csf.o +else + bifrost_kbase-y += hw_access/regmap/mali_kbase_regmap_jm.o +endif + +ifeq ($(CONFIG_MALI_REAL_HW), y) +bifrost_kbase-y += hw_access/backend/mali_kbase_hw_access_real_hw.o +else +bifrost_kbase-y += hw_access/backend/mali_kbase_hw_access_model_linux.o +endif diff --git a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c new file mode 100644 index 000000000000..ca1ccbfb3dbe --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/*
+ * Fetch the GPU ID through the model. Only the low 32-bit word is read, so
+ * the upper half of the returned value is always zero.
+ */
+u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev)
+{
+	unsigned long irq_flags;
+	u32 id_lo = 0;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, irq_flags);
+	midgard_model_read_reg(kbdev->model, GPU_CONTROL_REG(GPU_ID), &id_lo);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, irq_flags);
+
+	return (u64)id_lo;
+}
+
+/*
+ * Read one 32-bit register through the model, under reg_op_lock.
+ * Yields 0 when the GPU is not powered or the register fails the
+ * read/32-bit access check.
+ */
+u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum)
+{
+	const u32 access = KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT;
+	unsigned long irq_flags;
+	u32 reg_offset;
+	u32 result = 0;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered) ||
+	    unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, access)))
+		return 0;
+
+	reg_offset = kbdev->regmap.regs[reg_enum] - kbdev->reg;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, irq_flags);
+	midgard_model_read_reg(kbdev->model, reg_offset, &result);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, irq_flags);
+
+	return result;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read32);
+
+/*
+ * Read a 64-bit register as two 32-bit model reads (low word first, then the
+ * word at offset + 4) inside a single reg_op_lock critical section.
+ * Yields 0 when the GPU is not powered or the access check fails.
+ */
+u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum)
+{
+	const u32 access = KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT;
+	unsigned long irq_flags;
+	u32 reg_offset;
+	u32 lo_word = 0;
+	u32 hi_word = 0;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered) ||
+	    unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, access)))
+		return 0;
+
+	reg_offset = kbdev->regmap.regs[reg_enum] - kbdev->reg;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, irq_flags);
+	midgard_model_read_reg(kbdev->model, reg_offset, &lo_word);
+	midgard_model_read_reg(kbdev->model, reg_offset + 4, &hi_word);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, irq_flags);
+
+	return ((u64)hi_word << 32) | (u64)lo_word;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read64);
+
+/*
+ * Read a 64-bit register whose halves must belong together: the high word is
+ * sampled before and after the low word, and the whole sequence is repeated
+ * until both high-word samples agree.
+ * Yields 0 when the GPU is not powered or the access check fails.
+ */
+u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum)
+{
+	const u32 access = KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT;
+	unsigned long irq_flags;
+	u32 reg_offset;
+	u32 hi_before = 0;
+	u32 hi_after = 0;
+	u32 lo_word = 0;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered) ||
+	    unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, access)))
+		return 0;
+
+	reg_offset = kbdev->regmap.regs[reg_enum] - kbdev->reg;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, irq_flags);
+	for (;;) {
+		midgard_model_read_reg(kbdev->model, reg_offset + 4, &hi_before);
+		midgard_model_read_reg(kbdev->model, reg_offset, &lo_word);
+		midgard_model_read_reg(kbdev->model, reg_offset + 4, &hi_after);
+		if (hi_before == hi_after)
+			break;
+	}
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, irq_flags);
+
+	return ((u64)hi_before << 32) | lo_word;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read64_coherent);
+
+/*
+ * Write one 32-bit register through the model, under reg_op_lock.
+ * The write is skipped when the GPU is not powered or the register fails the
+ * write/32-bit access check.
+ */
+void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value)
+{
+	const u32 access = KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT;
+	unsigned long irq_flags;
+	u32 reg_offset;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered) ||
+	    unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, access)))
+		return;
+
+	reg_offset = kbdev->regmap.regs[reg_enum] - kbdev->reg;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, irq_flags);
+	midgard_model_write_reg(kbdev->model, reg_offset, value);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, irq_flags);
+}
+KBASE_EXPORT_TEST_API(kbase_reg_write32);
+
+/*
+ * Write a 64-bit register as two 32-bit model writes (low word first, then
+ * the word at offset + 4) inside a single reg_op_lock critical section.
+ * The write is skipped when the GPU is not powered or the access check fails.
+ */
+void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value)
+{
+	const u32 access = KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT;
+	unsigned long irq_flags;
+	u32 reg_offset;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered) ||
+	    unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, access)))
+		return;
+
+	reg_offset = kbdev->regmap.regs[reg_enum] - kbdev->reg;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, irq_flags);
+	midgard_model_write_reg(kbdev->model, reg_offset, value & 0xFFFFFFFF);
+	midgard_model_write_reg(kbdev->model, reg_offset + 4, value >> 32);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, irq_flags);
+}
+KBASE_EXPORT_TEST_API(kbase_reg_write64);
diff --git a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c
b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c new file mode 100644 index 000000000000..f4afbf55e312 --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ */
+
+#include
+#include
+
+#include
+#include
+
+/*
+ * Read the GPU ID from the first 32-bit word of the register mapping
+ * (@kbdev->reg). Only the low word is read, so the upper half of the result
+ * is always zero. No power or regmap check is made here.
+ */
+u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev)
+{
+	u32 val[2] = { 0 };
+
+	val[0] = readl(kbdev->reg);
+
+	return (u64)val[0] | ((u64)val[1] << 32);
+}
+
+/*
+ * Read one 32-bit register with readl(). Returns 0 when the GPU is not
+ * powered or the register fails the read/32-bit access check. The access is
+ * recorded in the I/O history when that is enabled (CONFIG_DEBUG_FS).
+ */
+u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum)
+{
+	u32 val;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+		return 0;
+	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
+					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT)))
+		return 0;
+
+	val = readl(kbdev->regmap.regs[reg_enum]);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum], val, 0);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "r32: reg %08x val %08x",
+		(u32)(kbdev->regmap.regs[reg_enum] - kbdev->reg), val);
+
+	return val;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read32);
+
+/*
+ * Read a 64-bit register as two readl() calls: the low word, then the word
+ * at +4. Returns 0 when the GPU is not powered or the register fails the
+ * read/64-bit access check. Both halves are recorded in the I/O history
+ * when that is enabled.
+ */
+u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum)
+{
+	u64 val;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+		return 0;
+	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
+					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
+		return 0;
+
+	val = (u64)readl(kbdev->regmap.regs[reg_enum]) |
+	      ((u64)readl(kbdev->regmap.regs[reg_enum] + 4) << 32);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	if (unlikely(kbdev->io_history.enabled)) {
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum], (u32)val, 0);
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum] + 4,
+				     (u32)(val >> 32), 0);
+	}
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "r64: reg %08x val %016llx",
+		(u32)(kbdev->regmap.regs[reg_enum] - kbdev->reg), val);
+
+	return val;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read64);
+
+/*
+ * Read a 64-bit register whose halves must belong together: the high word is
+ * sampled before and after the low word, and the sequence is repeated until
+ * both high-word samples agree. Returns 0 when the GPU is not powered or the
+ * register fails the read/64-bit access check.
+ */
+u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum)
+{
+	u64 val;
+	/*
+	 * Declared unconditionally: the split 32-bit read sequence below is
+	 * compiled regardless of CONFIG_MALI_64BIT_HW_ACCESS, so guarding these
+	 * locals with that option breaks the build whenever it is enabled.
+	 */
+	u32 hi1, hi2, lo;
+
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+		return 0;
+	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
+					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
+		return 0;
+
+	do {
+		hi1 = readl(kbdev->regmap.regs[reg_enum] + 4);
+		lo = readl(kbdev->regmap.regs[reg_enum]);
+		hi2 = readl(kbdev->regmap.regs[reg_enum] + 4);
+	} while (hi1 != hi2);
+
+	val = lo | (((u64)hi1) << 32);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	if (unlikely(kbdev->io_history.enabled)) {
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum], (u32)val, 0);
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum] + 4,
+				     (u32)(val >> 32), 0);
+	}
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "r64: reg %08x val %016llx",
+		(u32)(kbdev->regmap.regs[reg_enum] - kbdev->reg), val);
+
+	return val;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read64_coherent);
+
+/*
+ * Write one 32-bit register with writel(). The write is skipped when the GPU
+ * is not powered or the register fails the write/32-bit access check. The
+ * access is recorded in the I/O history when that is enabled.
+ */
+void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value)
+{
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+		return;
+	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
+					      KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT)))
+		return;
+
+	writel(value, kbdev->regmap.regs[reg_enum]);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum], value, 1);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "w32: reg %08x val %08x",
+		(u32)(kbdev->regmap.regs[reg_enum] - kbdev->reg), value);
+}
+KBASE_EXPORT_TEST_API(kbase_reg_write32);
+
+/*
+ * Write a 64-bit register as two writel() calls: the low word, then the word
+ * at +4. The write is skipped when the GPU is not powered or the register
+ * fails the write/64-bit access check. Both halves are recorded in the I/O
+ * history when that is enabled.
+ */
+void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value)
+{
+	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+		return;
+	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
+					      KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT)))
+		return;
+
+	writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]);
+	writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	if (unlikely(kbdev->io_history.enabled)) {
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum], (u32)value,
+				     1);
+		kbase_io_history_add(&kbdev->io_history, kbdev->regmap.regs[reg_enum] + 4,
+				     (u32)(value >> 32), 1);
+	}
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "w64: reg %08x val %016llx",
+		(u32)(kbdev->regmap.regs[reg_enum] - kbdev->reg), value);
+}
+KBASE_EXPORT_TEST_API(kbase_reg_write64);
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c
new file mode 100644
index 000000000000..16a27c780d3b
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include
+#include
+
+#include
+#include "mali_kbase_hw_access.h"
+
+#include
+
+/*
+ * True when @reg_enum is inside the regmap and flagged 64-bit wide.
+ * Out-of-range enums trigger a WARN and yield false.
+ */
+bool kbase_reg_is_size64(struct kbase_device *kbdev, u32 reg_enum)
+{
+	if (WARN_ON(reg_enum >= kbdev->regmap.size))
+		return false;
+
+	return kbdev->regmap.flags[reg_enum] & KBASE_REGMAP_WIDTH_64_BIT;
+}
+
+/*
+ * True when @reg_enum is inside the regmap and flagged 32-bit wide.
+ * Out-of-range enums trigger a WARN and yield false.
+ */
+bool kbase_reg_is_size32(struct kbase_device *kbdev, u32 reg_enum)
+{
+	if (WARN_ON(reg_enum >= kbdev->regmap.size))
+		return false;
+
+	return kbdev->regmap.flags[reg_enum] & KBASE_REGMAP_WIDTH_32_BIT;
+}
+
+/* True when @reg_enum is inside the regmap and has at least one flag set. */
+bool kbase_reg_is_valid(struct kbase_device *kbdev, u32 reg_enum)
+{
+	return reg_enum < kbdev->regmap.size && kbdev->regmap.flags[reg_enum] != 0;
+}
+
+/*
+ * Check that @reg_enum is valid and carries every bit in @flags.
+ * The check only exists with CONFIG_MALI_BIFROST_DEBUG, where a failure also
+ * raises a WARN; without it the function performs no check and returns true.
+ */
+bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags)
+{
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+	if (WARN(!kbase_reg_is_valid(kbdev, reg_enum), "Invalid register enum 0x%x: %s", reg_enum,
+		 kbase_reg_get_enum_string(reg_enum)))
+		return false;
+	if (WARN((kbdev->regmap.flags[reg_enum] & flags) != flags,
+		 "Invalid register access permissions 0x%x: %s", reg_enum,
+		 kbase_reg_get_enum_string(reg_enum)))
+		return false;
+#else
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(reg_enum);
+	CSTD_UNUSED(flags);
+#endif
+
+	return true;
+}
+
+/*
+ * Translate @reg_enum into its byte offset from the start of the register
+ * mapping and store it in @offset.
+ *
+ * Returns 0 on success, or -EINVAL when @reg_enum is out of range or has no
+ * regmap entry.
+ */
+int kbase_reg_get_offset(struct kbase_device *kbdev, u32 reg_enum, u32 *offset)
+{
+	/*
+	 * kbase_reg_is_accessible() is a no-op without CONFIG_MALI_BIFROST_DEBUG,
+	 * so validate the enum explicitly as well: otherwise release builds
+	 * would index regmap.regs[] with an unchecked value. The accessibility
+	 * call is kept first so debug builds still raise their WARN.
+	 */
+	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, 0) ||
+		     !kbase_reg_is_valid(kbdev, reg_enum)))
+		return -EINVAL;
+
+	*offset = (u32)(kbdev->regmap.regs[reg_enum] - kbdev->reg);
+	return 0;
+}
+
+/*
+ * Reverse lookup: find the regmap entry whose address equals
+ * @kbdev->reg + @offset and store its index in @reg_enum.
+ *
+ * Returns 0 on success, or -EINVAL when no entry matches. The search is a
+ * linear scan over the whole regmap.
+ */
+int kbase_reg_get_enum(struct kbase_device *kbdev, u32 offset, u32 *reg_enum)
+{
+	size_t i = 0;
+	void __iomem *ptr = kbdev->reg + offset;
+
+	for (i = 0; i < kbdev->regmap.size; i++) {
+		if (kbdev->regmap.regs[i] == ptr) {
+			*reg_enum = (u32)i;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Build the register map for this device via kbase_regmap_backend_init().
+ *
+ * Returns 0 on success, -ENODEV when @kbdev->dev is NULL, -ENXIO when the
+ * register mapping is NULL (not checked with CONFIG_MALI_BIFROST_NO_MALI), or
+ * -ENOMEM when the backend left regmap.regs or regmap.flags unset; in that
+ * last case any partial state is released through kbase_regmap_term().
+ */
+int kbase_regmap_init(struct kbase_device *kbdev)
+{
+	u32 lut_arch_id;
+
+	if (WARN_ON(kbdev->dev == NULL))
+		return -ENODEV;
+
+	if (!IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) && WARN_ON(kbdev->reg == NULL))
+		return -ENXIO;
+
+	lut_arch_id = kbase_regmap_backend_init(kbdev);
+
+	if (kbdev->regmap.regs == NULL || kbdev->regmap.flags == NULL) {
+		kbase_regmap_term(kbdev);
+		return -ENOMEM;
+	}
+
+	dev_info(kbdev->dev, "Register LUT %08x initialized for GPU arch 0x%08x\n", lut_arch_id,
+		 kbdev->gpu_props.gpu_id.arch_id);
+
+#if IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS) && IS_ENABLED(CONFIG_MALI_REAL_HW)
+	dev_info(kbdev->dev, "64-bit HW access enabled\n");
+#endif
+	return 0;
+}
+
+/*
+ * Free the regmap tables and reset the regmap to its empty state. Safe to
+ * call on a partially initialised regmap: kfree(NULL) is a no-op.
+ */
+void kbase_regmap_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->regmap.regs);
+	kfree(kbdev->regmap.flags);
+
+	kbdev->regmap.regs = NULL;
+	kbdev->regmap.flags = NULL;
+	kbdev->regmap.size = 0;
+}
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h
new file mode 100644
index 000000000000..c56c0b67a17f
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _MALI_KBASE_HW_ACCESS_H_
+#define _MALI_KBASE_HW_ACCESS_H_
+
+#define KBASE_REGMAP_PERM_READ (1U << 0)
+#define KBASE_REGMAP_PERM_WRITE (1U << 1)
+#define KBASE_REGMAP_WIDTH_32_BIT (1U << 2)
+#define KBASE_REGMAP_WIDTH_64_BIT (1U << 3)
+
+#define KBASE_REG_READ(kbdev, reg_enum) \
+	(kbase_reg_is_size64(kbdev, reg_enum) ? kbase_reg_read64(kbdev, reg_enum) : \
+						kbase_reg_read32(kbdev, reg_enum))
+
+#define KBASE_REG_WRITE(kbdev, reg_enum, value) \
+	(kbase_reg_is_size64(kbdev, reg_enum) ? kbase_reg_write64(kbdev, reg_enum, value) : \
+						kbase_reg_write32(kbdev, reg_enum, value))
+
+/**
+ * kbase_reg_read32 - read from 32-bit GPU register
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * Caller must ensure the GPU is powered
+ * (@kbdev->pm.backend.gpu_powered != false).
+ *
+ * Return: Value in desired register, or 0 if the GPU is not powered or the
+ * register fails the read/32-bit access check.
+ */
+u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_read64 - read from 64-bit GPU register
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * Caller must ensure the GPU is powered
+ * (@kbdev->pm.backend.gpu_powered != false).
+ *
+ * Return: Value in desired register, or 0 if the GPU is not powered or the
+ * register fails the read/64-bit access check.
+ */
+u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_read64_coherent - read from 64-bit GPU register while ensuring
+ * that both 32-bit halves belong together
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * The high word is sampled before and after the low word, and the read is
+ * retried until both high-word samples match.
+ *
+ * Caller must ensure the GPU is powered
+ * (@kbdev->pm.backend.gpu_powered != false).
+ *
+ * Return: Value in desired register, or 0 if the GPU is not powered or the
+ * register fails the read/64-bit access check.
+ */
+u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_write32 - write to 32-bit GPU register
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ * @value: Value to write
+ *
+ * The write is skipped if the GPU is not powered or the register fails the
+ * write/32-bit access check.
+ *
+ * Caller must ensure the GPU is powered
+ * (@kbdev->pm.backend.gpu_powered != false).
+ */
+void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value);
+
+/**
+ * kbase_reg_write64 - write to 64-bit GPU register
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ * @value: Value to write
+ *
+ * The write is skipped if the GPU is not powered or the register fails the
+ * write/64-bit access check.
+ *
+ * Caller must ensure the GPU is powered
+ * (@kbdev->pm.backend.gpu_powered != false).
+ */
+void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value);
+
+/**
+ * kbase_reg_is_size64 - check GPU register size is 64-bit
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * Return: true if the register is flagged as 64-bit wide, false otherwise
+ * (including when @reg_enum is outside the regmap).
+ */
+bool kbase_reg_is_size64(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_is_size32 - check GPU register size is 32-bit
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * Return: true if the register is flagged as 32-bit wide, false otherwise
+ * (including when @reg_enum is outside the regmap).
+ */
+bool kbase_reg_is_size32(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_is_valid - check register enum is valid and present in regmap
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * Return: true if @reg_enum is inside the regmap and has at least one flag
+ * set, false otherwise
+ */
+bool kbase_reg_is_valid(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_is_accessible - check register enum is accessible
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ * @flags: Register permissions and size checks
+ *
+ * The check is only performed when CONFIG_MALI_BIFROST_DEBUG is set; without
+ * it this function always returns true.
+ *
+ * Return: true if the register is valid and carries every bit in @flags,
+ * false otherwise
+ */
+bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags);
+
+/**
+ * kbase_reg_get_offset - get register offset from enum
+ * @kbdev: Kbase device pointer
+ * @reg_enum: Register enum
+ * @offset: Pointer to store value of register offset
+ *
+ * Return: 0 on success, -EINVAL if @reg_enum is rejected
+ */
+int kbase_reg_get_offset(struct kbase_device *kbdev, u32 reg_enum, u32 *offset);
+
+/**
+ * kbase_reg_get_enum - get enum from register offset
+ * @kbdev: Kbase device pointer
+ * @offset: Register offset
+ * @reg_enum: Pointer to store enum value
+ *
+ * Return: 0 on success, -EINVAL if no register in the regmap is mapped at
+ * @offset
+ */
+int kbase_reg_get_enum(struct kbase_device *kbdev, u32 offset, u32 *reg_enum);
+
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+/**
+ * kbase_reg_get_enum_string - get the string for a particular enum
+ * @reg_enum: Register enum
+ *
+ * Return: string containing the name of enum
+ */
+const char *kbase_reg_get_enum_string(u32 reg_enum);
+#endif /* CONFIG_MALI_BIFROST_DEBUG */
+
+/**
+ * kbase_reg_get_gpu_id - get GPU ID from register or dummy model
+ * @kbdev: Kbase device pointer
+ *
+ * Return: GPU ID on success, 0 otherwise.
+ */
+u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev);
+
+/**
+ * kbase_regmap_init - regmap init function
+ * @kbdev: Kbase device pointer
+ *
+ * Return: 0 if successful, -ENODEV if @kbdev->dev is NULL, -ENXIO if the
+ * register mapping is NULL (not checked with CONFIG_MALI_BIFROST_NO_MALI),
+ * -ENOMEM if the backend did not provide the regmap tables
+ */
+int kbase_regmap_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_regmap_backend_init - Initialize register mapping backend
+ * @kbdev: Kbase device pointer
+ *
+ * Return: the arch_id of the selected look-up table.
+ */
+u32 kbase_regmap_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_regmap_term - regmap term function
+ * @kbdev: Kbase device pointer
+ *
+ * Frees the regmap tables and resets the regmap to empty.
+ */
+void kbase_regmap_term(struct kbase_device *kbdev);
+
+#endif /* _MALI_KBASE_HW_ACCESS_H_ */
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h
new file mode 100644
index 000000000000..ead7ac8dfb30
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h
@@ -0,0 +1,507 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_KBASE_HW_ACCESS_REGMAP_H_ +#define _MALI_KBASE_HW_ACCESS_REGMAP_H_ + +#if MALI_USE_CSF +#include "regmap/mali_kbase_regmap_csf_enums.h" +#include "regmap/mali_kbase_regmap_csf_macros.h" +#else +#include "regmap/mali_kbase_regmap_jm_enums.h" +#include "regmap/mali_kbase_regmap_jm_macros.h" +#endif + +/* GPU_U definition */ +#ifdef __ASSEMBLER__ +#define GPU_U(x) x +#define GPU_UL(x) x +#define GPU_ULL(x) x +#else +#define GPU_U(x) x##u +#define GPU_UL(x) x##ul +#define GPU_ULL(x) x##ull +#endif /* __ASSEMBLER__ */ + +/* common GPU_STATUS values */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ +#define GPU_STATUS_GPU_DBG_ENABLED (1 << 8) /* DBGEN wire status */ + +#define GPU_SYSC_ALLOC_COUNT 8 +#define GPU_L2_SLICE_HASH_COUNT 3 +/* GPU_ASN_HASH_COUNT is an alias to GPU_L2_SLICE_HASH_COUNT */ +#define GPU_ASN_HASH_COUNT GPU_L2_SLICE_HASH_COUNT + +/* Cores groups are l2 coherent */ +#define MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT GPU_U(0) +#define MEM_FEATURES_COHERENT_CORE_GROUP_MASK (GPU_U(0x1) << MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT) + +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
+ */ +#define MMU_PAGE_FAULT_FLAGS 16 + +/* Macros returning a bitmask to retrieve page fault or bus error flags from + * MMU registers + */ +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* + * Begin MMU STATUS register values + */ +#define AS_STATUS_AS_ACTIVE_EXT_SHIFT GPU_U(0) +#define AS_STATUS_AS_ACTIVE_EXT_MASK (GPU_U(0x1) << AS_STATUS_AS_ACTIVE_EXT_SHIFT) +#define AS_STATUS_AS_ACTIVE_EXT_GET(reg_val) \ + (((reg_val)&AS_STATUS_AS_ACTIVE_EXT_MASK) >> AS_STATUS_AS_ACTIVE_EXT_SHIFT) +#define AS_STATUS_AS_ACTIVE_EXT_SET(reg_val, value) \ + (~(~(reg_val) | AS_STATUS_AS_ACTIVE_EXT_MASK) | \ + (((value) << AS_STATUS_AS_ACTIVE_EXT_SHIFT) & AS_STATUS_AS_ACTIVE_EXT_MASK)) + +/* + * Begin MMU FAULTSTATUS register values + */ +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT GPU_U(0) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (GPU_U(0xFF) << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SET(reg_val, value) \ + (~(~(reg_val) | AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) | \ + (((value) << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) & AS_FAULTSTATUS_EXCEPTION_TYPE_MASK)) + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT GPU_U(8) +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (GPU_U(0x3) << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_SET(reg_val, value) \ + (~(~(reg_val) | AS_FAULTSTATUS_ACCESS_TYPE_MASK) | \ + (((value) << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) & AS_FAULTSTATUS_ACCESS_TYPE_MASK)) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 +#define AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 +#define AS_FAULTSTATUS_ACCESS_TYPE_READ 0x2 +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 + +#define 
AS_FAULTSTATUS_SOURCE_ID_SHIFT GPU_U(16) +#define AS_FAULTSTATUS_SOURCE_ID_MASK (GPU_U(0xFFFF) << AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_SET(reg_val, value) \ + (~(~(reg_val) | AS_FAULTSTATUS_SOURCE_ID_MASK) | \ + (((value) << AS_FAULTSTATUS_SOURCE_ID_SHIFT) & AS_FAULTSTATUS_SOURCE_ID_MASK)) + +/* + * Begin MMU TRANSCFG register values + */ +#define AS_TRANSCFG_MODE_SHIFT GPU_U(0) +#define AS_TRANSCFG_MODE_MASK (GPU_U(0xF) << AS_TRANSCFG_MODE_SHIFT) +#define AS_TRANSCFG_MODE_GET(reg_val) (((reg_val)&AS_TRANSCFG_MODE_MASK) >> AS_TRANSCFG_MODE_SHIFT) +#define AS_TRANSCFG_MODE_SET(reg_val, value) \ + (~(~(reg_val) | AS_TRANSCFG_MODE_MASK) | \ + (((value) << AS_TRANSCFG_MODE_SHIFT) & AS_TRANSCFG_MODE_MASK)) + +#define AS_TRANSCFG_MODE_UNMAPPED 0x1 +#define AS_TRANSCFG_MODE_IDENTITY 0x2 +#define AS_TRANSCFG_MODE_AARCH64_4K 0x6 +#define AS_TRANSCFG_MODE_AARCH64_64K 0x8 + +#define AS_TRANSCFG_PTW_MEMATTR_SHIFT GPU_U(24) +#define AS_TRANSCFG_PTW_MEMATTR_MASK (GPU_U(0x3) << AS_TRANSCFG_PTW_MEMATTR_SHIFT) +#define AS_TRANSCFG_PTW_MEMATTR_GET(reg_val) \ + (((reg_val)&AS_TRANSCFG_PTW_MEMATTR_MASK) >> AS_TRANSCFG_PTW_MEMATTR_SHIFT) +#define AS_TRANSCFG_PTW_MEMATTR_SET(reg_val, value) \ + (~(~(reg_val) | AS_TRANSCFG_PTW_MEMATTR_MASK) | \ + (((value) << AS_TRANSCFG_PTW_MEMATTR_SHIFT) & AS_TRANSCFG_PTW_MEMATTR_MASK)) + +#define AS_TRANSCFG_PTW_MEMATTR_INVALID 0x0 +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE 0x1 +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK 0x2 + +#define AS_TRANSCFG_PTW_SH_SHIFT GPU_U(28) +#define AS_TRANSCFG_PTW_SH_MASK (GPU_U(0x3) << AS_TRANSCFG_PTW_SH_SHIFT) +#define AS_TRANSCFG_PTW_SH_GET(reg_val) \ + (((reg_val)&AS_TRANSCFG_PTW_SH_MASK) >> AS_TRANSCFG_PTW_SH_SHIFT) +#define AS_TRANSCFG_PTW_SH_SET(reg_val, value) \ + (~(~(reg_val) | AS_TRANSCFG_PTW_SH_MASK) | \ + (((value) << AS_TRANSCFG_PTW_SH_SHIFT) & 
AS_TRANSCFG_PTW_SH_MASK)) + +#define AS_TRANSCFG_PTW_SH_NON_SHAREABLE 0x0 +#define AS_TRANSCFG_PTW_SH_OUTER_SHAREABLE 0x2 +#define AS_TRANSCFG_PTW_SH_INNER_SHAREABLE 0x3 + +#define AS_TRANSCFG_R_ALLOCATE_SHIFT GPU_U(30) +#define AS_TRANSCFG_R_ALLOCATE_MASK (GPU_U(0x1) << AS_TRANSCFG_R_ALLOCATE_SHIFT) +#define AS_TRANSCFG_R_ALLOCATE_GET(reg_val) \ + (((reg_val)&AS_TRANSCFG_R_ALLOCATE_MASK) >> AS_TRANSCFG_R_ALLOCATE_SHIFT) +#define AS_TRANSCFG_R_ALLOCATE_SET(reg_val, value) \ + (~(~(reg_val) | AS_TRANSCFG_R_ALLOCATE_MASK) | \ + (((value) << AS_TRANSCFG_R_ALLOCATE_SHIFT) & AS_TRANSCFG_R_ALLOCATE_MASK)) + +#define AS_TRANSCFG_R_ALLOCATE_NO_READ_ALLOCATE 0x0 +#define AS_TRANSCFG_R_ALLOCATE_READ_ALLOCATE 0x1 + +/* AS_COMMAND register */ +#define AS_COMMAND_COMMAND_SHIFT GPU_U(0) +#define AS_COMMAND_COMMAND_MASK (GPU_U(0xFF) << AS_COMMAND_COMMAND_SHIFT) +#define AS_COMMAND_COMMAND_GET(reg_val) \ + (((reg_val)&AS_COMMAND_COMMAND_MASK) >> AS_COMMAND_COMMAND_SHIFT) +#define AS_COMMAND_COMMAND_SET(reg_val, value) \ + (~(~(reg_val) | AS_COMMAND_COMMAND_MASK) | \ + (((value) << AS_COMMAND_COMMAND_SHIFT) & AS_COMMAND_COMMAND_MASK)) + +#define AS_COMMAND_COMMAND_NOP 0x0 +#define AS_COMMAND_COMMAND_UPDATE 0x1 +#define AS_COMMAND_COMMAND_LOCK 0x2 +#define AS_COMMAND_COMMAND_UNLOCK 0x3 +#define AS_COMMAND_COMMAND_FLUSH_PT 0x4 +#define AS_COMMAND_COMMAND_FLUSH_MEM 0x5 + +/* AS_LOCKADDR register */ +#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) +#define AS_LOCKADDR_LOCKADDR_SIZE_MASK (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \ + (~(~(reg_val) | AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \ + (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & AS_LOCKADDR_LOCKADDR_SIZE_MASK)) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT GPU_U(6) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK (GPU_U(0xF) << 
AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) >> AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \ + (~(~(reg_val) | AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ + (((value) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) +#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) +#define AS_LOCKADDR_LOCKADDR_BASE_MASK (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \ + (~(~(reg_val) | AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ + (((uint64_t)(value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ + AS_LOCKADDR_LOCKADDR_BASE_MASK)) + +/* AS_MEMATTR_ATTRIBUTE0 register */ +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_W_SHIFT GPU_U(0) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK (GPU_U(0x1) << AS_MEMATTR_ATTRIBUTE0_ALLOC_W_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_W_GET(reg_val) \ + (((reg_val)&AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK) >> AS_MEMATTR_ATTRIBUTE0_ALLOC_W_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_W_SET(reg_val, value) \ + (~(~(reg_val) | AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK) | \ + (((value) << AS_MEMATTR_ATTRIBUTE0_ALLOC_W_SHIFT) & AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK)) + +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_W_NOALLOCATE 0x0 +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_W_ALLOCATE 0x1 + +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_R_SHIFT GPU_U(1) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_R_MASK (GPU_U(0x1) << AS_MEMATTR_ATTRIBUTE0_ALLOC_R_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_R_GET(reg_val) \ + (((reg_val)&AS_MEMATTR_ATTRIBUTE0_ALLOC_R_MASK) >> AS_MEMATTR_ATTRIBUTE0_ALLOC_R_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_R_SET(reg_val, value) \ + (~(~(reg_val) | AS_MEMATTR_ATTRIBUTE0_ALLOC_R_MASK) | \ + (((value) << 
AS_MEMATTR_ATTRIBUTE0_ALLOC_R_SHIFT) & AS_MEMATTR_ATTRIBUTE0_ALLOC_R_MASK)) + +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_R_NOALLOCATE 0x0 +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_R_ALLOCATE 0x1 + +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SHIFT GPU_U(2) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_MASK (GPU_U(0x3) << AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_GET(reg_val) \ + (((reg_val)&AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_MASK) >> AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET(reg_val, value) \ + (~(~(reg_val) | AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_MASK) | \ + (((value) << AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SHIFT) & \ + AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_MASK)) + +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_IMPL 0x2 +#define AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_ALLOC 0x3 + +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_SHIFT GPU_U(4) +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_MASK \ + (GPU_U(0x3) << AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_GET(reg_val) \ + (((reg_val)&AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_MASK) >> \ + AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_SET(reg_val, value) \ + (~(~(reg_val) | AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_MASK) | \ + (((value) << AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_SHIFT) & \ + AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_MASK)) + +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_MIDGARD_INNER_DOMAIN 0x0 +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_CPU_INNER_DOMAIN 0x1 +#define AS_MEMATTR_ATTRIBUTE0_NO_IDENTITY_COHERENCY_CPU_INNER_DOMAIN_SHADER_COH 0x2 + +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHIFT GPU_U(6) +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_MASK \ + (GPU_U(0x3) << AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_GET(reg_val) \ + (((reg_val)&AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_MASK) >> \ 
+ AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET(reg_val, value) \ + (~(~(reg_val) | AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_MASK) | \ + (((value) << AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHIFT) & \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_MASK)) + +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_NON_CACHEABLE 0x1 +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_WRITE_BACK 0x2 +#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_FAULT 0x3 + +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_SHIFT GPU_U(4) +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_MASK \ + (GPU_U(0x3) << AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_GET(reg_val) \ + (((reg_val)&AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_MASK) >> \ + AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_SHIFT) +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_SET(reg_val, value) \ + (~(~(reg_val) | AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_MASK) | \ + (((value) << AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_SHIFT) & \ + AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_MASK)) + +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_NON_SHAREABLE 0x0 +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_INTERNAL_SHAREABLE 0x1 +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_OUTER_SHAREABLE 0x2 +#define AS_MEMATTR_ATTRIBUTE0_IDENTITY_COHERENCY_INNER_SHAREABLE 0x3 + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_NOT_SPECIFIED 0 +#define THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SILICON 1 +#define THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_FPGA 2 +#define THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE 3 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ +#define 
SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +#define SC_TLS_HASH_ENABLE (1ul << 17) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +#define SC_VAR_ALGORITHM (1ul << 29) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +/* End TILER_CONFIG register */ + +/* L2_CONFIG register */ +#define L2_CONFIG_SIZE_SHIFT 16 +#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +#define L2_CONFIG_HASH_SHIFT 24 +#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +#define L2_CONFIG_L2_SLICE_HASH_ENABLE_SHIFT 24 +#define L2_CONFIG_L2_SLICE_HASH_ENABLE_MASK (1ul << L2_CONFIG_L2_SLICE_HASH_ENABLE_SHIFT) +/* Aliases for _ASN_HASH_ENABLE_ */ +#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT L2_CONFIG_L2_SLICE_HASH_ENABLE_SHIFT +#define L2_CONFIG_ASN_HASH_ENABLE_MASK L2_CONFIG_L2_SLICE_HASH_ENABLE_MASK + +/* End L2_CONFIG register */ + +/* AMBA_FEATURES register */ +#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0) +#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT) +#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> AMBA_FEATURES_ACE_LITE_SHIFT) +#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \ + (((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & AMBA_FEATURES_ACE_LITE_MASK)) +#define AMBA_FEATURES_ACE_SHIFT GPU_U(1) +#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT) +#define AMBA_FEATURES_ACE_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT) +#define AMBA_FEATURES_ACE_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \ + (((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK)) +#define AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_SHIFT GPU_U(5) +#define AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_MASK \ + (GPU_U(0x1) << AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_SHIFT) +#define AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_GET(reg_val) \ + 
(((reg_val)&AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_MASK) >> \ + AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_SHIFT) +#define AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_MASK) | \ + (((value) << AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_SHIFT) & \ + AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_MASK)) +#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) +#define AMBA_FEATURES_INVALIDATE_HINT_MASK (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) +#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> AMBA_FEATURES_INVALIDATE_HINT_SHIFT) +#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ + (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & AMBA_FEATURES_INVALIDATE_HINT_MASK)) + +/* AMBA_ENABLE register */ +#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) +#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK (GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) +#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \ + (((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) +#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \ + (((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \ + (((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \ + AMBA_ENABLE_COHERENCY_PROTOCOL_MASK)) +/* AMBA_ENABLE_coherency_protocol values */ +#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0 +#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1 +#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F +/* End of AMBA_ENABLE_coherency_protocol values */ +#define AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SHIFT GPU_U(5) +#define AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_MASK \ + (GPU_U(0x1) << AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SHIFT) +#define AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_GET(reg_val) \ + (((reg_val)&AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_MASK) >> \ + AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SHIFT) 
+#define AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_MASK) | \ + (((value) << AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SHIFT) & \ + AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_MASK)) +#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) +#define AMBA_ENABLE_INVALIDATE_HINT_MASK (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) +#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ + (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> AMBA_ENABLE_INVALIDATE_HINT_SHIFT) +#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ + (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & AMBA_ENABLE_INVALIDATE_HINT_MASK)) + +/* SYSC_ALLOC read IDs */ +#define SYSC_ALLOC_ID_R_OTHER 0x00 +#define SYSC_ALLOC_ID_R_CSF 0x02 +#define SYSC_ALLOC_ID_R_MMU 0x04 +#define SYSC_ALLOC_ID_R_TILER_VERT 0x08 +#define SYSC_ALLOC_ID_R_TILER_PTR 0x09 +#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A +#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B +#define SYSC_ALLOC_ID_R_IC 0x10 +#define SYSC_ALLOC_ID_R_ATTR 0x11 +#define SYSC_ALLOC_ID_R_SCM 0x12 +#define SYSC_ALLOC_ID_R_FSDC 0x13 +#define SYSC_ALLOC_ID_R_VL 0x14 +#define SYSC_ALLOC_ID_R_PLR 0x15 +#define SYSC_ALLOC_ID_R_TEX 0x18 +#define SYSC_ALLOC_ID_R_LSC 0x1c + +/* SYSC_ALLOC write IDs */ +#define SYSC_ALLOC_ID_W_OTHER 0x00 +#define SYSC_ALLOC_ID_W_CSF 0x02 +#define SYSC_ALLOC_ID_W_PCB 0x07 +#define SYSC_ALLOC_ID_W_TILER_PTR 0x09 +#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A +#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B +#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C +#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D +#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10 +#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11 +#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12 +#define SYSC_ALLOC_ID_W_TIB_CRC 0x13 +#define SYSC_ALLOC_ID_W_TIB_DS 0x14 +#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15 +#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16 +#define SYSC_ALLOC_ID_W_LSC 0x1C + +/* SYSC_ALLOC values */ +#define 
SYSC_ALLOC_L2_ALLOC 0x0 +#define SYSC_ALLOC_NEVER_ALLOC 0x2 +#define SYSC_ALLOC_ALWAYS_ALLOC 0x3 +#define SYSC_ALLOC_PTL_ALLOC 0x4 +#define SYSC_ALLOC_L2_PTL_ALLOC 0x5 + +/* SYSC_ALLOC register */ +#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0) +#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & SYSC_ALLOC_R_SYSC_ALLOC0_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4) +#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & SYSC_ALLOC_W_SYSC_ALLOC0_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */ +#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8) +#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & SYSC_ALLOC_R_SYSC_ALLOC1_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12) +#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \ + (((reg_val) & 
~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & SYSC_ALLOC_W_SYSC_ALLOC1_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */ +#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16) +#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & SYSC_ALLOC_R_SYSC_ALLOC2_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20) +#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & SYSC_ALLOC_W_SYSC_ALLOC2_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */ +#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24) +#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & SYSC_ALLOC_R_SYSC_ALLOC3_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28) +#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \ + 
(((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & SYSC_ALLOC_W_SYSC_ALLOC3_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */ + +/* IDVS_GROUP register */ +#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) + +/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ +#ifdef CONFIG_MALI_BIFROST_DEBUG +#undef GPU_IRQ_REG_ALL +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) +#endif /* CONFIG_MALI_BIFROST_DEBUG */ + +#endif /* _MALI_KBASE_HW_ACCESS_REGMAP_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h new file mode 100644 index 000000000000..a62d1707ebb7 --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_KBASE_REGMAP_LEGACY_H_ +#define _MALI_KBASE_REGMAP_LEGACY_H_ + +#if MALI_USE_CSF +#include "regmap/mali_kbase_regmap_legacy_csf.h" +#else +#include "regmap/mali_kbase_regmap_legacy_jm.h" +#endif + +/* Begin Register Offsets */ +/* GPU control registers */ +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) + +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ + +#define GPU_IRQ_STATUS 0x02C /* (RO) */ +#define GPU_COMMAND 0x030 /* (WO) */ + +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ +#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ +#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ +#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */ +#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */ +#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */ +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define 
TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ +#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ +#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ +#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ + +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready 
bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (WO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (WO) Core stack power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (WO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (WO) Core stack power off bitmap,
high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define L2_SLICE_HASH_0 0x02C0 +#define L2_SLICE_HASH(n) (L2_SLICE_HASH_0 + (n)*4) +#define L2_SLICE_HASH_COUNT 3 +/* ASN_HASH aliases to L2_SLICE_HASH */ +#define ASN_HASH_0 L2_SLICE_HASH_0 +#define ASN_HASH(n) L2_SLICE_HASH(n) + + +#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */ +#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4) +#define SYSC_ALLOC_COUNT 8 + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + +#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */ +#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */ + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define 
TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_CONTROL_BASE 0x1000 +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ + +#define MMU_CONTROL_BASE 0x2000 +#define MMU_CONTROL_REG(r) (MMU_CONTROL_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* 
Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +#define MMU_STAGE1 0x2000 +#define MMU_STAGE1_REG(r) (MMU_STAGE1 + (r)) +#define MMU_AS_REG(n, r) (MMU_AS0 + ((n) << 6) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +#define AS_TRANSCFG_LO 0x30 /* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_HI \ + 0x34 /* (RW) Translation table configuration for address space n, high word */ +#define AS_FAULTEXTRA_LO 0x38 /* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_HI 0x3C /* (RO) Secondary fault address for address space n, high word */ + + +#endif /* _MALI_KBASE_REGMAP_LEGACY_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf.c b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf.c new file mode 100644 index 000000000000..cdeb6b65344d --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf.c @@ -0,0 +1,1525 @@ +// 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This file is autogenerated. Avoid modifying this file unless absolutely + * necessary. + */ + +#include +#include + +#include +#include "mali_kbase_regmap_csf_enums.h" +#include + +static void kbase_regmap_v10_8_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V10_8_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbdev->regmap.flags[GPU_CONTROL__GPU_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__MEM_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__MMU_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[GPU_CONTROL__AS_PRESENT] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__CSF_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PWR_KEY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PWR_OVERRIDE0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PWR_OVERRIDE1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TIMESTAMP_OFFSET] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__CYCLE_COUNT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TIMESTAMP] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_MAX_THREADS] = 
KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_MAX_BARRIER_SIZE] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_0] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_1] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_2] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_3] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__L2_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[GPU_CONTROL__L2_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRACTIVE] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWRACTIVE] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_PWRACTIVE] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__REVIDR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__COHERENCY_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__COHERENCY_ENABLE] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__MCU_CONTROL] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__MCU_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__STACK_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__STACK_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__STACK_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__STACK_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__STACK_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__CSF_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[GPU_CONTROL__SHADER_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TILER_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__L2_MMU_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_DBG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS0__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS0__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS0__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS0__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_CONTROL__AS0__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS0__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS0__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS0__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS0__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS1__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS1__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS1__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS1__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS1__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS1__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS1__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS1__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS1__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS2__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS2__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS2__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS2__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS2__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS2__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS2__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS2__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS2__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS3__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS3__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS3__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS3__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS3__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS3__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS3__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS3__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS3__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS4__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS4__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS4__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS4__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS4__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS4__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS4__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS4__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS4__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS5__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS5__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS5__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS5__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS5__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS5__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS5__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS5__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS5__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS6__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS6__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS6__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS6__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS6__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS6__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS6__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS6__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS6__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS7__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS7__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS7__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS7__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS7__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS7__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS7__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS7__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS7__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS8__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS8__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS8__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS8__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS8__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS8__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS8__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS8__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS8__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS9__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS9__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS9__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS9__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS9__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS9__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS9__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS9__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | 
KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS9__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS10__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS10__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS10__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS10__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS10__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS10__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS10__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS10__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS10__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS11__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS11__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS11__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS11__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS11__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS11__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[MMU_CONTROL__AS11__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS11__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS11__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS12__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS12__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS12__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS12__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS12__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS12__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS12__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS12__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS12__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS13__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS13__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS13__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS13__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS13__FAULTSTATUS] = 
KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS13__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS13__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS13__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS13__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS14__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS14__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS14__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS14__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS14__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS14__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS14__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS14__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS14__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS15__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS15__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS15__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_CONTROL__AS15__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS15__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS15__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS15__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_CONTROL__AS15__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_CONTROL__AS15__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[USER__LATEST_FLUSH] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[IPA_CONTROL__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__TIMER] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[IPA_CONTROL__SELECT_CSHW] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[IPA_CONTROL__SELECT_MEMSYS] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[IPA_CONTROL__SELECT_TILER] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[IPA_CONTROL__SELECT_SHADER] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_0] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_1] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_2] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_3] = 
KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_4] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_5] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_6] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_CSHW_7] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_0] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_1] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_2] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_3] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_4] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_5] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_6] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_MEMSYS_7] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_0] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_1] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_2] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_3] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_4] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_5] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_6] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_TILER_7] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_0] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_1] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_2] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_3] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_4] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_5] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_6] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[IPA_CONTROL__VALUE_SHADER_7] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[DOORBELL_BLOCK_0__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_1__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_2__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_3__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_4__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_5__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_6__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_7__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_8__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT 
| + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_9__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_10__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_11__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_12__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_13__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_14__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_15__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_16__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_17__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_18__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_19__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_20__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_21__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_22__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_23__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_24__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_25__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_26__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[DOORBELL_BLOCK_27__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_28__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_29__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_30__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_31__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_32__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_33__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_34__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_35__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_36__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_37__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_38__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_39__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_40__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_41__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_42__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_43__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_44__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_45__DOORBELL] = 
KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_46__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_47__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_48__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_49__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_50__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_51__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_52__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_53__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_54__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_55__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_56__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_57__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_58__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_59__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_60__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_61__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_62__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[DOORBELL_BLOCK_63__DOORBELL] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; 
+ + kbdev->regmap.regs[GPU_CONTROL__GPU_ID] = kbdev->reg + 0x0; + kbdev->regmap.regs[GPU_CONTROL__L2_FEATURES] = kbdev->reg + 0x4; + kbdev->regmap.regs[GPU_CONTROL__TILER_FEATURES] = kbdev->reg + 0xc; + kbdev->regmap.regs[GPU_CONTROL__MEM_FEATURES] = kbdev->reg + 0x10; + kbdev->regmap.regs[GPU_CONTROL__MMU_FEATURES] = kbdev->reg + 0x14; + kbdev->regmap.regs[GPU_CONTROL__AS_PRESENT] = kbdev->reg + 0x18; + kbdev->regmap.regs[GPU_CONTROL__CSF_ID] = kbdev->reg + 0x1c; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_RAWSTAT] = kbdev->reg + 0x20; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_CLEAR] = kbdev->reg + 0x24; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_MASK] = kbdev->reg + 0x28; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_STATUS] = kbdev->reg + 0x2c; + kbdev->regmap.regs[GPU_CONTROL__GPU_STATUS] = kbdev->reg + 0x34; + kbdev->regmap.regs[GPU_CONTROL__GPU_COMMAND] = kbdev->reg + 0x30; + kbdev->regmap.regs[GPU_CONTROL__GPU_FAULTSTATUS] = kbdev->reg + 0x3c; + kbdev->regmap.regs[GPU_CONTROL__GPU_FAULTADDRESS] = kbdev->reg + 0x40; + kbdev->regmap.regs[GPU_CONTROL__L2_CONFIG] = kbdev->reg + 0x48; + kbdev->regmap.regs[GPU_CONTROL__PWR_KEY] = kbdev->reg + 0x50; + kbdev->regmap.regs[GPU_CONTROL__PWR_OVERRIDE0] = kbdev->reg + 0x54; + kbdev->regmap.regs[GPU_CONTROL__PWR_OVERRIDE1] = kbdev->reg + 0x58; + kbdev->regmap.regs[GPU_CONTROL__TIMESTAMP_OFFSET] = kbdev->reg + 0x88; + kbdev->regmap.regs[GPU_CONTROL__CYCLE_COUNT] = kbdev->reg + 0x90; + kbdev->regmap.regs[GPU_CONTROL__TIMESTAMP] = kbdev->reg + 0x98; + kbdev->regmap.regs[GPU_CONTROL__THREAD_MAX_THREADS] = kbdev->reg + 0xa0; + kbdev->regmap.regs[GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE] = kbdev->reg + 0xa4; + kbdev->regmap.regs[GPU_CONTROL__THREAD_MAX_BARRIER_SIZE] = kbdev->reg + 0xa8; + kbdev->regmap.regs[GPU_CONTROL__THREAD_FEATURES] = kbdev->reg + 0xac; + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_0] = kbdev->reg + 0xb0; + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_1] = kbdev->reg + 0xb4; + 
kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_2] = kbdev->reg + 0xb8; + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_3] = kbdev->reg + 0xbc; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PRESENT] = kbdev->reg + 0x100; + kbdev->regmap.regs[GPU_CONTROL__TILER_PRESENT] = kbdev->reg + 0x110; + kbdev->regmap.regs[GPU_CONTROL__L2_PRESENT] = kbdev->reg + 0x120; + kbdev->regmap.regs[GPU_CONTROL__SHADER_READY] = kbdev->reg + 0x140; + kbdev->regmap.regs[GPU_CONTROL__TILER_READY] = kbdev->reg + 0x150; + kbdev->regmap.regs[GPU_CONTROL__L2_READY] = kbdev->reg + 0x160; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRON] = kbdev->reg + 0x180; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWRON] = kbdev->reg + 0x190; + kbdev->regmap.regs[GPU_CONTROL__L2_PWRON] = kbdev->reg + 0x1a0; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWROFF] = kbdev->reg + 0x1c0; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWROFF] = kbdev->reg + 0x1d0; + kbdev->regmap.regs[GPU_CONTROL__L2_PWROFF] = kbdev->reg + 0x1e0; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRTRANS] = kbdev->reg + 0x200; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWRTRANS] = kbdev->reg + 0x210; + kbdev->regmap.regs[GPU_CONTROL__L2_PWRTRANS] = kbdev->reg + 0x220; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRACTIVE] = kbdev->reg + 0x240; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWRACTIVE] = kbdev->reg + 0x250; + kbdev->regmap.regs[GPU_CONTROL__L2_PWRACTIVE] = kbdev->reg + 0x260; + kbdev->regmap.regs[GPU_CONTROL__REVIDR] = kbdev->reg + 0x280; + kbdev->regmap.regs[GPU_CONTROL__COHERENCY_FEATURES] = kbdev->reg + 0x300; + kbdev->regmap.regs[GPU_CONTROL__COHERENCY_ENABLE] = kbdev->reg + 0x304; + kbdev->regmap.regs[GPU_CONTROL__MCU_CONTROL] = kbdev->reg + 0x700; + kbdev->regmap.regs[GPU_CONTROL__MCU_STATUS] = kbdev->reg + 0x704; + kbdev->regmap.regs[GPU_CONTROL__STACK_PRESENT] = kbdev->reg + 0xe00; + kbdev->regmap.regs[GPU_CONTROL__STACK_READY] = kbdev->reg + 0xe10; + kbdev->regmap.regs[GPU_CONTROL__STACK_PWRON] = kbdev->reg + 0xe20; + 
kbdev->regmap.regs[GPU_CONTROL__STACK_PWROFF] = kbdev->reg + 0xe30; + kbdev->regmap.regs[GPU_CONTROL__STACK_PWRTRANS] = kbdev->reg + 0xe40; + kbdev->regmap.regs[GPU_CONTROL__CSF_CONFIG] = kbdev->reg + 0xf00; + kbdev->regmap.regs[GPU_CONTROL__SHADER_CONFIG] = kbdev->reg + 0xf04; + kbdev->regmap.regs[GPU_CONTROL__TILER_CONFIG] = kbdev->reg + 0xf08; + kbdev->regmap.regs[GPU_CONTROL__L2_MMU_CONFIG] = kbdev->reg + 0xf0c; + kbdev->regmap.regs[GPU_CONTROL__GPU_DBG] = kbdev->reg + 0xfe8; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_RAWSTAT] = kbdev->reg + 0x1000; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_CLEAR] = kbdev->reg + 0x1004; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_MASK] = kbdev->reg + 0x1008; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_STATUS] = kbdev->reg + 0x100c; + kbdev->regmap.regs[MMU_CONTROL__IRQ_RAWSTAT] = kbdev->reg + 0x2000; + kbdev->regmap.regs[MMU_CONTROL__IRQ_CLEAR] = kbdev->reg + 0x2004; + kbdev->regmap.regs[MMU_CONTROL__IRQ_MASK] = kbdev->reg + 0x2008; + kbdev->regmap.regs[MMU_CONTROL__IRQ_STATUS] = kbdev->reg + 0x200c; + kbdev->regmap.regs[MMU_CONTROL__AS0__TRANSTAB] = kbdev->reg + 0x2400; + kbdev->regmap.regs[MMU_CONTROL__AS0__MEMATTR] = kbdev->reg + 0x2408; + kbdev->regmap.regs[MMU_CONTROL__AS0__LOCKADDR] = kbdev->reg + 0x2410; + kbdev->regmap.regs[MMU_CONTROL__AS0__COMMAND] = kbdev->reg + 0x2418; + kbdev->regmap.regs[MMU_CONTROL__AS0__FAULTSTATUS] = kbdev->reg + 0x241c; + kbdev->regmap.regs[MMU_CONTROL__AS0__FAULTADDRESS] = kbdev->reg + 0x2420; + kbdev->regmap.regs[MMU_CONTROL__AS0__STATUS] = kbdev->reg + 0x2428; + kbdev->regmap.regs[MMU_CONTROL__AS0__TRANSCFG] = kbdev->reg + 0x2430; + kbdev->regmap.regs[MMU_CONTROL__AS0__FAULTEXTRA] = kbdev->reg + 0x2438; + kbdev->regmap.regs[MMU_CONTROL__AS1__TRANSTAB] = kbdev->reg + 0x2440; + kbdev->regmap.regs[MMU_CONTROL__AS1__MEMATTR] = kbdev->reg + 0x2448; + kbdev->regmap.regs[MMU_CONTROL__AS1__LOCKADDR] = kbdev->reg + 0x2450; + kbdev->regmap.regs[MMU_CONTROL__AS1__COMMAND] = kbdev->reg + 0x2458; + 
kbdev->regmap.regs[MMU_CONTROL__AS1__FAULTSTATUS] = kbdev->reg + 0x245c; + kbdev->regmap.regs[MMU_CONTROL__AS1__FAULTADDRESS] = kbdev->reg + 0x2460; + kbdev->regmap.regs[MMU_CONTROL__AS1__STATUS] = kbdev->reg + 0x2468; + kbdev->regmap.regs[MMU_CONTROL__AS1__TRANSCFG] = kbdev->reg + 0x2470; + kbdev->regmap.regs[MMU_CONTROL__AS1__FAULTEXTRA] = kbdev->reg + 0x2478; + kbdev->regmap.regs[MMU_CONTROL__AS2__TRANSTAB] = kbdev->reg + 0x2480; + kbdev->regmap.regs[MMU_CONTROL__AS2__MEMATTR] = kbdev->reg + 0x2488; + kbdev->regmap.regs[MMU_CONTROL__AS2__LOCKADDR] = kbdev->reg + 0x2490; + kbdev->regmap.regs[MMU_CONTROL__AS2__COMMAND] = kbdev->reg + 0x2498; + kbdev->regmap.regs[MMU_CONTROL__AS2__FAULTSTATUS] = kbdev->reg + 0x249c; + kbdev->regmap.regs[MMU_CONTROL__AS2__FAULTADDRESS] = kbdev->reg + 0x24a0; + kbdev->regmap.regs[MMU_CONTROL__AS2__STATUS] = kbdev->reg + 0x24a8; + kbdev->regmap.regs[MMU_CONTROL__AS2__TRANSCFG] = kbdev->reg + 0x24b0; + kbdev->regmap.regs[MMU_CONTROL__AS2__FAULTEXTRA] = kbdev->reg + 0x24b8; + kbdev->regmap.regs[MMU_CONTROL__AS3__TRANSTAB] = kbdev->reg + 0x24c0; + kbdev->regmap.regs[MMU_CONTROL__AS3__MEMATTR] = kbdev->reg + 0x24c8; + kbdev->regmap.regs[MMU_CONTROL__AS3__LOCKADDR] = kbdev->reg + 0x24d0; + kbdev->regmap.regs[MMU_CONTROL__AS3__COMMAND] = kbdev->reg + 0x24d8; + kbdev->regmap.regs[MMU_CONTROL__AS3__FAULTSTATUS] = kbdev->reg + 0x24dc; + kbdev->regmap.regs[MMU_CONTROL__AS3__FAULTADDRESS] = kbdev->reg + 0x24e0; + kbdev->regmap.regs[MMU_CONTROL__AS3__STATUS] = kbdev->reg + 0x24e8; + kbdev->regmap.regs[MMU_CONTROL__AS3__TRANSCFG] = kbdev->reg + 0x24f0; + kbdev->regmap.regs[MMU_CONTROL__AS3__FAULTEXTRA] = kbdev->reg + 0x24f8; + kbdev->regmap.regs[MMU_CONTROL__AS4__TRANSTAB] = kbdev->reg + 0x2500; + kbdev->regmap.regs[MMU_CONTROL__AS4__MEMATTR] = kbdev->reg + 0x2508; + kbdev->regmap.regs[MMU_CONTROL__AS4__LOCKADDR] = kbdev->reg + 0x2510; + kbdev->regmap.regs[MMU_CONTROL__AS4__COMMAND] = kbdev->reg + 0x2518; + 
kbdev->regmap.regs[MMU_CONTROL__AS4__FAULTSTATUS] = kbdev->reg + 0x251c; + kbdev->regmap.regs[MMU_CONTROL__AS4__FAULTADDRESS] = kbdev->reg + 0x2520; + kbdev->regmap.regs[MMU_CONTROL__AS4__STATUS] = kbdev->reg + 0x2528; + kbdev->regmap.regs[MMU_CONTROL__AS4__TRANSCFG] = kbdev->reg + 0x2530; + kbdev->regmap.regs[MMU_CONTROL__AS4__FAULTEXTRA] = kbdev->reg + 0x2538; + kbdev->regmap.regs[MMU_CONTROL__AS5__TRANSTAB] = kbdev->reg + 0x2540; + kbdev->regmap.regs[MMU_CONTROL__AS5__MEMATTR] = kbdev->reg + 0x2548; + kbdev->regmap.regs[MMU_CONTROL__AS5__LOCKADDR] = kbdev->reg + 0x2550; + kbdev->regmap.regs[MMU_CONTROL__AS5__COMMAND] = kbdev->reg + 0x2558; + kbdev->regmap.regs[MMU_CONTROL__AS5__FAULTSTATUS] = kbdev->reg + 0x255c; + kbdev->regmap.regs[MMU_CONTROL__AS5__FAULTADDRESS] = kbdev->reg + 0x2560; + kbdev->regmap.regs[MMU_CONTROL__AS5__STATUS] = kbdev->reg + 0x2568; + kbdev->regmap.regs[MMU_CONTROL__AS5__TRANSCFG] = kbdev->reg + 0x2570; + kbdev->regmap.regs[MMU_CONTROL__AS5__FAULTEXTRA] = kbdev->reg + 0x2578; + kbdev->regmap.regs[MMU_CONTROL__AS6__TRANSTAB] = kbdev->reg + 0x2580; + kbdev->regmap.regs[MMU_CONTROL__AS6__MEMATTR] = kbdev->reg + 0x2588; + kbdev->regmap.regs[MMU_CONTROL__AS6__LOCKADDR] = kbdev->reg + 0x2590; + kbdev->regmap.regs[MMU_CONTROL__AS6__COMMAND] = kbdev->reg + 0x2598; + kbdev->regmap.regs[MMU_CONTROL__AS6__FAULTSTATUS] = kbdev->reg + 0x259c; + kbdev->regmap.regs[MMU_CONTROL__AS6__FAULTADDRESS] = kbdev->reg + 0x25a0; + kbdev->regmap.regs[MMU_CONTROL__AS6__STATUS] = kbdev->reg + 0x25a8; + kbdev->regmap.regs[MMU_CONTROL__AS6__TRANSCFG] = kbdev->reg + 0x25b0; + kbdev->regmap.regs[MMU_CONTROL__AS6__FAULTEXTRA] = kbdev->reg + 0x25b8; + kbdev->regmap.regs[MMU_CONTROL__AS7__TRANSTAB] = kbdev->reg + 0x25c0; + kbdev->regmap.regs[MMU_CONTROL__AS7__MEMATTR] = kbdev->reg + 0x25c8; + kbdev->regmap.regs[MMU_CONTROL__AS7__LOCKADDR] = kbdev->reg + 0x25d0; + kbdev->regmap.regs[MMU_CONTROL__AS7__COMMAND] = kbdev->reg + 0x25d8; + 
kbdev->regmap.regs[MMU_CONTROL__AS7__FAULTSTATUS] = kbdev->reg + 0x25dc; + kbdev->regmap.regs[MMU_CONTROL__AS7__FAULTADDRESS] = kbdev->reg + 0x25e0; + kbdev->regmap.regs[MMU_CONTROL__AS7__STATUS] = kbdev->reg + 0x25e8; + kbdev->regmap.regs[MMU_CONTROL__AS7__TRANSCFG] = kbdev->reg + 0x25f0; + kbdev->regmap.regs[MMU_CONTROL__AS7__FAULTEXTRA] = kbdev->reg + 0x25f8; + kbdev->regmap.regs[MMU_CONTROL__AS8__TRANSTAB] = kbdev->reg + 0x2600; + kbdev->regmap.regs[MMU_CONTROL__AS8__MEMATTR] = kbdev->reg + 0x2608; + kbdev->regmap.regs[MMU_CONTROL__AS8__LOCKADDR] = kbdev->reg + 0x2610; + kbdev->regmap.regs[MMU_CONTROL__AS8__COMMAND] = kbdev->reg + 0x2618; + kbdev->regmap.regs[MMU_CONTROL__AS8__FAULTSTATUS] = kbdev->reg + 0x261c; + kbdev->regmap.regs[MMU_CONTROL__AS8__FAULTADDRESS] = kbdev->reg + 0x2620; + kbdev->regmap.regs[MMU_CONTROL__AS8__STATUS] = kbdev->reg + 0x2628; + kbdev->regmap.regs[MMU_CONTROL__AS8__TRANSCFG] = kbdev->reg + 0x2630; + kbdev->regmap.regs[MMU_CONTROL__AS8__FAULTEXTRA] = kbdev->reg + 0x2638; + kbdev->regmap.regs[MMU_CONTROL__AS9__TRANSTAB] = kbdev->reg + 0x2640; + kbdev->regmap.regs[MMU_CONTROL__AS9__MEMATTR] = kbdev->reg + 0x2648; + kbdev->regmap.regs[MMU_CONTROL__AS9__LOCKADDR] = kbdev->reg + 0x2650; + kbdev->regmap.regs[MMU_CONTROL__AS9__COMMAND] = kbdev->reg + 0x2658; + kbdev->regmap.regs[MMU_CONTROL__AS9__FAULTSTATUS] = kbdev->reg + 0x265c; + kbdev->regmap.regs[MMU_CONTROL__AS9__FAULTADDRESS] = kbdev->reg + 0x2660; + kbdev->regmap.regs[MMU_CONTROL__AS9__STATUS] = kbdev->reg + 0x2668; + kbdev->regmap.regs[MMU_CONTROL__AS9__TRANSCFG] = kbdev->reg + 0x2670; + kbdev->regmap.regs[MMU_CONTROL__AS9__FAULTEXTRA] = kbdev->reg + 0x2678; + kbdev->regmap.regs[MMU_CONTROL__AS10__TRANSTAB] = kbdev->reg + 0x2680; + kbdev->regmap.regs[MMU_CONTROL__AS10__MEMATTR] = kbdev->reg + 0x2688; + kbdev->regmap.regs[MMU_CONTROL__AS10__LOCKADDR] = kbdev->reg + 0x2690; + kbdev->regmap.regs[MMU_CONTROL__AS10__COMMAND] = kbdev->reg + 0x2698; + 
kbdev->regmap.regs[MMU_CONTROL__AS10__FAULTSTATUS] = kbdev->reg + 0x269c; + kbdev->regmap.regs[MMU_CONTROL__AS10__FAULTADDRESS] = kbdev->reg + 0x26a0; + kbdev->regmap.regs[MMU_CONTROL__AS10__STATUS] = kbdev->reg + 0x26a8; + kbdev->regmap.regs[MMU_CONTROL__AS10__TRANSCFG] = kbdev->reg + 0x26b0; + kbdev->regmap.regs[MMU_CONTROL__AS10__FAULTEXTRA] = kbdev->reg + 0x26b8; + kbdev->regmap.regs[MMU_CONTROL__AS11__TRANSTAB] = kbdev->reg + 0x26c0; + kbdev->regmap.regs[MMU_CONTROL__AS11__MEMATTR] = kbdev->reg + 0x26c8; + kbdev->regmap.regs[MMU_CONTROL__AS11__LOCKADDR] = kbdev->reg + 0x26d0; + kbdev->regmap.regs[MMU_CONTROL__AS11__COMMAND] = kbdev->reg + 0x26d8; + kbdev->regmap.regs[MMU_CONTROL__AS11__FAULTSTATUS] = kbdev->reg + 0x26dc; + kbdev->regmap.regs[MMU_CONTROL__AS11__FAULTADDRESS] = kbdev->reg + 0x26e0; + kbdev->regmap.regs[MMU_CONTROL__AS11__STATUS] = kbdev->reg + 0x26e8; + kbdev->regmap.regs[MMU_CONTROL__AS11__TRANSCFG] = kbdev->reg + 0x26f0; + kbdev->regmap.regs[MMU_CONTROL__AS11__FAULTEXTRA] = kbdev->reg + 0x26f8; + kbdev->regmap.regs[MMU_CONTROL__AS12__TRANSTAB] = kbdev->reg + 0x2700; + kbdev->regmap.regs[MMU_CONTROL__AS12__MEMATTR] = kbdev->reg + 0x2708; + kbdev->regmap.regs[MMU_CONTROL__AS12__LOCKADDR] = kbdev->reg + 0x2710; + kbdev->regmap.regs[MMU_CONTROL__AS12__COMMAND] = kbdev->reg + 0x2718; + kbdev->regmap.regs[MMU_CONTROL__AS12__FAULTSTATUS] = kbdev->reg + 0x271c; + kbdev->regmap.regs[MMU_CONTROL__AS12__FAULTADDRESS] = kbdev->reg + 0x2720; + kbdev->regmap.regs[MMU_CONTROL__AS12__STATUS] = kbdev->reg + 0x2728; + kbdev->regmap.regs[MMU_CONTROL__AS12__TRANSCFG] = kbdev->reg + 0x2730; + kbdev->regmap.regs[MMU_CONTROL__AS12__FAULTEXTRA] = kbdev->reg + 0x2738; + kbdev->regmap.regs[MMU_CONTROL__AS13__TRANSTAB] = kbdev->reg + 0x2740; + kbdev->regmap.regs[MMU_CONTROL__AS13__MEMATTR] = kbdev->reg + 0x2748; + kbdev->regmap.regs[MMU_CONTROL__AS13__LOCKADDR] = kbdev->reg + 0x2750; + kbdev->regmap.regs[MMU_CONTROL__AS13__COMMAND] = kbdev->reg + 0x2758; + 
kbdev->regmap.regs[MMU_CONTROL__AS13__FAULTSTATUS] = kbdev->reg + 0x275c; + kbdev->regmap.regs[MMU_CONTROL__AS13__FAULTADDRESS] = kbdev->reg + 0x2760; + kbdev->regmap.regs[MMU_CONTROL__AS13__STATUS] = kbdev->reg + 0x2768; + kbdev->regmap.regs[MMU_CONTROL__AS13__TRANSCFG] = kbdev->reg + 0x2770; + kbdev->regmap.regs[MMU_CONTROL__AS13__FAULTEXTRA] = kbdev->reg + 0x2778; + kbdev->regmap.regs[MMU_CONTROL__AS14__TRANSTAB] = kbdev->reg + 0x2780; + kbdev->regmap.regs[MMU_CONTROL__AS14__MEMATTR] = kbdev->reg + 0x2788; + kbdev->regmap.regs[MMU_CONTROL__AS14__LOCKADDR] = kbdev->reg + 0x2790; + kbdev->regmap.regs[MMU_CONTROL__AS14__COMMAND] = kbdev->reg + 0x2798; + kbdev->regmap.regs[MMU_CONTROL__AS14__FAULTSTATUS] = kbdev->reg + 0x279c; + kbdev->regmap.regs[MMU_CONTROL__AS14__FAULTADDRESS] = kbdev->reg + 0x27a0; + kbdev->regmap.regs[MMU_CONTROL__AS14__STATUS] = kbdev->reg + 0x27a8; + kbdev->regmap.regs[MMU_CONTROL__AS14__TRANSCFG] = kbdev->reg + 0x27b0; + kbdev->regmap.regs[MMU_CONTROL__AS14__FAULTEXTRA] = kbdev->reg + 0x27b8; + kbdev->regmap.regs[MMU_CONTROL__AS15__TRANSTAB] = kbdev->reg + 0x27c0; + kbdev->regmap.regs[MMU_CONTROL__AS15__MEMATTR] = kbdev->reg + 0x27c8; + kbdev->regmap.regs[MMU_CONTROL__AS15__LOCKADDR] = kbdev->reg + 0x27d0; + kbdev->regmap.regs[MMU_CONTROL__AS15__COMMAND] = kbdev->reg + 0x27d8; + kbdev->regmap.regs[MMU_CONTROL__AS15__FAULTSTATUS] = kbdev->reg + 0x27dc; + kbdev->regmap.regs[MMU_CONTROL__AS15__FAULTADDRESS] = kbdev->reg + 0x27e0; + kbdev->regmap.regs[MMU_CONTROL__AS15__STATUS] = kbdev->reg + 0x27e8; + kbdev->regmap.regs[MMU_CONTROL__AS15__TRANSCFG] = kbdev->reg + 0x27f0; + kbdev->regmap.regs[MMU_CONTROL__AS15__FAULTEXTRA] = kbdev->reg + 0x27f8; + kbdev->regmap.regs[USER__LATEST_FLUSH] = kbdev->reg + 0x10000; + kbdev->regmap.regs[IPA_CONTROL__COMMAND] = kbdev->reg + 0x40000; + kbdev->regmap.regs[IPA_CONTROL__STATUS] = kbdev->reg + 0x40004; + kbdev->regmap.regs[IPA_CONTROL__TIMER] = kbdev->reg + 0x40008; + 
kbdev->regmap.regs[IPA_CONTROL__SELECT_CSHW] = kbdev->reg + 0x40010; + kbdev->regmap.regs[IPA_CONTROL__SELECT_MEMSYS] = kbdev->reg + 0x40018; + kbdev->regmap.regs[IPA_CONTROL__SELECT_TILER] = kbdev->reg + 0x40020; + kbdev->regmap.regs[IPA_CONTROL__SELECT_SHADER] = kbdev->reg + 0x40028; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_0] = kbdev->reg + 0x40100; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_1] = kbdev->reg + 0x40108; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_2] = kbdev->reg + 0x40110; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_3] = kbdev->reg + 0x40118; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_4] = kbdev->reg + 0x40120; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_5] = kbdev->reg + 0x40128; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_6] = kbdev->reg + 0x40130; + kbdev->regmap.regs[IPA_CONTROL__VALUE_CSHW_7] = kbdev->reg + 0x40138; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_0] = kbdev->reg + 0x40140; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_1] = kbdev->reg + 0x40148; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_2] = kbdev->reg + 0x40150; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_3] = kbdev->reg + 0x40158; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_4] = kbdev->reg + 0x40160; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_5] = kbdev->reg + 0x40168; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_6] = kbdev->reg + 0x40170; + kbdev->regmap.regs[IPA_CONTROL__VALUE_MEMSYS_7] = kbdev->reg + 0x40178; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_0] = kbdev->reg + 0x40180; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_1] = kbdev->reg + 0x40188; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_2] = kbdev->reg + 0x40190; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_3] = kbdev->reg + 0x40198; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_4] = kbdev->reg + 0x401a0; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_5] = kbdev->reg + 0x401a8; + kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_6] = kbdev->reg + 0x401b0; + 
kbdev->regmap.regs[IPA_CONTROL__VALUE_TILER_7] = kbdev->reg + 0x401b8; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_0] = kbdev->reg + 0x401c0; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_1] = kbdev->reg + 0x401c8; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_2] = kbdev->reg + 0x401d0; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_3] = kbdev->reg + 0x401d8; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_4] = kbdev->reg + 0x401e0; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_5] = kbdev->reg + 0x401e8; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_6] = kbdev->reg + 0x401f0; + kbdev->regmap.regs[IPA_CONTROL__VALUE_SHADER_7] = kbdev->reg + 0x401f8; + kbdev->regmap.regs[DOORBELL_BLOCK_0__DOORBELL] = kbdev->reg + 0x80000; + kbdev->regmap.regs[DOORBELL_BLOCK_1__DOORBELL] = kbdev->reg + 0x90000; + kbdev->regmap.regs[DOORBELL_BLOCK_2__DOORBELL] = kbdev->reg + 0xa0000; + kbdev->regmap.regs[DOORBELL_BLOCK_3__DOORBELL] = kbdev->reg + 0xb0000; + kbdev->regmap.regs[DOORBELL_BLOCK_4__DOORBELL] = kbdev->reg + 0xc0000; + kbdev->regmap.regs[DOORBELL_BLOCK_5__DOORBELL] = kbdev->reg + 0xd0000; + kbdev->regmap.regs[DOORBELL_BLOCK_6__DOORBELL] = kbdev->reg + 0xe0000; + kbdev->regmap.regs[DOORBELL_BLOCK_7__DOORBELL] = kbdev->reg + 0xf0000; + kbdev->regmap.regs[DOORBELL_BLOCK_8__DOORBELL] = kbdev->reg + 0x100000; + kbdev->regmap.regs[DOORBELL_BLOCK_9__DOORBELL] = kbdev->reg + 0x110000; + kbdev->regmap.regs[DOORBELL_BLOCK_10__DOORBELL] = kbdev->reg + 0x120000; + kbdev->regmap.regs[DOORBELL_BLOCK_11__DOORBELL] = kbdev->reg + 0x130000; + kbdev->regmap.regs[DOORBELL_BLOCK_12__DOORBELL] = kbdev->reg + 0x140000; + kbdev->regmap.regs[DOORBELL_BLOCK_13__DOORBELL] = kbdev->reg + 0x150000; + kbdev->regmap.regs[DOORBELL_BLOCK_14__DOORBELL] = kbdev->reg + 0x160000; + kbdev->regmap.regs[DOORBELL_BLOCK_15__DOORBELL] = kbdev->reg + 0x170000; + kbdev->regmap.regs[DOORBELL_BLOCK_16__DOORBELL] = kbdev->reg + 0x180000; + kbdev->regmap.regs[DOORBELL_BLOCK_17__DOORBELL] = kbdev->reg + 0x190000; + 
kbdev->regmap.regs[DOORBELL_BLOCK_18__DOORBELL] = kbdev->reg + 0x1a0000; + kbdev->regmap.regs[DOORBELL_BLOCK_19__DOORBELL] = kbdev->reg + 0x1b0000; + kbdev->regmap.regs[DOORBELL_BLOCK_20__DOORBELL] = kbdev->reg + 0x1c0000; + kbdev->regmap.regs[DOORBELL_BLOCK_21__DOORBELL] = kbdev->reg + 0x1d0000; + kbdev->regmap.regs[DOORBELL_BLOCK_22__DOORBELL] = kbdev->reg + 0x1e0000; + kbdev->regmap.regs[DOORBELL_BLOCK_23__DOORBELL] = kbdev->reg + 0x1f0000; + kbdev->regmap.regs[DOORBELL_BLOCK_24__DOORBELL] = kbdev->reg + 0x200000; + kbdev->regmap.regs[DOORBELL_BLOCK_25__DOORBELL] = kbdev->reg + 0x210000; + kbdev->regmap.regs[DOORBELL_BLOCK_26__DOORBELL] = kbdev->reg + 0x220000; + kbdev->regmap.regs[DOORBELL_BLOCK_27__DOORBELL] = kbdev->reg + 0x230000; + kbdev->regmap.regs[DOORBELL_BLOCK_28__DOORBELL] = kbdev->reg + 0x240000; + kbdev->regmap.regs[DOORBELL_BLOCK_29__DOORBELL] = kbdev->reg + 0x250000; + kbdev->regmap.regs[DOORBELL_BLOCK_30__DOORBELL] = kbdev->reg + 0x260000; + kbdev->regmap.regs[DOORBELL_BLOCK_31__DOORBELL] = kbdev->reg + 0x270000; + kbdev->regmap.regs[DOORBELL_BLOCK_32__DOORBELL] = kbdev->reg + 0x280000; + kbdev->regmap.regs[DOORBELL_BLOCK_33__DOORBELL] = kbdev->reg + 0x290000; + kbdev->regmap.regs[DOORBELL_BLOCK_34__DOORBELL] = kbdev->reg + 0x2a0000; + kbdev->regmap.regs[DOORBELL_BLOCK_35__DOORBELL] = kbdev->reg + 0x2b0000; + kbdev->regmap.regs[DOORBELL_BLOCK_36__DOORBELL] = kbdev->reg + 0x2c0000; + kbdev->regmap.regs[DOORBELL_BLOCK_37__DOORBELL] = kbdev->reg + 0x2d0000; + kbdev->regmap.regs[DOORBELL_BLOCK_38__DOORBELL] = kbdev->reg + 0x2e0000; + kbdev->regmap.regs[DOORBELL_BLOCK_39__DOORBELL] = kbdev->reg + 0x2f0000; + kbdev->regmap.regs[DOORBELL_BLOCK_40__DOORBELL] = kbdev->reg + 0x300000; + kbdev->regmap.regs[DOORBELL_BLOCK_41__DOORBELL] = kbdev->reg + 0x310000; + kbdev->regmap.regs[DOORBELL_BLOCK_42__DOORBELL] = kbdev->reg + 0x320000; + kbdev->regmap.regs[DOORBELL_BLOCK_43__DOORBELL] = kbdev->reg + 0x330000; + kbdev->regmap.regs[DOORBELL_BLOCK_44__DOORBELL] = 
kbdev->reg + 0x340000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_45__DOORBELL] = kbdev->reg + 0x350000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_46__DOORBELL] = kbdev->reg + 0x360000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_47__DOORBELL] = kbdev->reg + 0x370000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_48__DOORBELL] = kbdev->reg + 0x380000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_49__DOORBELL] = kbdev->reg + 0x390000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_50__DOORBELL] = kbdev->reg + 0x3a0000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_51__DOORBELL] = kbdev->reg + 0x3b0000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_52__DOORBELL] = kbdev->reg + 0x3c0000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_53__DOORBELL] = kbdev->reg + 0x3d0000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_54__DOORBELL] = kbdev->reg + 0x3e0000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_55__DOORBELL] = kbdev->reg + 0x3f0000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_56__DOORBELL] = kbdev->reg + 0x400000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_57__DOORBELL] = kbdev->reg + 0x410000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_58__DOORBELL] = kbdev->reg + 0x420000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_59__DOORBELL] = kbdev->reg + 0x430000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_60__DOORBELL] = kbdev->reg + 0x440000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_61__DOORBELL] = kbdev->reg + 0x450000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_62__DOORBELL] = kbdev->reg + 0x460000;
+	kbdev->regmap.regs[DOORBELL_BLOCK_63__DOORBELL] = kbdev->reg + 0x470000;
+}
+/* v10.10 map: allocate the tables if unset, apply the v10.8 init, then set CORE_FEATURES. */
+static void kbase_regmap_v10_10_init(struct kbase_device *kbdev)
+{
+	if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) {
+		kbdev->regmap.size = NR_V10_10_REGS;
+		kbdev->regmap.regs =
+			kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL);
+		kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL);
+	}
+	/* Warn and return without writing any entry if either table is missing. */
+	if (WARN_ON(kbdev->regmap.regs == NULL))
+		return;
+	if (WARN_ON(kbdev->regmap.flags == NULL))
+		return;
+	/* Apply the v10.8 entries first; this version's own entry is written after it returns. */
+	kbase_regmap_v10_8_init(kbdev);
+	/* CORE_FEATURES: 32-bit wide, read permission only, at kbdev->reg + 0x8. */
+	kbdev->regmap.flags[GPU_CONTROL__CORE_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT |
+							  KBASE_REGMAP_PERM_READ;
+
+	kbdev->regmap.regs[GPU_CONTROL__CORE_FEATURES] = kbdev->reg + 0x8;
+}
+/* v11 map: allocate the tables if unset, apply the v10.10 init, then set the entries below. */
+static void kbase_regmap_v11_init(struct kbase_device *kbdev)
+{
+	if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) {
+		kbdev->regmap.size = NR_V11_REGS;
+		kbdev->regmap.regs =
+			kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL);
+		kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL);
+	}
+	/* Warn and return without writing any entry if either table is missing. */
+	if (WARN_ON(kbdev->regmap.regs == NULL))
+		return;
+	if (WARN_ON(kbdev->regmap.flags == NULL))
+		return;
+	/* Both tables exist here, so the v10.10 init skips its allocation and NR_V11_REGS stands. */
+	kbase_regmap_v10_10_init(kbdev);
+	/* Width and access-permission flags for the entries written by this version. */
+	kbdev->regmap.flags[GPU_CONTROL__ASN_HASH_0] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__ASN_HASH_1] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__ASN_HASH_2] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__DOORBELL_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT |
+							      KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[GPU_CONTROL__GPU_FEATURES] = KBASE_REGMAP_WIDTH_64_BIT |
+							 KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[GPU_CONTROL__PRFCNT_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT |
+							    KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC0] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC1] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC2] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC3] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC4] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC5] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC6] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_ALLOC7] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_PBHA_OVERRIDE0] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_PBHA_OVERRIDE1] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_PBHA_OVERRIDE2] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__SYSC_PBHA_OVERRIDE3] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[USER__LATEST_FLUSH] = KBASE_REGMAP_WIDTH_32_BIT |
+						  KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	/* Addresses of the same entries, each computed as kbdev->reg plus a fixed offset. */
+	kbdev->regmap.regs[GPU_CONTROL__ASN_HASH_0] = kbdev->reg + 0x2c0;
+	kbdev->regmap.regs[GPU_CONTROL__ASN_HASH_1] = kbdev->reg + 0x2c4;
+	kbdev->regmap.regs[GPU_CONTROL__ASN_HASH_2] = kbdev->reg + 0x2c8;
+	kbdev->regmap.regs[GPU_CONTROL__DOORBELL_FEATURES] = kbdev->reg + 0xc0;
+	kbdev->regmap.regs[GPU_CONTROL__GPU_FEATURES] = kbdev->reg + 0x60;
+	kbdev->regmap.regs[GPU_CONTROL__PRFCNT_FEATURES] = kbdev->reg + 0x68;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC0] = kbdev->reg + 0x340;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC1] = kbdev->reg + 0x344;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC2] = kbdev->reg + 0x348;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC3] = kbdev->reg + 0x34c;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC4] = kbdev->reg + 0x350;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC5] = kbdev->reg + 0x354;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC6] = kbdev->reg + 0x358;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_ALLOC7] = kbdev->reg + 0x35c;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_PBHA_OVERRIDE0] = kbdev->reg + 0x320;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_PBHA_OVERRIDE1] = kbdev->reg + 0x324;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_PBHA_OVERRIDE2] = kbdev->reg + 0x328;
+	kbdev->regmap.regs[GPU_CONTROL__SYSC_PBHA_OVERRIDE3] = kbdev->reg + 0x32c;
+	kbdev->regmap.regs[USER__LATEST_FLUSH] = kbdev->reg + 0x10000;
+}
+/* v12 map: allocate the tables if unset, apply the v11 init, then set or clear the entries below. */
+static void kbase_regmap_v12_init(struct kbase_device *kbdev)
+{
+	if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) {
+		kbdev->regmap.size = NR_V12_REGS;
+		kbdev->regmap.regs =
+			kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL);
+		kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL);
+	}
+	/* Warn and return without writing any entry if either table is missing. */
+	if (WARN_ON(kbdev->regmap.regs == NULL))
+		return;
+	if (WARN_ON(kbdev->regmap.flags == NULL))
+		return;
+	/* Both tables exist here, so the v11 init skips its allocation and NR_V12_REGS stands. */
+	kbase_regmap_v11_init(kbdev);
+	/* Width and access-permission flags for the entries written by this version. */
+	kbdev->regmap.flags[GPU_CONTROL__AMBA_ENABLE] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__GPU_COMMAND_ARG0] =
+		KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__GPU_COMMAND_ARG1] =
+		KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__MCU_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT |
+							 KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRFEATURES] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[GPU_CONTROL__AMBA_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT |
+							  KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[GPU_CONTROL__COHERENCY_ENABLE] = 0; /* cleared: no width/permission bits */
+	/* Addresses of the same entries, each computed as kbdev->reg plus a fixed offset. */
+	kbdev->regmap.regs[GPU_CONTROL__AMBA_ENABLE] = kbdev->reg + 0x304;
+	kbdev->regmap.regs[GPU_CONTROL__GPU_COMMAND_ARG0] = kbdev->reg + 0xd0;
+	kbdev->regmap.regs[GPU_CONTROL__GPU_COMMAND_ARG1] = kbdev->reg + 0xd8;
+	kbdev->regmap.regs[GPU_CONTROL__MCU_FEATURES] = kbdev->reg + 0x708;
+	kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRFEATURES] = kbdev->reg + 0x188;
+	kbdev->regmap.regs[GPU_CONTROL__AMBA_FEATURES] = kbdev->reg + 0x300;
+	/* NOTE(review): address dropped too; presumably this register is absent on v12 - confirm. */
+	kbdev->regmap.regs[GPU_CONTROL__COHERENCY_ENABLE] = NULL;
+}
+/* Run the regmap init chosen from kbdev->gpu_props.gpu_id.arch_id and return the arch_id of */
+/* the init_array entry that was used, which is not necessarily the GPU's own arch_id. */
+u32 kbase_regmap_backend_init(struct kbase_device *kbdev)
+{
+	int i = 0;
+	/* One init function per supported architecture, listed as 10.8, 10.10, 11.0, 12.0. */
+	struct {
+		u32 arch_id;
+		void (*init)(struct kbase_device *kbdev);
+	} init_array[] = {
+		{ GPU_ID_ARCH_MAKE(10, 8, 0), kbase_regmap_v10_8_init },
+		{ GPU_ID_ARCH_MAKE(10, 10, 0), kbase_regmap_v10_10_init },
+		{ GPU_ID_ARCH_MAKE(11, 0, 0), kbase_regmap_v11_init },
+		{ GPU_ID_ARCH_MAKE(12, 0, 0), kbase_regmap_v12_init },
+	};
+	/* Use entry i as soon as the GPU arch_id is below entry i + 1's arch_id. */
+	for (i = 0; i < ARRAY_SIZE(init_array) - 1; i++) {
+		if (kbdev->gpu_props.gpu_id.arch_id < init_array[i + 1].arch_id) {
+			init_array[i].init(kbdev);
+			return init_array[i].arch_id;
+		}
+	}
+
+	/* arch_id is at or above the last entry's; i indexes the last entry of init_array here */
+	init_array[i].init(kbdev);
+	return init_array[i].arch_id;
+}
+
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+static char *enum_strings[] = {
+	[GPU_CONTROL__GPU_ID] = "GPU_CONTROL__GPU_ID",
+	[GPU_CONTROL__L2_FEATURES] = "GPU_CONTROL__L2_FEATURES",
+	[GPU_CONTROL__TILER_FEATURES] = "GPU_CONTROL__TILER_FEATURES",
+	[GPU_CONTROL__MEM_FEATURES] = "GPU_CONTROL__MEM_FEATURES",
+	[GPU_CONTROL__MMU_FEATURES] = "GPU_CONTROL__MMU_FEATURES",
+	[GPU_CONTROL__AS_PRESENT] = "GPU_CONTROL__AS_PRESENT",
+	[GPU_CONTROL__CSF_ID] = "GPU_CONTROL__CSF_ID",
+	[GPU_CONTROL__GPU_IRQ_RAWSTAT] = "GPU_CONTROL__GPU_IRQ_RAWSTAT",
+	[GPU_CONTROL__GPU_IRQ_CLEAR] = "GPU_CONTROL__GPU_IRQ_CLEAR",
+	[GPU_CONTROL__GPU_IRQ_MASK] = "GPU_CONTROL__GPU_IRQ_MASK",
+	[GPU_CONTROL__GPU_IRQ_STATUS] = "GPU_CONTROL__GPU_IRQ_STATUS",
+	[GPU_CONTROL__GPU_STATUS] = "GPU_CONTROL__GPU_STATUS",
+	[GPU_CONTROL__GPU_COMMAND] = "GPU_CONTROL__GPU_COMMAND",
+	[GPU_CONTROL__GPU_FAULTSTATUS] = "GPU_CONTROL__GPU_FAULTSTATUS",
+	[GPU_CONTROL__GPU_FAULTADDRESS] = "GPU_CONTROL__GPU_FAULTADDRESS", +
[GPU_CONTROL__L2_CONFIG] = "GPU_CONTROL__L2_CONFIG", + [GPU_CONTROL__PWR_KEY] = "GPU_CONTROL__PWR_KEY", + [GPU_CONTROL__PWR_OVERRIDE0] = "GPU_CONTROL__PWR_OVERRIDE0", + [GPU_CONTROL__PWR_OVERRIDE1] = "GPU_CONTROL__PWR_OVERRIDE1", + [GPU_CONTROL__TIMESTAMP_OFFSET] = "GPU_CONTROL__TIMESTAMP_OFFSET", + [GPU_CONTROL__CYCLE_COUNT] = "GPU_CONTROL__CYCLE_COUNT", + [GPU_CONTROL__TIMESTAMP] = "GPU_CONTROL__TIMESTAMP", + [GPU_CONTROL__THREAD_MAX_THREADS] = "GPU_CONTROL__THREAD_MAX_THREADS", + [GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE] = "GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE", + [GPU_CONTROL__THREAD_MAX_BARRIER_SIZE] = "GPU_CONTROL__THREAD_MAX_BARRIER_SIZE", + [GPU_CONTROL__THREAD_FEATURES] = "GPU_CONTROL__THREAD_FEATURES", + [GPU_CONTROL__TEXTURE_FEATURES_0] = "GPU_CONTROL__TEXTURE_FEATURES_0", + [GPU_CONTROL__TEXTURE_FEATURES_1] = "GPU_CONTROL__TEXTURE_FEATURES_1", + [GPU_CONTROL__TEXTURE_FEATURES_2] = "GPU_CONTROL__TEXTURE_FEATURES_2", + [GPU_CONTROL__TEXTURE_FEATURES_3] = "GPU_CONTROL__TEXTURE_FEATURES_3", + [GPU_CONTROL__SHADER_PRESENT] = "GPU_CONTROL__SHADER_PRESENT", + [GPU_CONTROL__TILER_PRESENT] = "GPU_CONTROL__TILER_PRESENT", + [GPU_CONTROL__L2_PRESENT] = "GPU_CONTROL__L2_PRESENT", + [GPU_CONTROL__SHADER_READY] = "GPU_CONTROL__SHADER_READY", + [GPU_CONTROL__TILER_READY] = "GPU_CONTROL__TILER_READY", + [GPU_CONTROL__L2_READY] = "GPU_CONTROL__L2_READY", + [GPU_CONTROL__SHADER_PWRON] = "GPU_CONTROL__SHADER_PWRON", + [GPU_CONTROL__TILER_PWRON] = "GPU_CONTROL__TILER_PWRON", + [GPU_CONTROL__L2_PWRON] = "GPU_CONTROL__L2_PWRON", + [GPU_CONTROL__SHADER_PWROFF] = "GPU_CONTROL__SHADER_PWROFF", + [GPU_CONTROL__TILER_PWROFF] = "GPU_CONTROL__TILER_PWROFF", + [GPU_CONTROL__L2_PWROFF] = "GPU_CONTROL__L2_PWROFF", + [GPU_CONTROL__SHADER_PWRTRANS] = "GPU_CONTROL__SHADER_PWRTRANS", + [GPU_CONTROL__TILER_PWRTRANS] = "GPU_CONTROL__TILER_PWRTRANS", + [GPU_CONTROL__L2_PWRTRANS] = "GPU_CONTROL__L2_PWRTRANS", + [GPU_CONTROL__SHADER_PWRACTIVE] = "GPU_CONTROL__SHADER_PWRACTIVE", + 
[GPU_CONTROL__TILER_PWRACTIVE] = "GPU_CONTROL__TILER_PWRACTIVE", + [GPU_CONTROL__L2_PWRACTIVE] = "GPU_CONTROL__L2_PWRACTIVE", + [GPU_CONTROL__REVIDR] = "GPU_CONTROL__REVIDR", + [GPU_CONTROL__COHERENCY_FEATURES] = "GPU_CONTROL__COHERENCY_FEATURES", + [GPU_CONTROL__COHERENCY_ENABLE] = "GPU_CONTROL__COHERENCY_ENABLE", + [GPU_CONTROL__MCU_CONTROL] = "GPU_CONTROL__MCU_CONTROL", + [GPU_CONTROL__MCU_STATUS] = "GPU_CONTROL__MCU_STATUS", + [GPU_CONTROL__STACK_PRESENT] = "GPU_CONTROL__STACK_PRESENT", + [GPU_CONTROL__STACK_READY] = "GPU_CONTROL__STACK_READY", + [GPU_CONTROL__STACK_PWRON] = "GPU_CONTROL__STACK_PWRON", + [GPU_CONTROL__STACK_PWROFF] = "GPU_CONTROL__STACK_PWROFF", + [GPU_CONTROL__STACK_PWRTRANS] = "GPU_CONTROL__STACK_PWRTRANS", + [GPU_CONTROL__CSF_CONFIG] = "GPU_CONTROL__CSF_CONFIG", + [GPU_CONTROL__SHADER_CONFIG] = "GPU_CONTROL__SHADER_CONFIG", + [GPU_CONTROL__TILER_CONFIG] = "GPU_CONTROL__TILER_CONFIG", + [GPU_CONTROL__L2_MMU_CONFIG] = "GPU_CONTROL__L2_MMU_CONFIG", + [GPU_CONTROL__GPU_DBG] = "GPU_CONTROL__GPU_DBG", + [JOB_CONTROL__JOB_IRQ_RAWSTAT] = "JOB_CONTROL__JOB_IRQ_RAWSTAT", + [JOB_CONTROL__JOB_IRQ_CLEAR] = "JOB_CONTROL__JOB_IRQ_CLEAR", + [JOB_CONTROL__JOB_IRQ_MASK] = "JOB_CONTROL__JOB_IRQ_MASK", + [JOB_CONTROL__JOB_IRQ_STATUS] = "JOB_CONTROL__JOB_IRQ_STATUS", + [MMU_CONTROL__IRQ_RAWSTAT] = "MMU_CONTROL__IRQ_RAWSTAT", + [MMU_CONTROL__IRQ_CLEAR] = "MMU_CONTROL__IRQ_CLEAR", + [MMU_CONTROL__IRQ_MASK] = "MMU_CONTROL__IRQ_MASK", + [MMU_CONTROL__IRQ_STATUS] = "MMU_CONTROL__IRQ_STATUS", + [MMU_CONTROL__AS0__TRANSTAB] = "MMU_CONTROL__AS0__TRANSTAB", + [MMU_CONTROL__AS0__MEMATTR] = "MMU_CONTROL__AS0__MEMATTR", + [MMU_CONTROL__AS0__LOCKADDR] = "MMU_CONTROL__AS0__LOCKADDR", + [MMU_CONTROL__AS0__COMMAND] = "MMU_CONTROL__AS0__COMMAND", + [MMU_CONTROL__AS0__FAULTSTATUS] = "MMU_CONTROL__AS0__FAULTSTATUS", + [MMU_CONTROL__AS0__FAULTADDRESS] = "MMU_CONTROL__AS0__FAULTADDRESS", + [MMU_CONTROL__AS0__STATUS] = "MMU_CONTROL__AS0__STATUS", + [MMU_CONTROL__AS0__TRANSCFG] = 
"MMU_CONTROL__AS0__TRANSCFG", + [MMU_CONTROL__AS0__FAULTEXTRA] = "MMU_CONTROL__AS0__FAULTEXTRA", + [MMU_CONTROL__AS1__TRANSTAB] = "MMU_CONTROL__AS1__TRANSTAB", + [MMU_CONTROL__AS1__MEMATTR] = "MMU_CONTROL__AS1__MEMATTR", + [MMU_CONTROL__AS1__LOCKADDR] = "MMU_CONTROL__AS1__LOCKADDR", + [MMU_CONTROL__AS1__COMMAND] = "MMU_CONTROL__AS1__COMMAND", + [MMU_CONTROL__AS1__FAULTSTATUS] = "MMU_CONTROL__AS1__FAULTSTATUS", + [MMU_CONTROL__AS1__FAULTADDRESS] = "MMU_CONTROL__AS1__FAULTADDRESS", + [MMU_CONTROL__AS1__STATUS] = "MMU_CONTROL__AS1__STATUS", + [MMU_CONTROL__AS1__TRANSCFG] = "MMU_CONTROL__AS1__TRANSCFG", + [MMU_CONTROL__AS1__FAULTEXTRA] = "MMU_CONTROL__AS1__FAULTEXTRA", + [MMU_CONTROL__AS2__TRANSTAB] = "MMU_CONTROL__AS2__TRANSTAB", + [MMU_CONTROL__AS2__MEMATTR] = "MMU_CONTROL__AS2__MEMATTR", + [MMU_CONTROL__AS2__LOCKADDR] = "MMU_CONTROL__AS2__LOCKADDR", + [MMU_CONTROL__AS2__COMMAND] = "MMU_CONTROL__AS2__COMMAND", + [MMU_CONTROL__AS2__FAULTSTATUS] = "MMU_CONTROL__AS2__FAULTSTATUS", + [MMU_CONTROL__AS2__FAULTADDRESS] = "MMU_CONTROL__AS2__FAULTADDRESS", + [MMU_CONTROL__AS2__STATUS] = "MMU_CONTROL__AS2__STATUS", + [MMU_CONTROL__AS2__TRANSCFG] = "MMU_CONTROL__AS2__TRANSCFG", + [MMU_CONTROL__AS2__FAULTEXTRA] = "MMU_CONTROL__AS2__FAULTEXTRA", + [MMU_CONTROL__AS3__TRANSTAB] = "MMU_CONTROL__AS3__TRANSTAB", + [MMU_CONTROL__AS3__MEMATTR] = "MMU_CONTROL__AS3__MEMATTR", + [MMU_CONTROL__AS3__LOCKADDR] = "MMU_CONTROL__AS3__LOCKADDR", + [MMU_CONTROL__AS3__COMMAND] = "MMU_CONTROL__AS3__COMMAND", + [MMU_CONTROL__AS3__FAULTSTATUS] = "MMU_CONTROL__AS3__FAULTSTATUS", + [MMU_CONTROL__AS3__FAULTADDRESS] = "MMU_CONTROL__AS3__FAULTADDRESS", + [MMU_CONTROL__AS3__STATUS] = "MMU_CONTROL__AS3__STATUS", + [MMU_CONTROL__AS3__TRANSCFG] = "MMU_CONTROL__AS3__TRANSCFG", + [MMU_CONTROL__AS3__FAULTEXTRA] = "MMU_CONTROL__AS3__FAULTEXTRA", + [MMU_CONTROL__AS4__TRANSTAB] = "MMU_CONTROL__AS4__TRANSTAB", + [MMU_CONTROL__AS4__MEMATTR] = "MMU_CONTROL__AS4__MEMATTR", + [MMU_CONTROL__AS4__LOCKADDR] = 
"MMU_CONTROL__AS4__LOCKADDR", + [MMU_CONTROL__AS4__COMMAND] = "MMU_CONTROL__AS4__COMMAND", + [MMU_CONTROL__AS4__FAULTSTATUS] = "MMU_CONTROL__AS4__FAULTSTATUS", + [MMU_CONTROL__AS4__FAULTADDRESS] = "MMU_CONTROL__AS4__FAULTADDRESS", + [MMU_CONTROL__AS4__STATUS] = "MMU_CONTROL__AS4__STATUS", + [MMU_CONTROL__AS4__TRANSCFG] = "MMU_CONTROL__AS4__TRANSCFG", + [MMU_CONTROL__AS4__FAULTEXTRA] = "MMU_CONTROL__AS4__FAULTEXTRA", + [MMU_CONTROL__AS5__TRANSTAB] = "MMU_CONTROL__AS5__TRANSTAB", + [MMU_CONTROL__AS5__MEMATTR] = "MMU_CONTROL__AS5__MEMATTR", + [MMU_CONTROL__AS5__LOCKADDR] = "MMU_CONTROL__AS5__LOCKADDR", + [MMU_CONTROL__AS5__COMMAND] = "MMU_CONTROL__AS5__COMMAND", + [MMU_CONTROL__AS5__FAULTSTATUS] = "MMU_CONTROL__AS5__FAULTSTATUS", + [MMU_CONTROL__AS5__FAULTADDRESS] = "MMU_CONTROL__AS5__FAULTADDRESS", + [MMU_CONTROL__AS5__STATUS] = "MMU_CONTROL__AS5__STATUS", + [MMU_CONTROL__AS5__TRANSCFG] = "MMU_CONTROL__AS5__TRANSCFG", + [MMU_CONTROL__AS5__FAULTEXTRA] = "MMU_CONTROL__AS5__FAULTEXTRA", + [MMU_CONTROL__AS6__TRANSTAB] = "MMU_CONTROL__AS6__TRANSTAB", + [MMU_CONTROL__AS6__MEMATTR] = "MMU_CONTROL__AS6__MEMATTR", + [MMU_CONTROL__AS6__LOCKADDR] = "MMU_CONTROL__AS6__LOCKADDR", + [MMU_CONTROL__AS6__COMMAND] = "MMU_CONTROL__AS6__COMMAND", + [MMU_CONTROL__AS6__FAULTSTATUS] = "MMU_CONTROL__AS6__FAULTSTATUS", + [MMU_CONTROL__AS6__FAULTADDRESS] = "MMU_CONTROL__AS6__FAULTADDRESS", + [MMU_CONTROL__AS6__STATUS] = "MMU_CONTROL__AS6__STATUS", + [MMU_CONTROL__AS6__TRANSCFG] = "MMU_CONTROL__AS6__TRANSCFG", + [MMU_CONTROL__AS6__FAULTEXTRA] = "MMU_CONTROL__AS6__FAULTEXTRA", + [MMU_CONTROL__AS7__TRANSTAB] = "MMU_CONTROL__AS7__TRANSTAB", + [MMU_CONTROL__AS7__MEMATTR] = "MMU_CONTROL__AS7__MEMATTR", + [MMU_CONTROL__AS7__LOCKADDR] = "MMU_CONTROL__AS7__LOCKADDR", + [MMU_CONTROL__AS7__COMMAND] = "MMU_CONTROL__AS7__COMMAND", + [MMU_CONTROL__AS7__FAULTSTATUS] = "MMU_CONTROL__AS7__FAULTSTATUS", + [MMU_CONTROL__AS7__FAULTADDRESS] = "MMU_CONTROL__AS7__FAULTADDRESS", + [MMU_CONTROL__AS7__STATUS] = 
"MMU_CONTROL__AS7__STATUS", + [MMU_CONTROL__AS7__TRANSCFG] = "MMU_CONTROL__AS7__TRANSCFG", + [MMU_CONTROL__AS7__FAULTEXTRA] = "MMU_CONTROL__AS7__FAULTEXTRA", + [MMU_CONTROL__AS8__TRANSTAB] = "MMU_CONTROL__AS8__TRANSTAB", + [MMU_CONTROL__AS8__MEMATTR] = "MMU_CONTROL__AS8__MEMATTR", + [MMU_CONTROL__AS8__LOCKADDR] = "MMU_CONTROL__AS8__LOCKADDR", + [MMU_CONTROL__AS8__COMMAND] = "MMU_CONTROL__AS8__COMMAND", + [MMU_CONTROL__AS8__FAULTSTATUS] = "MMU_CONTROL__AS8__FAULTSTATUS", + [MMU_CONTROL__AS8__FAULTADDRESS] = "MMU_CONTROL__AS8__FAULTADDRESS", + [MMU_CONTROL__AS8__STATUS] = "MMU_CONTROL__AS8__STATUS", + [MMU_CONTROL__AS8__TRANSCFG] = "MMU_CONTROL__AS8__TRANSCFG", + [MMU_CONTROL__AS8__FAULTEXTRA] = "MMU_CONTROL__AS8__FAULTEXTRA", + [MMU_CONTROL__AS9__TRANSTAB] = "MMU_CONTROL__AS9__TRANSTAB", + [MMU_CONTROL__AS9__MEMATTR] = "MMU_CONTROL__AS9__MEMATTR", + [MMU_CONTROL__AS9__LOCKADDR] = "MMU_CONTROL__AS9__LOCKADDR", + [MMU_CONTROL__AS9__COMMAND] = "MMU_CONTROL__AS9__COMMAND", + [MMU_CONTROL__AS9__FAULTSTATUS] = "MMU_CONTROL__AS9__FAULTSTATUS", + [MMU_CONTROL__AS9__FAULTADDRESS] = "MMU_CONTROL__AS9__FAULTADDRESS", + [MMU_CONTROL__AS9__STATUS] = "MMU_CONTROL__AS9__STATUS", + [MMU_CONTROL__AS9__TRANSCFG] = "MMU_CONTROL__AS9__TRANSCFG", + [MMU_CONTROL__AS9__FAULTEXTRA] = "MMU_CONTROL__AS9__FAULTEXTRA", + [MMU_CONTROL__AS10__TRANSTAB] = "MMU_CONTROL__AS10__TRANSTAB", + [MMU_CONTROL__AS10__MEMATTR] = "MMU_CONTROL__AS10__MEMATTR", + [MMU_CONTROL__AS10__LOCKADDR] = "MMU_CONTROL__AS10__LOCKADDR", + [MMU_CONTROL__AS10__COMMAND] = "MMU_CONTROL__AS10__COMMAND", + [MMU_CONTROL__AS10__FAULTSTATUS] = "MMU_CONTROL__AS10__FAULTSTATUS", + [MMU_CONTROL__AS10__FAULTADDRESS] = "MMU_CONTROL__AS10__FAULTADDRESS", + [MMU_CONTROL__AS10__STATUS] = "MMU_CONTROL__AS10__STATUS", + [MMU_CONTROL__AS10__TRANSCFG] = "MMU_CONTROL__AS10__TRANSCFG", + [MMU_CONTROL__AS10__FAULTEXTRA] = "MMU_CONTROL__AS10__FAULTEXTRA", + [MMU_CONTROL__AS11__TRANSTAB] = "MMU_CONTROL__AS11__TRANSTAB", + 
[MMU_CONTROL__AS11__MEMATTR] = "MMU_CONTROL__AS11__MEMATTR", + [MMU_CONTROL__AS11__LOCKADDR] = "MMU_CONTROL__AS11__LOCKADDR", + [MMU_CONTROL__AS11__COMMAND] = "MMU_CONTROL__AS11__COMMAND", + [MMU_CONTROL__AS11__FAULTSTATUS] = "MMU_CONTROL__AS11__FAULTSTATUS", + [MMU_CONTROL__AS11__FAULTADDRESS] = "MMU_CONTROL__AS11__FAULTADDRESS", + [MMU_CONTROL__AS11__STATUS] = "MMU_CONTROL__AS11__STATUS", + [MMU_CONTROL__AS11__TRANSCFG] = "MMU_CONTROL__AS11__TRANSCFG", + [MMU_CONTROL__AS11__FAULTEXTRA] = "MMU_CONTROL__AS11__FAULTEXTRA", + [MMU_CONTROL__AS12__TRANSTAB] = "MMU_CONTROL__AS12__TRANSTAB", + [MMU_CONTROL__AS12__MEMATTR] = "MMU_CONTROL__AS12__MEMATTR", + [MMU_CONTROL__AS12__LOCKADDR] = "MMU_CONTROL__AS12__LOCKADDR", + [MMU_CONTROL__AS12__COMMAND] = "MMU_CONTROL__AS12__COMMAND", + [MMU_CONTROL__AS12__FAULTSTATUS] = "MMU_CONTROL__AS12__FAULTSTATUS", + [MMU_CONTROL__AS12__FAULTADDRESS] = "MMU_CONTROL__AS12__FAULTADDRESS", + [MMU_CONTROL__AS12__STATUS] = "MMU_CONTROL__AS12__STATUS", + [MMU_CONTROL__AS12__TRANSCFG] = "MMU_CONTROL__AS12__TRANSCFG", + [MMU_CONTROL__AS12__FAULTEXTRA] = "MMU_CONTROL__AS12__FAULTEXTRA", + [MMU_CONTROL__AS13__TRANSTAB] = "MMU_CONTROL__AS13__TRANSTAB", + [MMU_CONTROL__AS13__MEMATTR] = "MMU_CONTROL__AS13__MEMATTR", + [MMU_CONTROL__AS13__LOCKADDR] = "MMU_CONTROL__AS13__LOCKADDR", + [MMU_CONTROL__AS13__COMMAND] = "MMU_CONTROL__AS13__COMMAND", + [MMU_CONTROL__AS13__FAULTSTATUS] = "MMU_CONTROL__AS13__FAULTSTATUS", + [MMU_CONTROL__AS13__FAULTADDRESS] = "MMU_CONTROL__AS13__FAULTADDRESS", + [MMU_CONTROL__AS13__STATUS] = "MMU_CONTROL__AS13__STATUS", + [MMU_CONTROL__AS13__TRANSCFG] = "MMU_CONTROL__AS13__TRANSCFG", + [MMU_CONTROL__AS13__FAULTEXTRA] = "MMU_CONTROL__AS13__FAULTEXTRA", + [MMU_CONTROL__AS14__TRANSTAB] = "MMU_CONTROL__AS14__TRANSTAB", + [MMU_CONTROL__AS14__MEMATTR] = "MMU_CONTROL__AS14__MEMATTR", + [MMU_CONTROL__AS14__LOCKADDR] = "MMU_CONTROL__AS14__LOCKADDR", + [MMU_CONTROL__AS14__COMMAND] = "MMU_CONTROL__AS14__COMMAND", + 
[MMU_CONTROL__AS14__FAULTSTATUS] = "MMU_CONTROL__AS14__FAULTSTATUS", + [MMU_CONTROL__AS14__FAULTADDRESS] = "MMU_CONTROL__AS14__FAULTADDRESS", + [MMU_CONTROL__AS14__STATUS] = "MMU_CONTROL__AS14__STATUS", + [MMU_CONTROL__AS14__TRANSCFG] = "MMU_CONTROL__AS14__TRANSCFG", + [MMU_CONTROL__AS14__FAULTEXTRA] = "MMU_CONTROL__AS14__FAULTEXTRA", + [MMU_CONTROL__AS15__TRANSTAB] = "MMU_CONTROL__AS15__TRANSTAB", + [MMU_CONTROL__AS15__MEMATTR] = "MMU_CONTROL__AS15__MEMATTR", + [MMU_CONTROL__AS15__LOCKADDR] = "MMU_CONTROL__AS15__LOCKADDR", + [MMU_CONTROL__AS15__COMMAND] = "MMU_CONTROL__AS15__COMMAND", + [MMU_CONTROL__AS15__FAULTSTATUS] = "MMU_CONTROL__AS15__FAULTSTATUS", + [MMU_CONTROL__AS15__FAULTADDRESS] = "MMU_CONTROL__AS15__FAULTADDRESS", + [MMU_CONTROL__AS15__STATUS] = "MMU_CONTROL__AS15__STATUS", + [MMU_CONTROL__AS15__TRANSCFG] = "MMU_CONTROL__AS15__TRANSCFG", + [MMU_CONTROL__AS15__FAULTEXTRA] = "MMU_CONTROL__AS15__FAULTEXTRA", + [USER__LATEST_FLUSH] = "USER__LATEST_FLUSH", + [IPA_CONTROL__COMMAND] = "IPA_CONTROL__COMMAND", + [IPA_CONTROL__STATUS] = "IPA_CONTROL__STATUS", + [IPA_CONTROL__TIMER] = "IPA_CONTROL__TIMER", + [IPA_CONTROL__SELECT_CSHW] = "IPA_CONTROL__SELECT_CSHW", + [IPA_CONTROL__SELECT_MEMSYS] = "IPA_CONTROL__SELECT_MEMSYS", + [IPA_CONTROL__SELECT_TILER] = "IPA_CONTROL__SELECT_TILER", + [IPA_CONTROL__SELECT_SHADER] = "IPA_CONTROL__SELECT_SHADER", + [IPA_CONTROL__VALUE_CSHW_0] = "IPA_CONTROL__VALUE_CSHW_0", + [IPA_CONTROL__VALUE_CSHW_1] = "IPA_CONTROL__VALUE_CSHW_1", + [IPA_CONTROL__VALUE_CSHW_2] = "IPA_CONTROL__VALUE_CSHW_2", + [IPA_CONTROL__VALUE_CSHW_3] = "IPA_CONTROL__VALUE_CSHW_3", + [IPA_CONTROL__VALUE_CSHW_4] = "IPA_CONTROL__VALUE_CSHW_4", + [IPA_CONTROL__VALUE_CSHW_5] = "IPA_CONTROL__VALUE_CSHW_5", + [IPA_CONTROL__VALUE_CSHW_6] = "IPA_CONTROL__VALUE_CSHW_6", + [IPA_CONTROL__VALUE_CSHW_7] = "IPA_CONTROL__VALUE_CSHW_7", + [IPA_CONTROL__VALUE_MEMSYS_0] = "IPA_CONTROL__VALUE_MEMSYS_0", + [IPA_CONTROL__VALUE_MEMSYS_1] = "IPA_CONTROL__VALUE_MEMSYS_1", + 
[IPA_CONTROL__VALUE_MEMSYS_2] = "IPA_CONTROL__VALUE_MEMSYS_2", + [IPA_CONTROL__VALUE_MEMSYS_3] = "IPA_CONTROL__VALUE_MEMSYS_3", + [IPA_CONTROL__VALUE_MEMSYS_4] = "IPA_CONTROL__VALUE_MEMSYS_4", + [IPA_CONTROL__VALUE_MEMSYS_5] = "IPA_CONTROL__VALUE_MEMSYS_5", + [IPA_CONTROL__VALUE_MEMSYS_6] = "IPA_CONTROL__VALUE_MEMSYS_6", + [IPA_CONTROL__VALUE_MEMSYS_7] = "IPA_CONTROL__VALUE_MEMSYS_7", + [IPA_CONTROL__VALUE_TILER_0] = "IPA_CONTROL__VALUE_TILER_0", + [IPA_CONTROL__VALUE_TILER_1] = "IPA_CONTROL__VALUE_TILER_1", + [IPA_CONTROL__VALUE_TILER_2] = "IPA_CONTROL__VALUE_TILER_2", + [IPA_CONTROL__VALUE_TILER_3] = "IPA_CONTROL__VALUE_TILER_3", + [IPA_CONTROL__VALUE_TILER_4] = "IPA_CONTROL__VALUE_TILER_4", + [IPA_CONTROL__VALUE_TILER_5] = "IPA_CONTROL__VALUE_TILER_5", + [IPA_CONTROL__VALUE_TILER_6] = "IPA_CONTROL__VALUE_TILER_6", + [IPA_CONTROL__VALUE_TILER_7] = "IPA_CONTROL__VALUE_TILER_7", + [IPA_CONTROL__VALUE_SHADER_0] = "IPA_CONTROL__VALUE_SHADER_0", + [IPA_CONTROL__VALUE_SHADER_1] = "IPA_CONTROL__VALUE_SHADER_1", + [IPA_CONTROL__VALUE_SHADER_2] = "IPA_CONTROL__VALUE_SHADER_2", + [IPA_CONTROL__VALUE_SHADER_3] = "IPA_CONTROL__VALUE_SHADER_3", + [IPA_CONTROL__VALUE_SHADER_4] = "IPA_CONTROL__VALUE_SHADER_4", + [IPA_CONTROL__VALUE_SHADER_5] = "IPA_CONTROL__VALUE_SHADER_5", + [IPA_CONTROL__VALUE_SHADER_6] = "IPA_CONTROL__VALUE_SHADER_6", + [IPA_CONTROL__VALUE_SHADER_7] = "IPA_CONTROL__VALUE_SHADER_7", + [DOORBELL_BLOCK_0__DOORBELL] = "DOORBELL_BLOCK_0__DOORBELL", + [DOORBELL_BLOCK_1__DOORBELL] = "DOORBELL_BLOCK_1__DOORBELL", + [DOORBELL_BLOCK_2__DOORBELL] = "DOORBELL_BLOCK_2__DOORBELL", + [DOORBELL_BLOCK_3__DOORBELL] = "DOORBELL_BLOCK_3__DOORBELL", + [DOORBELL_BLOCK_4__DOORBELL] = "DOORBELL_BLOCK_4__DOORBELL", + [DOORBELL_BLOCK_5__DOORBELL] = "DOORBELL_BLOCK_5__DOORBELL", + [DOORBELL_BLOCK_6__DOORBELL] = "DOORBELL_BLOCK_6__DOORBELL", + [DOORBELL_BLOCK_7__DOORBELL] = "DOORBELL_BLOCK_7__DOORBELL", + [DOORBELL_BLOCK_8__DOORBELL] = "DOORBELL_BLOCK_8__DOORBELL", + 
[DOORBELL_BLOCK_9__DOORBELL] = "DOORBELL_BLOCK_9__DOORBELL", + [DOORBELL_BLOCK_10__DOORBELL] = "DOORBELL_BLOCK_10__DOORBELL", + [DOORBELL_BLOCK_11__DOORBELL] = "DOORBELL_BLOCK_11__DOORBELL", + [DOORBELL_BLOCK_12__DOORBELL] = "DOORBELL_BLOCK_12__DOORBELL", + [DOORBELL_BLOCK_13__DOORBELL] = "DOORBELL_BLOCK_13__DOORBELL", + [DOORBELL_BLOCK_14__DOORBELL] = "DOORBELL_BLOCK_14__DOORBELL", + [DOORBELL_BLOCK_15__DOORBELL] = "DOORBELL_BLOCK_15__DOORBELL", + [DOORBELL_BLOCK_16__DOORBELL] = "DOORBELL_BLOCK_16__DOORBELL", + [DOORBELL_BLOCK_17__DOORBELL] = "DOORBELL_BLOCK_17__DOORBELL", + [DOORBELL_BLOCK_18__DOORBELL] = "DOORBELL_BLOCK_18__DOORBELL", + [DOORBELL_BLOCK_19__DOORBELL] = "DOORBELL_BLOCK_19__DOORBELL", + [DOORBELL_BLOCK_20__DOORBELL] = "DOORBELL_BLOCK_20__DOORBELL", + [DOORBELL_BLOCK_21__DOORBELL] = "DOORBELL_BLOCK_21__DOORBELL", + [DOORBELL_BLOCK_22__DOORBELL] = "DOORBELL_BLOCK_22__DOORBELL", + [DOORBELL_BLOCK_23__DOORBELL] = "DOORBELL_BLOCK_23__DOORBELL", + [DOORBELL_BLOCK_24__DOORBELL] = "DOORBELL_BLOCK_24__DOORBELL", + [DOORBELL_BLOCK_25__DOORBELL] = "DOORBELL_BLOCK_25__DOORBELL", + [DOORBELL_BLOCK_26__DOORBELL] = "DOORBELL_BLOCK_26__DOORBELL", + [DOORBELL_BLOCK_27__DOORBELL] = "DOORBELL_BLOCK_27__DOORBELL", + [DOORBELL_BLOCK_28__DOORBELL] = "DOORBELL_BLOCK_28__DOORBELL", + [DOORBELL_BLOCK_29__DOORBELL] = "DOORBELL_BLOCK_29__DOORBELL", + [DOORBELL_BLOCK_30__DOORBELL] = "DOORBELL_BLOCK_30__DOORBELL", + [DOORBELL_BLOCK_31__DOORBELL] = "DOORBELL_BLOCK_31__DOORBELL", + [DOORBELL_BLOCK_32__DOORBELL] = "DOORBELL_BLOCK_32__DOORBELL", + [DOORBELL_BLOCK_33__DOORBELL] = "DOORBELL_BLOCK_33__DOORBELL", + [DOORBELL_BLOCK_34__DOORBELL] = "DOORBELL_BLOCK_34__DOORBELL", + [DOORBELL_BLOCK_35__DOORBELL] = "DOORBELL_BLOCK_35__DOORBELL", + [DOORBELL_BLOCK_36__DOORBELL] = "DOORBELL_BLOCK_36__DOORBELL", + [DOORBELL_BLOCK_37__DOORBELL] = "DOORBELL_BLOCK_37__DOORBELL", + [DOORBELL_BLOCK_38__DOORBELL] = "DOORBELL_BLOCK_38__DOORBELL", + [DOORBELL_BLOCK_39__DOORBELL] =
"DOORBELL_BLOCK_39__DOORBELL", + [DOORBELL_BLOCK_40__DOORBELL] = "DOORBELL_BLOCK_40__DOORBELL", + [DOORBELL_BLOCK_41__DOORBELL] = "DOORBELL_BLOCK_41__DOORBELL", + [DOORBELL_BLOCK_42__DOORBELL] = "DOORBELL_BLOCK_42__DOORBELL", + [DOORBELL_BLOCK_43__DOORBELL] = "DOORBELL_BLOCK_43__DOORBELL", + [DOORBELL_BLOCK_44__DOORBELL] = "DOORBELL_BLOCK_44__DOORBELL", + [DOORBELL_BLOCK_45__DOORBELL] = "DOORBELL_BLOCK_45__DOORBELL", + [DOORBELL_BLOCK_46__DOORBELL] = "DOORBELL_BLOCK_46__DOORBELL", + [DOORBELL_BLOCK_47__DOORBELL] = "DOORBELL_BLOCK_47__DOORBELL", + [DOORBELL_BLOCK_48__DOORBELL] = "DOORBELL_BLOCK_48__DOORBELL", + [DOORBELL_BLOCK_49__DOORBELL] = "DOORBELL_BLOCK_49__DOORBELL", + [DOORBELL_BLOCK_50__DOORBELL] = "DOORBELL_BLOCK_50__DOORBELL", + [DOORBELL_BLOCK_51__DOORBELL] = "DOORBELL_BLOCK_51__DOORBELL", + [DOORBELL_BLOCK_52__DOORBELL] = "DOORBELL_BLOCK_52__DOORBELL", + [DOORBELL_BLOCK_53__DOORBELL] = "DOORBELL_BLOCK_53__DOORBELL", + [DOORBELL_BLOCK_54__DOORBELL] = "DOORBELL_BLOCK_54__DOORBELL", + [DOORBELL_BLOCK_55__DOORBELL] = "DOORBELL_BLOCK_55__DOORBELL", + [DOORBELL_BLOCK_56__DOORBELL] = "DOORBELL_BLOCK_56__DOORBELL", + [DOORBELL_BLOCK_57__DOORBELL] = "DOORBELL_BLOCK_57__DOORBELL", + [DOORBELL_BLOCK_58__DOORBELL] = "DOORBELL_BLOCK_58__DOORBELL", + [DOORBELL_BLOCK_59__DOORBELL] = "DOORBELL_BLOCK_59__DOORBELL", + [DOORBELL_BLOCK_60__DOORBELL] = "DOORBELL_BLOCK_60__DOORBELL", + [DOORBELL_BLOCK_61__DOORBELL] = "DOORBELL_BLOCK_61__DOORBELL", + [DOORBELL_BLOCK_62__DOORBELL] = "DOORBELL_BLOCK_62__DOORBELL", + [DOORBELL_BLOCK_63__DOORBELL] = "DOORBELL_BLOCK_63__DOORBELL", + [GPU_CONTROL__CORE_FEATURES] = "GPU_CONTROL__CORE_FEATURES", + [GPU_CONTROL__ASN_HASH_0] = "GPU_CONTROL__ASN_HASH_0", + [GPU_CONTROL__ASN_HASH_1] = "GPU_CONTROL__ASN_HASH_1", + [GPU_CONTROL__ASN_HASH_2] = "GPU_CONTROL__ASN_HASH_2", + [GPU_CONTROL__DOORBELL_FEATURES] = "GPU_CONTROL__DOORBELL_FEATURES", + [GPU_CONTROL__GPU_FEATURES] = "GPU_CONTROL__GPU_FEATURES", + [GPU_CONTROL__PRFCNT_FEATURES] =
"GPU_CONTROL__PRFCNT_FEATURES", + [GPU_CONTROL__SYSC_ALLOC0] = "GPU_CONTROL__SYSC_ALLOC0", + [GPU_CONTROL__SYSC_ALLOC1] = "GPU_CONTROL__SYSC_ALLOC1", + [GPU_CONTROL__SYSC_ALLOC2] = "GPU_CONTROL__SYSC_ALLOC2", + [GPU_CONTROL__SYSC_ALLOC3] = "GPU_CONTROL__SYSC_ALLOC3", + [GPU_CONTROL__SYSC_ALLOC4] = "GPU_CONTROL__SYSC_ALLOC4", + [GPU_CONTROL__SYSC_ALLOC5] = "GPU_CONTROL__SYSC_ALLOC5", + [GPU_CONTROL__SYSC_ALLOC6] = "GPU_CONTROL__SYSC_ALLOC6", + [GPU_CONTROL__SYSC_ALLOC7] = "GPU_CONTROL__SYSC_ALLOC7", + [GPU_CONTROL__SYSC_PBHA_OVERRIDE0] = "GPU_CONTROL__SYSC_PBHA_OVERRIDE0", + [GPU_CONTROL__SYSC_PBHA_OVERRIDE1] = "GPU_CONTROL__SYSC_PBHA_OVERRIDE1", + [GPU_CONTROL__SYSC_PBHA_OVERRIDE2] = "GPU_CONTROL__SYSC_PBHA_OVERRIDE2", + [GPU_CONTROL__SYSC_PBHA_OVERRIDE3] = "GPU_CONTROL__SYSC_PBHA_OVERRIDE3", + [GPU_CONTROL__AMBA_ENABLE] = "GPU_CONTROL__AMBA_ENABLE", + [GPU_CONTROL__GPU_COMMAND_ARG0] = "GPU_CONTROL__GPU_COMMAND_ARG0", + [GPU_CONTROL__GPU_COMMAND_ARG1] = "GPU_CONTROL__GPU_COMMAND_ARG1", + [GPU_CONTROL__MCU_FEATURES] = "GPU_CONTROL__MCU_FEATURES", + [GPU_CONTROL__SHADER_PWRFEATURES] = "GPU_CONTROL__SHADER_PWRFEATURES", +}; +/* Return the name for @reg_enum, or "INVALID_REG" if it is out of range or has no table entry. */ +const char *kbase_reg_get_enum_string(u32 reg_enum) +{ + if (reg_enum >= ARRAY_SIZE(enum_strings) || !enum_strings[reg_enum]) /* NULL = index with no designated entry */ + return "INVALID_REG"; + return enum_strings[reg_enum]; +} +#endif /* CONFIG_MALI_BIFROST_DEBUG */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_enums.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_enums.h new file mode 100644 index 000000000000..e05af18cd60a --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_enums.h @@ -0,0 +1,408 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This header is autogenerated. Avoid modifying this file unless absolutely + * necessary. + */ + +#ifndef _MALI_KBASE_REGMAP_CSF_ENUMS_H_ +#define _MALI_KBASE_REGMAP_CSF_ENUMS_H_ + +#if !MALI_USE_CSF +#error "Cannot be compiled with JM" +#endif +/* Indices counted from 0, not offsets; trailing comments give access, width, offset. Not sorted by offset. */ +enum kbase_regmap_enum_v10_8 { + GPU_CONTROL__GPU_ID = 0, /* (RO) 32-bit 0x0 */ + GPU_CONTROL__L2_FEATURES, /* (RO) 32-bit 0x4 */ + GPU_CONTROL__TILER_FEATURES, /* (RO) 32-bit 0xC */ + GPU_CONTROL__MEM_FEATURES, /* (RO) 32-bit 0x10 */ + GPU_CONTROL__MMU_FEATURES, /* (RO) 32-bit 0x14 */ + GPU_CONTROL__AS_PRESENT, /* (RO) 32-bit 0x18 */ + GPU_CONTROL__CSF_ID, /* (RO) 32-bit 0x1C */ + GPU_CONTROL__GPU_IRQ_RAWSTAT, /* (RW) 32-bit 0x20 */ + GPU_CONTROL__GPU_IRQ_CLEAR, /* (WO) 32-bit 0x24 */ + GPU_CONTROL__GPU_IRQ_MASK, /* (RW) 32-bit 0x28 */ + GPU_CONTROL__GPU_IRQ_STATUS, /* (RO) 32-bit 0x2C */ + GPU_CONTROL__GPU_STATUS, /* (RO) 32-bit 0x34 */ + GPU_CONTROL__GPU_COMMAND, /* (WO) 32-bit 0x30 */ + GPU_CONTROL__GPU_FAULTSTATUS, /* (RO) 32-bit 0x3C */ + GPU_CONTROL__GPU_FAULTADDRESS, /* (RO) 64-bit 0x40 */ + GPU_CONTROL__L2_CONFIG, /* (RW) 32-bit 0x48 */ + GPU_CONTROL__PWR_KEY, /* (WO) 32-bit 0x50 */ + GPU_CONTROL__PWR_OVERRIDE0, /* (RW) 32-bit 0x54 */ + GPU_CONTROL__PWR_OVERRIDE1, /* (RW) 32-bit 0x58 */ +
GPU_CONTROL__TIMESTAMP_OFFSET, /* (RW) 64-bit 0x88 */ + GPU_CONTROL__CYCLE_COUNT, /* (RO) 64-bit 0x90 */ + GPU_CONTROL__TIMESTAMP, /* (RO) 64-bit 0x98 */ + GPU_CONTROL__THREAD_MAX_THREADS, /* (RO) 32-bit 0xA0 */ + GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE, /* (RO) 32-bit 0xA4 */ + GPU_CONTROL__THREAD_MAX_BARRIER_SIZE, /* (RO) 32-bit 0xA8 */ + GPU_CONTROL__THREAD_FEATURES, /* (RO) 32-bit 0xAC */ + GPU_CONTROL__TEXTURE_FEATURES_0, /* (RO) 32-bit 0xB0 */ + GPU_CONTROL__TEXTURE_FEATURES_1, /* (RO) 32-bit 0xB4 */ + GPU_CONTROL__TEXTURE_FEATURES_2, /* (RO) 32-bit 0xB8 */ + GPU_CONTROL__TEXTURE_FEATURES_3, /* (RO) 32-bit 0xBC */ + GPU_CONTROL__SHADER_PRESENT, /* (RO) 64-bit 0x100 */ + GPU_CONTROL__TILER_PRESENT, /* (RO) 64-bit 0x110 */ + GPU_CONTROL__L2_PRESENT, /* (RO) 64-bit 0x120 */ + GPU_CONTROL__SHADER_READY, /* (RO) 64-bit 0x140 */ + GPU_CONTROL__TILER_READY, /* (RO) 64-bit 0x150 */ + GPU_CONTROL__L2_READY, /* (RO) 64-bit 0x160 */ + GPU_CONTROL__SHADER_PWRON, /* (WO) 64-bit 0x180 */ + GPU_CONTROL__TILER_PWRON, /* (WO) 64-bit 0x190 */ + GPU_CONTROL__L2_PWRON, /* (WO) 64-bit 0x1A0 */ + GPU_CONTROL__SHADER_PWROFF, /* (WO) 64-bit 0x1C0 */ + GPU_CONTROL__TILER_PWROFF, /* (WO) 64-bit 0x1D0 */ + GPU_CONTROL__L2_PWROFF, /* (WO) 64-bit 0x1E0 */ + GPU_CONTROL__SHADER_PWRTRANS, /* (RO) 64-bit 0x200 */ + GPU_CONTROL__TILER_PWRTRANS, /* (RO) 64-bit 0x210 */ + GPU_CONTROL__L2_PWRTRANS, /* (RO) 64-bit 0x220 */ + GPU_CONTROL__SHADER_PWRACTIVE, /* (RO) 64-bit 0x240 */ + GPU_CONTROL__TILER_PWRACTIVE, /* (RO) 64-bit 0x250 */ + GPU_CONTROL__L2_PWRACTIVE, /* (RO) 64-bit 0x260 */ + GPU_CONTROL__REVIDR, /* (RO) 32-bit 0x280 */ + GPU_CONTROL__COHERENCY_FEATURES, /* (RO) 32-bit 0x300 */ + GPU_CONTROL__COHERENCY_ENABLE, /* (RW) 32-bit 0x304 */ + GPU_CONTROL__MCU_CONTROL, /* (RW) 32-bit 0x700 */ + GPU_CONTROL__MCU_STATUS, /* (RO) 32-bit 0x704 */ + GPU_CONTROL__STACK_PRESENT, /* (RO) 64-bit 0xE00 */ + GPU_CONTROL__STACK_READY, /* (RO) 64-bit 0xE10 */ + GPU_CONTROL__STACK_PWRON, /* (WO) 64-bit 
0xE20 */ + GPU_CONTROL__STACK_PWROFF, /* (WO) 64-bit 0xE30 */ + GPU_CONTROL__STACK_PWRTRANS, /* (RO) 64-bit 0xE40 */ + GPU_CONTROL__CSF_CONFIG, /* (RW) 32-bit 0xF00 */ + GPU_CONTROL__SHADER_CONFIG, /* (RW) 32-bit 0xF04 */ + GPU_CONTROL__TILER_CONFIG, /* (RW) 32-bit 0xF08 */ + GPU_CONTROL__L2_MMU_CONFIG, /* (RW) 32-bit 0xF0C */ + GPU_CONTROL__GPU_DBG, /* (RW) 64-bit 0xFE8 */ + JOB_CONTROL__JOB_IRQ_RAWSTAT, /* (RW) 32-bit 0x1000 */ + JOB_CONTROL__JOB_IRQ_CLEAR, /* (WO) 32-bit 0x1004 */ + JOB_CONTROL__JOB_IRQ_MASK, /* (RW) 32-bit 0x1008 */ + JOB_CONTROL__JOB_IRQ_STATUS, /* (RO) 32-bit 0x100C */ + MMU_CONTROL__IRQ_RAWSTAT, /* (RW) 32-bit 0x2000 */ + MMU_CONTROL__IRQ_CLEAR, /* (WO) 32-bit 0x2004 */ + MMU_CONTROL__IRQ_MASK, /* (RW) 32-bit 0x2008 */ + MMU_CONTROL__IRQ_STATUS, /* (RO) 32-bit 0x200C */ + MMU_CONTROL__AS0__TRANSTAB, /* (RW) 64-bit 0x2400 */ + MMU_CONTROL__AS0__MEMATTR, /* (RW) 64-bit 0x2408 */ + MMU_CONTROL__AS0__LOCKADDR, /* (RW) 64-bit 0x2410 */ + MMU_CONTROL__AS0__COMMAND, /* (WO) 32-bit 0x2418 */ + MMU_CONTROL__AS0__FAULTSTATUS, /* (RO) 32-bit 0x241C */ + MMU_CONTROL__AS0__FAULTADDRESS, /* (RO) 64-bit 0x2420 */ + MMU_CONTROL__AS0__STATUS, /* (RO) 32-bit 0x2428 */ + MMU_CONTROL__AS0__TRANSCFG, /* (RW) 64-bit 0x2430 */ + MMU_CONTROL__AS0__FAULTEXTRA, /* (RO) 64-bit 0x2438 */ + MMU_CONTROL__AS1__TRANSTAB, /* (RW) 64-bit 0x2440 */ + MMU_CONTROL__AS1__MEMATTR, /* (RW) 64-bit 0x2448 */ + MMU_CONTROL__AS1__LOCKADDR, /* (RW) 64-bit 0x2450 */ + MMU_CONTROL__AS1__COMMAND, /* (WO) 32-bit 0x2458 */ + MMU_CONTROL__AS1__FAULTSTATUS, /* (RO) 32-bit 0x245C */ + MMU_CONTROL__AS1__FAULTADDRESS, /* (RO) 64-bit 0x2460 */ + MMU_CONTROL__AS1__STATUS, /* (RO) 32-bit 0x2468 */ + MMU_CONTROL__AS1__TRANSCFG, /* (RW) 64-bit 0x2470 */ + MMU_CONTROL__AS1__FAULTEXTRA, /* (RO) 64-bit 0x2478 */ + MMU_CONTROL__AS2__TRANSTAB, /* (RW) 64-bit 0x2480 */ + MMU_CONTROL__AS2__MEMATTR, /* (RW) 64-bit 0x2488 */ + MMU_CONTROL__AS2__LOCKADDR, /* (RW) 64-bit 0x2490 */ + MMU_CONTROL__AS2__COMMAND, 
/* (WO) 32-bit 0x2498 */ + MMU_CONTROL__AS2__FAULTSTATUS, /* (RO) 32-bit 0x249C */ + MMU_CONTROL__AS2__FAULTADDRESS, /* (RO) 64-bit 0x24A0 */ + MMU_CONTROL__AS2__STATUS, /* (RO) 32-bit 0x24A8 */ + MMU_CONTROL__AS2__TRANSCFG, /* (RW) 64-bit 0x24B0 */ + MMU_CONTROL__AS2__FAULTEXTRA, /* (RO) 64-bit 0x24B8 */ + MMU_CONTROL__AS3__TRANSTAB, /* (RW) 64-bit 0x24C0 */ + MMU_CONTROL__AS3__MEMATTR, /* (RW) 64-bit 0x24C8 */ + MMU_CONTROL__AS3__LOCKADDR, /* (RW) 64-bit 0x24D0 */ + MMU_CONTROL__AS3__COMMAND, /* (WO) 32-bit 0x24D8 */ + MMU_CONTROL__AS3__FAULTSTATUS, /* (RO) 32-bit 0x24DC */ + MMU_CONTROL__AS3__FAULTADDRESS, /* (RO) 64-bit 0x24E0 */ + MMU_CONTROL__AS3__STATUS, /* (RO) 32-bit 0x24E8 */ + MMU_CONTROL__AS3__TRANSCFG, /* (RW) 64-bit 0x24F0 */ + MMU_CONTROL__AS3__FAULTEXTRA, /* (RO) 64-bit 0x24F8 */ + MMU_CONTROL__AS4__TRANSTAB, /* (RW) 64-bit 0x2500 */ + MMU_CONTROL__AS4__MEMATTR, /* (RW) 64-bit 0x2508 */ + MMU_CONTROL__AS4__LOCKADDR, /* (RW) 64-bit 0x2510 */ + MMU_CONTROL__AS4__COMMAND, /* (WO) 32-bit 0x2518 */ + MMU_CONTROL__AS4__FAULTSTATUS, /* (RO) 32-bit 0x251C */ + MMU_CONTROL__AS4__FAULTADDRESS, /* (RO) 64-bit 0x2520 */ + MMU_CONTROL__AS4__STATUS, /* (RO) 32-bit 0x2528 */ + MMU_CONTROL__AS4__TRANSCFG, /* (RW) 64-bit 0x2530 */ + MMU_CONTROL__AS4__FAULTEXTRA, /* (RO) 64-bit 0x2538 */ + MMU_CONTROL__AS5__TRANSTAB, /* (RW) 64-bit 0x2540 */ + MMU_CONTROL__AS5__MEMATTR, /* (RW) 64-bit 0x2548 */ + MMU_CONTROL__AS5__LOCKADDR, /* (RW) 64-bit 0x2550 */ + MMU_CONTROL__AS5__COMMAND, /* (WO) 32-bit 0x2558 */ + MMU_CONTROL__AS5__FAULTSTATUS, /* (RO) 32-bit 0x255C */ + MMU_CONTROL__AS5__FAULTADDRESS, /* (RO) 64-bit 0x2560 */ + MMU_CONTROL__AS5__STATUS, /* (RO) 32-bit 0x2568 */ + MMU_CONTROL__AS5__TRANSCFG, /* (RW) 64-bit 0x2570 */ + MMU_CONTROL__AS5__FAULTEXTRA, /* (RO) 64-bit 0x2578 */ + MMU_CONTROL__AS6__TRANSTAB, /* (RW) 64-bit 0x2580 */ + MMU_CONTROL__AS6__MEMATTR, /* (RW) 64-bit 0x2588 */ + MMU_CONTROL__AS6__LOCKADDR, /* (RW) 64-bit 0x2590 */ + MMU_CONTROL__AS6__COMMAND, 
/* (WO) 32-bit 0x2598 */ + MMU_CONTROL__AS6__FAULTSTATUS, /* (RO) 32-bit 0x259C */ + MMU_CONTROL__AS6__FAULTADDRESS, /* (RO) 64-bit 0x25A0 */ + MMU_CONTROL__AS6__STATUS, /* (RO) 32-bit 0x25A8 */ + MMU_CONTROL__AS6__TRANSCFG, /* (RW) 64-bit 0x25B0 */ + MMU_CONTROL__AS6__FAULTEXTRA, /* (RO) 64-bit 0x25B8 */ + MMU_CONTROL__AS7__TRANSTAB, /* (RW) 64-bit 0x25C0 */ + MMU_CONTROL__AS7__MEMATTR, /* (RW) 64-bit 0x25C8 */ + MMU_CONTROL__AS7__LOCKADDR, /* (RW) 64-bit 0x25D0 */ + MMU_CONTROL__AS7__COMMAND, /* (WO) 32-bit 0x25D8 */ + MMU_CONTROL__AS7__FAULTSTATUS, /* (RO) 32-bit 0x25DC */ + MMU_CONTROL__AS7__FAULTADDRESS, /* (RO) 64-bit 0x25E0 */ + MMU_CONTROL__AS7__STATUS, /* (RO) 32-bit 0x25E8 */ + MMU_CONTROL__AS7__TRANSCFG, /* (RW) 64-bit 0x25F0 */ + MMU_CONTROL__AS7__FAULTEXTRA, /* (RO) 64-bit 0x25F8 */ + MMU_CONTROL__AS8__TRANSTAB, /* (RW) 64-bit 0x2600 */ + MMU_CONTROL__AS8__MEMATTR, /* (RW) 64-bit 0x2608 */ + MMU_CONTROL__AS8__LOCKADDR, /* (RW) 64-bit 0x2610 */ + MMU_CONTROL__AS8__COMMAND, /* (WO) 32-bit 0x2618 */ + MMU_CONTROL__AS8__FAULTSTATUS, /* (RO) 32-bit 0x261C */ + MMU_CONTROL__AS8__FAULTADDRESS, /* (RO) 64-bit 0x2620 */ + MMU_CONTROL__AS8__STATUS, /* (RO) 32-bit 0x2628 */ + MMU_CONTROL__AS8__TRANSCFG, /* (RW) 64-bit 0x2630 */ + MMU_CONTROL__AS8__FAULTEXTRA, /* (RO) 64-bit 0x2638 */ + MMU_CONTROL__AS9__TRANSTAB, /* (RW) 64-bit 0x2640 */ + MMU_CONTROL__AS9__MEMATTR, /* (RW) 64-bit 0x2648 */ + MMU_CONTROL__AS9__LOCKADDR, /* (RW) 64-bit 0x2650 */ + MMU_CONTROL__AS9__COMMAND, /* (WO) 32-bit 0x2658 */ + MMU_CONTROL__AS9__FAULTSTATUS, /* (RO) 32-bit 0x265C */ + MMU_CONTROL__AS9__FAULTADDRESS, /* (RO) 64-bit 0x2660 */ + MMU_CONTROL__AS9__STATUS, /* (RO) 32-bit 0x2668 */ + MMU_CONTROL__AS9__TRANSCFG, /* (RW) 64-bit 0x2670 */ + MMU_CONTROL__AS9__FAULTEXTRA, /* (RO) 64-bit 0x2678 */ + MMU_CONTROL__AS10__TRANSTAB, /* (RW) 64-bit 0x2680 */ + MMU_CONTROL__AS10__MEMATTR, /* (RW) 64-bit 0x2688 */ + MMU_CONTROL__AS10__LOCKADDR, /* (RW) 64-bit 0x2690 */ + 
MMU_CONTROL__AS10__COMMAND, /* (WO) 32-bit 0x2698 */ + MMU_CONTROL__AS10__FAULTSTATUS, /* (RO) 32-bit 0x269C */ + MMU_CONTROL__AS10__FAULTADDRESS, /* (RO) 64-bit 0x26A0 */ + MMU_CONTROL__AS10__STATUS, /* (RO) 32-bit 0x26A8 */ + MMU_CONTROL__AS10__TRANSCFG, /* (RW) 64-bit 0x26B0 */ + MMU_CONTROL__AS10__FAULTEXTRA, /* (RO) 64-bit 0x26B8 */ + MMU_CONTROL__AS11__TRANSTAB, /* (RW) 64-bit 0x26C0 */ + MMU_CONTROL__AS11__MEMATTR, /* (RW) 64-bit 0x26C8 */ + MMU_CONTROL__AS11__LOCKADDR, /* (RW) 64-bit 0x26D0 */ + MMU_CONTROL__AS11__COMMAND, /* (WO) 32-bit 0x26D8 */ + MMU_CONTROL__AS11__FAULTSTATUS, /* (RO) 32-bit 0x26DC */ + MMU_CONTROL__AS11__FAULTADDRESS, /* (RO) 64-bit 0x26E0 */ + MMU_CONTROL__AS11__STATUS, /* (RO) 32-bit 0x26E8 */ + MMU_CONTROL__AS11__TRANSCFG, /* (RW) 64-bit 0x26F0 */ + MMU_CONTROL__AS11__FAULTEXTRA, /* (RO) 64-bit 0x26F8 */ + MMU_CONTROL__AS12__TRANSTAB, /* (RW) 64-bit 0x2700 */ + MMU_CONTROL__AS12__MEMATTR, /* (RW) 64-bit 0x2708 */ + MMU_CONTROL__AS12__LOCKADDR, /* (RW) 64-bit 0x2710 */ + MMU_CONTROL__AS12__COMMAND, /* (WO) 32-bit 0x2718 */ + MMU_CONTROL__AS12__FAULTSTATUS, /* (RO) 32-bit 0x271C */ + MMU_CONTROL__AS12__FAULTADDRESS, /* (RO) 64-bit 0x2720 */ + MMU_CONTROL__AS12__STATUS, /* (RO) 32-bit 0x2728 */ + MMU_CONTROL__AS12__TRANSCFG, /* (RW) 64-bit 0x2730 */ + MMU_CONTROL__AS12__FAULTEXTRA, /* (RO) 64-bit 0x2738 */ + MMU_CONTROL__AS13__TRANSTAB, /* (RW) 64-bit 0x2740 */ + MMU_CONTROL__AS13__MEMATTR, /* (RW) 64-bit 0x2748 */ + MMU_CONTROL__AS13__LOCKADDR, /* (RW) 64-bit 0x2750 */ + MMU_CONTROL__AS13__COMMAND, /* (WO) 32-bit 0x2758 */ + MMU_CONTROL__AS13__FAULTSTATUS, /* (RO) 32-bit 0x275C */ + MMU_CONTROL__AS13__FAULTADDRESS, /* (RO) 64-bit 0x2760 */ + MMU_CONTROL__AS13__STATUS, /* (RO) 32-bit 0x2768 */ + MMU_CONTROL__AS13__TRANSCFG, /* (RW) 64-bit 0x2770 */ + MMU_CONTROL__AS13__FAULTEXTRA, /* (RO) 64-bit 0x2778 */ + MMU_CONTROL__AS14__TRANSTAB, /* (RW) 64-bit 0x2780 */ + MMU_CONTROL__AS14__MEMATTR, /* (RW) 64-bit 0x2788 */ + 
MMU_CONTROL__AS14__LOCKADDR, /* (RW) 64-bit 0x2790 */ + MMU_CONTROL__AS14__COMMAND, /* (WO) 32-bit 0x2798 */ + MMU_CONTROL__AS14__FAULTSTATUS, /* (RO) 32-bit 0x279C */ + MMU_CONTROL__AS14__FAULTADDRESS, /* (RO) 64-bit 0x27A0 */ + MMU_CONTROL__AS14__STATUS, /* (RO) 32-bit 0x27A8 */ + MMU_CONTROL__AS14__TRANSCFG, /* (RW) 64-bit 0x27B0 */ + MMU_CONTROL__AS14__FAULTEXTRA, /* (RO) 64-bit 0x27B8 */ + MMU_CONTROL__AS15__TRANSTAB, /* (RW) 64-bit 0x27C0 */ + MMU_CONTROL__AS15__MEMATTR, /* (RW) 64-bit 0x27C8 */ + MMU_CONTROL__AS15__LOCKADDR, /* (RW) 64-bit 0x27D0 */ + MMU_CONTROL__AS15__COMMAND, /* (WO) 32-bit 0x27D8 */ + MMU_CONTROL__AS15__FAULTSTATUS, /* (RO) 32-bit 0x27DC */ + MMU_CONTROL__AS15__FAULTADDRESS, /* (RO) 64-bit 0x27E0 */ + MMU_CONTROL__AS15__STATUS, /* (RO) 32-bit 0x27E8 */ + MMU_CONTROL__AS15__TRANSCFG, /* (RW) 64-bit 0x27F0 */ + MMU_CONTROL__AS15__FAULTEXTRA, /* (RO) 64-bit 0x27F8 */ + USER__LATEST_FLUSH, /* (RO) 32-bit 0x10000 */ + IPA_CONTROL__COMMAND, /* (WO) 32-bit 0x40000 */ + IPA_CONTROL__STATUS, /* (RO) 32-bit 0x40004 */ + IPA_CONTROL__TIMER, /* (RW) 32-bit 0x40008 */ + IPA_CONTROL__SELECT_CSHW, /* (RW) 64-bit 0x40010 */ + IPA_CONTROL__SELECT_MEMSYS, /* (RW) 64-bit 0x40018 */ + IPA_CONTROL__SELECT_TILER, /* (RW) 64-bit 0x40020 */ + IPA_CONTROL__SELECT_SHADER, /* (RW) 64-bit 0x40028 */ + IPA_CONTROL__VALUE_CSHW_0, /* (RO) 64-bit 0x40100 */ + IPA_CONTROL__VALUE_CSHW_1, /* (RO) 64-bit 0x40108 */ + IPA_CONTROL__VALUE_CSHW_2, /* (RO) 64-bit 0x40110 */ + IPA_CONTROL__VALUE_CSHW_3, /* (RO) 64-bit 0x40118 */ + IPA_CONTROL__VALUE_CSHW_4, /* (RO) 64-bit 0x40120 */ + IPA_CONTROL__VALUE_CSHW_5, /* (RO) 64-bit 0x40128 */ + IPA_CONTROL__VALUE_CSHW_6, /* (RO) 64-bit 0x40130 */ + IPA_CONTROL__VALUE_CSHW_7, /* (RO) 64-bit 0x40138 */ + IPA_CONTROL__VALUE_MEMSYS_0, /* (RO) 64-bit 0x40140 */ + IPA_CONTROL__VALUE_MEMSYS_1, /* (RO) 64-bit 0x40148 */ + IPA_CONTROL__VALUE_MEMSYS_2, /* (RO) 64-bit 0x40150 */ + IPA_CONTROL__VALUE_MEMSYS_3, /* (RO) 64-bit 0x40158 */ + 
IPA_CONTROL__VALUE_MEMSYS_4, /* (RO) 64-bit 0x40160 */ + IPA_CONTROL__VALUE_MEMSYS_5, /* (RO) 64-bit 0x40168 */ + IPA_CONTROL__VALUE_MEMSYS_6, /* (RO) 64-bit 0x40170 */ + IPA_CONTROL__VALUE_MEMSYS_7, /* (RO) 64-bit 0x40178 */ + IPA_CONTROL__VALUE_TILER_0, /* (RO) 64-bit 0x40180 */ + IPA_CONTROL__VALUE_TILER_1, /* (RO) 64-bit 0x40188 */ + IPA_CONTROL__VALUE_TILER_2, /* (RO) 64-bit 0x40190 */ + IPA_CONTROL__VALUE_TILER_3, /* (RO) 64-bit 0x40198 */ + IPA_CONTROL__VALUE_TILER_4, /* (RO) 64-bit 0x401A0 */ + IPA_CONTROL__VALUE_TILER_5, /* (RO) 64-bit 0x401A8 */ + IPA_CONTROL__VALUE_TILER_6, /* (RO) 64-bit 0x401B0 */ + IPA_CONTROL__VALUE_TILER_7, /* (RO) 64-bit 0x401B8 */ + IPA_CONTROL__VALUE_SHADER_0, /* (RO) 64-bit 0x401C0 */ + IPA_CONTROL__VALUE_SHADER_1, /* (RO) 64-bit 0x401C8 */ + IPA_CONTROL__VALUE_SHADER_2, /* (RO) 64-bit 0x401D0 */ + IPA_CONTROL__VALUE_SHADER_3, /* (RO) 64-bit 0x401D8 */ + IPA_CONTROL__VALUE_SHADER_4, /* (RO) 64-bit 0x401E0 */ + IPA_CONTROL__VALUE_SHADER_5, /* (RO) 64-bit 0x401E8 */ + IPA_CONTROL__VALUE_SHADER_6, /* (RO) 64-bit 0x401F0 */ + IPA_CONTROL__VALUE_SHADER_7, /* (RO) 64-bit 0x401F8 */ + DOORBELL_BLOCK_0__DOORBELL, /* (WO) 32-bit 0x80000 */ + DOORBELL_BLOCK_1__DOORBELL, /* (WO) 32-bit 0x90000 */ + DOORBELL_BLOCK_2__DOORBELL, /* (WO) 32-bit 0xA0000 */ + DOORBELL_BLOCK_3__DOORBELL, /* (WO) 32-bit 0xB0000 */ + DOORBELL_BLOCK_4__DOORBELL, /* (WO) 32-bit 0xC0000 */ + DOORBELL_BLOCK_5__DOORBELL, /* (WO) 32-bit 0xD0000 */ + DOORBELL_BLOCK_6__DOORBELL, /* (WO) 32-bit 0xE0000 */ + DOORBELL_BLOCK_7__DOORBELL, /* (WO) 32-bit 0xF0000 */ + DOORBELL_BLOCK_8__DOORBELL, /* (WO) 32-bit 0x100000 */ + DOORBELL_BLOCK_9__DOORBELL, /* (WO) 32-bit 0x110000 */ + DOORBELL_BLOCK_10__DOORBELL, /* (WO) 32-bit 0x120000 */ + DOORBELL_BLOCK_11__DOORBELL, /* (WO) 32-bit 0x130000 */ + DOORBELL_BLOCK_12__DOORBELL, /* (WO) 32-bit 0x140000 */ + DOORBELL_BLOCK_13__DOORBELL, /* (WO) 32-bit 0x150000 */ + DOORBELL_BLOCK_14__DOORBELL, /* (WO) 32-bit 0x160000 */ + 
DOORBELL_BLOCK_15__DOORBELL, /* (WO) 32-bit 0x170000 */ + DOORBELL_BLOCK_16__DOORBELL, /* (WO) 32-bit 0x180000 */ + DOORBELL_BLOCK_17__DOORBELL, /* (WO) 32-bit 0x190000 */ + DOORBELL_BLOCK_18__DOORBELL, /* (WO) 32-bit 0x1A0000 */ + DOORBELL_BLOCK_19__DOORBELL, /* (WO) 32-bit 0x1B0000 */ + DOORBELL_BLOCK_20__DOORBELL, /* (WO) 32-bit 0x1C0000 */ + DOORBELL_BLOCK_21__DOORBELL, /* (WO) 32-bit 0x1D0000 */ + DOORBELL_BLOCK_22__DOORBELL, /* (WO) 32-bit 0x1E0000 */ + DOORBELL_BLOCK_23__DOORBELL, /* (WO) 32-bit 0x1F0000 */ + DOORBELL_BLOCK_24__DOORBELL, /* (WO) 32-bit 0x200000 */ + DOORBELL_BLOCK_25__DOORBELL, /* (WO) 32-bit 0x210000 */ + DOORBELL_BLOCK_26__DOORBELL, /* (WO) 32-bit 0x220000 */ + DOORBELL_BLOCK_27__DOORBELL, /* (WO) 32-bit 0x230000 */ + DOORBELL_BLOCK_28__DOORBELL, /* (WO) 32-bit 0x240000 */ + DOORBELL_BLOCK_29__DOORBELL, /* (WO) 32-bit 0x250000 */ + DOORBELL_BLOCK_30__DOORBELL, /* (WO) 32-bit 0x260000 */ + DOORBELL_BLOCK_31__DOORBELL, /* (WO) 32-bit 0x270000 */ + DOORBELL_BLOCK_32__DOORBELL, /* (WO) 32-bit 0x280000 */ + DOORBELL_BLOCK_33__DOORBELL, /* (WO) 32-bit 0x290000 */ + DOORBELL_BLOCK_34__DOORBELL, /* (WO) 32-bit 0x2A0000 */ + DOORBELL_BLOCK_35__DOORBELL, /* (WO) 32-bit 0x2B0000 */ + DOORBELL_BLOCK_36__DOORBELL, /* (WO) 32-bit 0x2C0000 */ + DOORBELL_BLOCK_37__DOORBELL, /* (WO) 32-bit 0x2D0000 */ + DOORBELL_BLOCK_38__DOORBELL, /* (WO) 32-bit 0x2E0000 */ + DOORBELL_BLOCK_39__DOORBELL, /* (WO) 32-bit 0x2F0000 */ + DOORBELL_BLOCK_40__DOORBELL, /* (WO) 32-bit 0x300000 */ + DOORBELL_BLOCK_41__DOORBELL, /* (WO) 32-bit 0x310000 */ + DOORBELL_BLOCK_42__DOORBELL, /* (WO) 32-bit 0x320000 */ + DOORBELL_BLOCK_43__DOORBELL, /* (WO) 32-bit 0x330000 */ + DOORBELL_BLOCK_44__DOORBELL, /* (WO) 32-bit 0x340000 */ + DOORBELL_BLOCK_45__DOORBELL, /* (WO) 32-bit 0x350000 */ + DOORBELL_BLOCK_46__DOORBELL, /* (WO) 32-bit 0x360000 */ + DOORBELL_BLOCK_47__DOORBELL, /* (WO) 32-bit 0x370000 */ + DOORBELL_BLOCK_48__DOORBELL, /* (WO) 32-bit 0x380000 */ + 
DOORBELL_BLOCK_49__DOORBELL, /* (WO) 32-bit 0x390000 */ + DOORBELL_BLOCK_50__DOORBELL, /* (WO) 32-bit 0x3A0000 */ + DOORBELL_BLOCK_51__DOORBELL, /* (WO) 32-bit 0x3B0000 */ + DOORBELL_BLOCK_52__DOORBELL, /* (WO) 32-bit 0x3C0000 */ + DOORBELL_BLOCK_53__DOORBELL, /* (WO) 32-bit 0x3D0000 */ + DOORBELL_BLOCK_54__DOORBELL, /* (WO) 32-bit 0x3E0000 */ + DOORBELL_BLOCK_55__DOORBELL, /* (WO) 32-bit 0x3F0000 */ + DOORBELL_BLOCK_56__DOORBELL, /* (WO) 32-bit 0x400000 */ + DOORBELL_BLOCK_57__DOORBELL, /* (WO) 32-bit 0x410000 */ + DOORBELL_BLOCK_58__DOORBELL, /* (WO) 32-bit 0x420000 */ + DOORBELL_BLOCK_59__DOORBELL, /* (WO) 32-bit 0x430000 */ + DOORBELL_BLOCK_60__DOORBELL, /* (WO) 32-bit 0x440000 */ + DOORBELL_BLOCK_61__DOORBELL, /* (WO) 32-bit 0x450000 */ + DOORBELL_BLOCK_62__DOORBELL, /* (WO) 32-bit 0x460000 */ + DOORBELL_BLOCK_63__DOORBELL, /* (WO) 32-bit 0x470000 */ + NR_V10_8_REGS, +}; +/* Continues the index space: the first enumerator takes the value NR_V10_8_REGS. */ +enum kbase_regmap_enum_v10_10 { + GPU_CONTROL__CORE_FEATURES = NR_V10_8_REGS, /* (RO) 32-bit 0x8 */ + NR_V10_10_REGS, +}; +/* Continues the index space: the first enumerator takes the value NR_V10_10_REGS. */ +enum kbase_regmap_enum_v11 { + GPU_CONTROL__ASN_HASH_0 = NR_V10_10_REGS, /* (RW) 32-bit 0x2C0 */ + GPU_CONTROL__ASN_HASH_1, /* (RW) 32-bit 0x2C4 */ + GPU_CONTROL__ASN_HASH_2, /* (RW) 32-bit 0x2C8 */ + GPU_CONTROL__DOORBELL_FEATURES, /* (RO) 32-bit 0xC0 */ + GPU_CONTROL__GPU_FEATURES, /* (RO) 64-bit 0x60 */ + GPU_CONTROL__PRFCNT_FEATURES, /* (RO) 32-bit 0x68 */ + GPU_CONTROL__SYSC_ALLOC0, /* (RW) 32-bit 0x340 */ + GPU_CONTROL__SYSC_ALLOC1, /* (RW) 32-bit 0x344 */ + GPU_CONTROL__SYSC_ALLOC2, /* (RW) 32-bit 0x348 */ + GPU_CONTROL__SYSC_ALLOC3, /* (RW) 32-bit 0x34C */ + GPU_CONTROL__SYSC_ALLOC4, /* (RW) 32-bit 0x350 */ + GPU_CONTROL__SYSC_ALLOC5, /* (RW) 32-bit 0x354 */ + GPU_CONTROL__SYSC_ALLOC6, /* (RW) 32-bit 0x358 */ + GPU_CONTROL__SYSC_ALLOC7, /* (RW) 32-bit 0x35C */ + GPU_CONTROL__SYSC_PBHA_OVERRIDE0, /* (RW) 32-bit 0x320 */ + GPU_CONTROL__SYSC_PBHA_OVERRIDE1, /* (RW) 32-bit 0x324 */ + GPU_CONTROL__SYSC_PBHA_OVERRIDE2, /* (RW) 32-bit 0x328 */ +
GPU_CONTROL__SYSC_PBHA_OVERRIDE3, /* (RW) 32-bit 0x32C */ + NR_V11_REGS, +}; + +/* + * V11_MODIFIED_REGS: + * USER__LATEST_FLUSH (RO) -> (RW) + */ +/* Continues the index space: the first enumerator takes the value NR_V11_REGS. */ +enum kbase_regmap_enum_v12 { + GPU_CONTROL__AMBA_ENABLE = NR_V11_REGS, /* (RW) 32-bit 0x304 */ + GPU_CONTROL__GPU_COMMAND_ARG0, /* (RW) 64-bit 0xD0 */ + GPU_CONTROL__GPU_COMMAND_ARG1, /* (RW) 64-bit 0xD8 */ + GPU_CONTROL__MCU_FEATURES, /* (RO) 32-bit 0x708 */ + GPU_CONTROL__SHADER_PWRFEATURES, /* (RW) 32-bit 0x188 */ + NR_V12_REGS, +}; +/* Alias only: AMBA_FEATURES is given the COHERENCY_FEATURES index, so no new index is introduced. */ +enum kbase_regmap_enum_v12_remap { + GPU_CONTROL__AMBA_FEATURES = GPU_CONTROL__COHERENCY_FEATURES, /* (RO) 32-bit 0x300 */ +}; + +/* + * V12_REMOVED_REGS: + * GPU_CONTROL__COHERENCY_ENABLE + */ + + +#endif /* _MALI_KBASE_REGMAP_CSF_ENUMS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h new file mode 100644 index 000000000000..8f6164ee23bf --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
/*
 * ENUM_OFFSET - index of the _index-th instance of a replicated register.
 * @_index: instance number (0-based)
 * @_base:  enum value of instance 0
 * @_next:  enum value of instance 1
 *
 * The stride between instances is (_next - _base). Every argument is
 * parenthesized so that expressions such as "i + 1" are scaled as a whole.
 */
#define ENUM_OFFSET(_index, _base, _next) ((_base) + (_index) * ((_next) - (_base)))

/*
 * Per-block name builders (<BLOCK>_ENUM) paste a register name onto the
 * block prefix; the *_OFFSET(n) helpers return the enum value of element n
 * of a numbered register family, relying on the family being laid out
 * consecutively in the enum. "n" is parenthesized so that compound
 * expressions are added as a unit.
 */
#define GPU_CONTROL_ENUM(regname) GPU_CONTROL__##regname
#define GPU_TEXTURE_FEATURES_ENUM(n) GPU_CONTROL_ENUM(TEXTURE_FEATURES_##n)
#define GPU_TEXTURE_FEATURES_OFFSET(n) (GPU_TEXTURE_FEATURES_ENUM(0) + (n))
#define GPU_ASN_HASH_ENUM(n) GPU_CONTROL_ENUM(ASN_HASH_##n)
#define GPU_ASN_HASH_OFFSET(n) (GPU_ASN_HASH_ENUM(0) + (n))
#define GPU_SYSC_PBHA_OVERRIDE_ENUM(n) GPU_CONTROL_ENUM(SYSC_PBHA_OVERRIDE##n)
#define GPU_SYSC_PBHA_OVERRIDE_OFFSET(n) (GPU_SYSC_PBHA_OVERRIDE_ENUM(0) + (n))
#define GPU_SYSC_ALLOC_ENUM(n) GPU_CONTROL_ENUM(SYSC_ALLOC##n)
#define GPU_SYSC_ALLOC_OFFSET(n) (GPU_SYSC_ALLOC_ENUM(0) + (n))

/* GPU_L2_SLICE_HASH_OFFSET aliasing GPU_ASN_HASH_OFFSET */
#define GPU_L2_SLICE_HASH_OFFSET(n) GPU_ASN_HASH_OFFSET(n)

#define JOB_CONTROL_ENUM(regname) JOB_CONTROL__##regname

/*
 * MMU address-space registers: MMU_AS_OFFSET() derives the per-AS stride
 * from the AS0 and AS1 entries of the same register.
 */
#define MMU_CONTROL_ENUM(regname) MMU_CONTROL__##regname
#define MMU_AS_ENUM(n, regname) MMU_CONTROL_ENUM(AS##n##__##regname)
#define MMU_AS_BASE_ENUM(n) MMU_AS_ENUM(n, TRANSTAB)
#define MMU_AS_OFFSET(n, regname) ENUM_OFFSET(n, MMU_AS_ENUM(0, regname), MMU_AS_ENUM(1, regname))
#define MMU_AS_BASE_OFFSET(n) MMU_AS_OFFSET(n, TRANSTAB)

#define USER_ENUM(regname) USER__##regname

#define IPA_CONTROL_ENUM(regname) IPA_CONTROL__##regname
#define IPA_VALUE_CSHW_ENUM(n) IPA_CONTROL_ENUM(VALUE_CSHW_##n)
#define IPA_VALUE_CSHW_OFFSET(n) (IPA_VALUE_CSHW_ENUM(0) + (n))
#define IPA_VALUE_MEMSYS_ENUM(n) IPA_CONTROL_ENUM(VALUE_MEMSYS_##n)
#define IPA_VALUE_MEMSYS_OFFSET(n) (IPA_VALUE_MEMSYS_ENUM(0) + (n))
#define IPA_VALUE_TILER_ENUM(n) IPA_CONTROL_ENUM(VALUE_TILER_##n)
#define IPA_VALUE_TILER_OFFSET(n) (IPA_VALUE_TILER_ENUM(0) + (n))
#define IPA_VALUE_SHADER_ENUM(n) IPA_CONTROL_ENUM(VALUE_SHADER_##n)
#define IPA_VALUE_SHADER_OFFSET(n) (IPA_VALUE_SHADER_ENUM(0) + (n))

#define HOST_POWER_ENUM(regname) GPU_CONTROL_ENUM(HOST_POWER__##regname)

/* Doorbell blocks: stride taken from the block 0 and block 1 entries. */
#define DOORBELL_BLOCK_ENUM(n, regname) DOORBELL_BLOCK_##n##__##regname
#define DOORBELL_BLOCK_OFFSET(n, regname) \
	ENUM_OFFSET(n, DOORBELL_BLOCK_ENUM(0, regname), DOORBELL_BLOCK_ENUM(1, regname))

/*
 * Register value macros.
 *
 * Convention used below: <FIELD>_SHIFT is the bit position, <FIELD>_MASK the
 * in-place mask, <FIELD>_GET(reg_val) extracts the field and
 * <FIELD>_SET(reg_val, value) returns reg_val with the field replaced
 * (bits of "value" that do not fit the mask are discarded).
 */

/* L2_CONFIG PBHA values */
#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12)
#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT)
#define L2_CONFIG_PBHA_HWU_GET(reg_val) \
	(((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT)
#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \
	(((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \
	 (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK))

/* PRFCNT_FEATURES: counter block size lives in the low 8 bits. */
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0)
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \
	(((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \
	 PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)

/*
 * Begin AARCH64 MMU TRANSTAB register values
 *
 * The _SET forms below clear the field with ~(~(reg_val) | MASK); this is
 * kept exactly as generated because its result width follows the operand
 * types and must not be altered here.
 */
#define AS_TRANSTAB_BASE_SHIFT GPU_U(0)
#define AS_TRANSTAB_BASE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << AS_TRANSTAB_BASE_SHIFT)
#define AS_TRANSTAB_BASE_GET(reg_val) (((reg_val)&AS_TRANSTAB_BASE_MASK) >> AS_TRANSTAB_BASE_SHIFT)
#define AS_TRANSTAB_BASE_SET(reg_val, value) \
	(~(~(reg_val) | AS_TRANSTAB_BASE_MASK) | \
	 (((uint64_t)(value) << AS_TRANSTAB_BASE_SHIFT) & AS_TRANSTAB_BASE_MASK))

/*
 * Begin MMU STATUS register values
 */
#define AS_STATUS_AS_ACTIVE_INT_SHIFT GPU_U(1)
#define AS_STATUS_AS_ACTIVE_INT_MASK (GPU_U(0x1) << AS_STATUS_AS_ACTIVE_INT_SHIFT)
#define AS_STATUS_AS_ACTIVE_INT_GET(reg_val) \
	(((reg_val)&AS_STATUS_AS_ACTIVE_INT_MASK) >> AS_STATUS_AS_ACTIVE_INT_SHIFT)
#define AS_STATUS_AS_ACTIVE_INT_SET(reg_val, value) \
	(~(~(reg_val) | AS_STATUS_AS_ACTIVE_INT_MASK) | \
	 (((value) << AS_STATUS_AS_ACTIVE_INT_SHIFT) & AS_STATUS_AS_ACTIVE_INT_MASK))
/*
 * Begin MMU FAULTSTATUS register values
 *
 * Exception type codes, grouped by name prefix: generic states, CS_*
 * (command stream), INSTR_* / data faults, NE_*, firmware/resource errors,
 * SW_FAULT_0..15 (0x70-0x7F), GPU bus/coherency faults and the MMU
 * translation, permission, access-flag, address-size and memory-attribute
 * faults (0xC0 and above).
 */
#define AS_FAULTSTATUS_EXCEPTION_TYPE_OK 0x0
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TERMINATED 0x4
#define AS_FAULTSTATUS_EXCEPTION_TYPE_KABOOM 0x5
#define AS_FAULTSTATUS_EXCEPTION_TYPE_EUREKA 0x6
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACTIVE 0x8
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0xF
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_UNRECOVERABLE 0x41
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A
#define AS_FAULTSTATUS_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B
#define AS_FAULTSTATUS_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50
#define AS_FAULTSTATUS_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51
#define AS_FAULTSTATUS_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55
#define AS_FAULTSTATUS_EXCEPTION_TYPE_RT_STACK_OVERFLOW 0x56
#define AS_FAULTSTATUS_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
#define AS_FAULTSTATUS_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
#define AS_FAULTSTATUS_EXCEPTION_TYPE_OUT_OF_MEMORY 0x60
#define AS_FAULTSTATUS_EXCEPTION_TYPE_NE_DATA_INVALID_FAULT 0x61
#define AS_FAULTSTATUS_EXCEPTION_TYPE_NE_ADDR_RANGE_FAULT 0x62
#define AS_FAULTSTATUS_EXCEPTION_TYPE_NE_TSU_SPACE_FAULT 0x63
#define AS_FAULTSTATUS_EXCEPTION_TYPE_NE_TSU_INVALID_ENC 0x64
#define AS_FAULTSTATUS_EXCEPTION_TYPE_NE_WEIGHT_STREAM_ERROR 0x65
#define AS_FAULTSTATUS_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68
#define AS_FAULTSTATUS_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_0 0x70
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_1 0x71
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_2 0x72
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_3 0x73
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_4 0x74
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_5 0x75
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_6 0x76
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_7 0x77
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_8 0x78
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_9 0x79
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_10 0x7A
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_11 0x7B
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_12 0x7C
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_13 0x7D
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_14 0x7E
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SW_FAULT_15 0x7F
#define AS_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80
#define AS_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88
#define AS_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_1 0xC1
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_2 0xC2
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_3 0xC3
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_4 0xC4
#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8
#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9
#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA
#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT0 0xE4
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT1 0xE5
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT2 0xE6
#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT3 0xE7
#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8
#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9
#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA
#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB

/*
 * Begin MMU MEMATTR register values
 */
#define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHARED 0x0

/* CSF_CONFIG register */
#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2

/* MCU_CONTROL request values */
#define MCU_CONTROL_REQ_DISABLE 0x0
#define MCU_CONTROL_REQ_ENABLE 0x1
#define MCU_CONTROL_REQ_AUTO 0x2

/* MCU_STATUS values */
#define MCU_STATUS_VALUE_DISABLED 0x0
#define MCU_STATUS_VALUE_ENABLED 0x1
#define MCU_STATUS_VALUE_HALT 0x2
#define MCU_STATUS_VALUE_FATAL 0x3

/*
 * Doorbell-disable flag in bit 31. The constant is unsigned: shifting a
 * signed 1 into the sign bit is undefined behaviour and the resulting
 * negative value would sign-extend when widened to 64 bits.
 */
#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31)
#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1u << MCU_CNTRL_DOORBELL_DISABLE_SHIFT)

/* JOB IRQ flags */
#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */

/* GPU_COMMAND codes */
#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */
#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */
#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */
#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */
#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */
#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */
/* GPU_COMMAND_RESET payloads */

/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a
 * defined and idle state. Power domains will remain powered on.
 */
#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00

/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a
 * defined and idle state.
 */
#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01

/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data,
 * and may leave the system bus in an inconsistent state. Use only as a last resort when nothing
 * else works.
 */
#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02

/* GPU_COMMAND_TIME payloads */
#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */

/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */

/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */

/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */

/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 0x01 /* CLN only */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */

/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */

/*
 * GPU_COMMAND command + payload: opcode in bits [7:0], payload from bit 8 up.
 * Both arguments are parenthesized before the cast/shift so that an OR-ed
 * payload expression is shifted as a whole even when the caller does not
 * wrap it in parentheses.
 */
#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) ((__u32)(opcode) | ((__u32)(payload) << 8))

/* Final GPU_COMMAND form */
/* No operation, nothing happens */
#define GPU_COMMAND_NOP GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0)

/* Stop all external bus interfaces, and then reset the entire GPU. */
#define GPU_COMMAND_SOFT_RESET \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET)

/* Immediately reset the entire GPU. */
#define GPU_COMMAND_HARD_RESET \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET)

/* Starts the cycle counter, and system timestamp propagation */
#define GPU_COMMAND_CYCLE_COUNT_START \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE)

/* Stops the cycle counter, and system timestamp propagation */
#define GPU_COMMAND_CYCLE_COUNT_STOP \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)

/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */
#define GPU_COMMAND_CACHE_CLN_INV_L2 \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
				 (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))

/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
				 (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))

/* Clean and invalidate L2, LSC, and Other caches */
#define GPU_COMMAND_CACHE_CLN_INV_FULL \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
				 (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE))

/* Clean and invalidate only LSC cache */
#define GPU_COMMAND_CACHE_CLN_INV_LSC \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
				 (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))

/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */
#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
				 (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))

/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */
#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
				 (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \
				  GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))

/* Clean and invalidate physical range L2, LSC and Other caches */
#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
				 (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
				  GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \
				  GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \
				  GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))

/* Merge cache flush commands */
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2))

/* Places the GPU in protected mode */
#define GPU_COMMAND_SET_PROTECTED_MODE \
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0)

/* Halt CSF */
#define GPU_COMMAND_FINISH_HALT GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0)
/* Clear GPU faults */
#define GPU_COMMAND_CLEAR_FAULT GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0)

/* End Command Values */

/*
 * GPU_FAULTSTATUS register
 *
 * Field layout as encoded by the shifts and masks below:
 *   [7:0]   EXCEPTION_TYPE
 *   [9:8]   ACCESS_TYPE
 *   [10]    ADDRESS_VALID
 *   [11]    JASID_VALID
 *   [15:12] JASID
 *   [31:16] SOURCE_ID
 */
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul)
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
	(((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT)

/*
 * ADDRESS_VALID: shift, mask and accessors are defined once, here. The
 * generated header used to repeat the SHIFT/MASK pair further down next to
 * the GET/SET macros.
 */
#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10)
#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \
	(((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \
	(((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \
	 (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK))

#define GPU_FAULTSTATUS_JASID_VALID_SHIFT GPU_U(11)
#define GPU_FAULTSTATUS_JASID_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_JASID_VALID_SHIFT)

#define GPU_FAULTSTATUS_JASID_SHIFT 12
#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT)
#define GPU_FAULTSTATUS_JASID_GET(reg_val) \
	(((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT)
#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \
	(((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \
	 (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK))

#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16
#define GPU_FAULTSTATUS_SOURCE_ID_MASK (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT)
/* End GPU_FAULTSTATUS register */

/* GPU_FAULTSTATUS_ACCESS_TYPE values */
#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0
#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1
#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2
#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3
/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */

/* Implementation-dependent exception codes used to indicate CSG
 * and CS errors that are not specified in the specs.
 */
#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70)
#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71)
#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72)

/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89
#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A
/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */

/* GPU IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
#define FLUSH_PA_RANGE_COMPLETED \
	(1 << 20) /* Set when a physical range cache clean operation has completed. */


/* GPU_FEATURES register */
#define GPU_FEATURES_RAY_TRACING_SHIFT GPU_U(2)
#define GPU_FEATURES_RAY_TRACING_MASK (GPU_U(0x1) << GPU_FEATURES_RAY_TRACING_SHIFT)
#define GPU_FEATURES_RAY_TRACING_GET(reg_val) \
	(((reg_val)&GPU_FEATURES_RAY_TRACING_MASK) >> GPU_FEATURES_RAY_TRACING_SHIFT)
/* End of GPU_FEATURES register */

/*
 * In Debug build,
 * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask
 * interrupt sources of GPU_IRQ by writing it onto GPU_IRQ_CLEAR/MASK registers.
 * Note that POWER_CHANGED_SINGLE is only used for internal testing.
 *
 * In Release build,
 * GPU_IRQ_REG_COMMON is used.
 *
 * Note:
 * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
 * DOORBELL_MIRROR - Not included in GPU_IRQ_REG_COMMON because it can't be
 *                   cleared by GPU_IRQ_CLEAR, so an interrupt storm might happen.
 */
#define GPU_IRQ_REG_COMMON \
	(GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This file is autogenerated. Avoid modifying this file unless absolutely + * necessary. + */ + +#include +#include + +#include +#include "mali_kbase_regmap_jm_enums.h" +#include + +static void kbase_regmap_v6_0_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V6_0_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbdev->regmap.flags[GPU_CONTROL__GPU_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__MEM_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__MMU_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__AS_PRESENT] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS_PRESENT] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_IRQ_STATUS] = 
KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__LATEST_FLUSH] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__AFBC_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__PWR_KEY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PWR_OVERRIDE0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PWR_OVERRIDE1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PRFCNT_BASE] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PRFCNT_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PRFCNT_JM_EN] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PRFCNT_SHADER_EN] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PRFCNT_TILER_EN] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__PRFCNT_MMU_L2_EN] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__CYCLE_COUNT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[GPU_CONTROL__TIMESTAMP] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_MAX_THREADS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_MAX_BARRIER_SIZE] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_0] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_1] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_2] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS0_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS1_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS2_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS3_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS4_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS5_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS6_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS7_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS8_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS9_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS10_FEATURES] = 
KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS11_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS12_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS13_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS14_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__JS15_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__L2_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__L2_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWRTRANS] = 
KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__SHADER_PWRACTIVE] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__TILER_PWRACTIVE] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__L2_PWRACTIVE] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__COHERENCY_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__COHERENCY_ENABLE] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__GPU_USER_INn] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__GPU_USER_OUTn] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__JM_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__SHADER_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__TILER_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__L2_MMU_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_JS_STATE] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_THROTTLE] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__EVENT_IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__EVENT_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__EVENT_IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__EVENT_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS0__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[JOB_CONTROL__JS0__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS0__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS1__EVENT_0] = 
+ KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS1__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS2__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS2__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS2__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS2__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[JOB_CONTROL__JS2__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS2__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS2__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS2__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__TAIL] = KBASE_REGMAP_WIDTH_64_BIT 
| + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS3__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | 
KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS3__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS4__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[JOB_CONTROL__JS4__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS4__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS5__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__TAIL_NEXT] = + 
KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS5__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS6__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS6__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS7__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS7__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT 
| + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS8__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS8__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[JOB_CONTROL__JS9__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS9__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__COMMAND_NEXT] = + 
KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS9__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS10__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | 
KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS10__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS11__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS11__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS12__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS12__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS12__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS12__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS12__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS12__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[JOB_CONTROL__JS12__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS12__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS13__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS13__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[JOB_CONTROL__JS14__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS14__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS14__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS14__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS14__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS14__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS14__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[JOB_CONTROL__JS14__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS14__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__HEAD] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__TAIL] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__AFFINITY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__CONFIG] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__XAFFINITY] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__COMMAND] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__FLUSH_ID] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[JOB_CONTROL__JS15__EVENT_0] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__EVENT_1] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__EVENT_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__HEAD_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__TAIL_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__AFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[JOB_CONTROL__JS15__CONFIG_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__XAFFINITY_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__COMMAND_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__FLUSH_ID_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[JOB_CONTROL__JS15__EVENT_MASK_NEXT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__STATUS] = 
KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS0__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS1__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS2__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS3__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; 
+ kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS4__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS5__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS6__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS7__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS8__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS9__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS10__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__LOCKADDR] = + 
KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS11__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS12__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__TRANSTAB] = + 
KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS13__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + 
kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS14__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE1__ST1MMU__AS15__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__IRQ_RAWSTAT] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__IRQ_MASK] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS0__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS1__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS2__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__FAULTADDRESS] = 
KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS3__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS4__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS5__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS6__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS7__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS8__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS9__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + 
KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS10__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS11__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS12__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS13__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + 
kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS14__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__TRANSTAB] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__MEMATTR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__LOCKADDR] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__COMMAND] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__FAULTSTATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__FAULTADDRESS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__STATUS] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__TRANSCFG] = + KBASE_REGMAP_WIDTH_64_BIT | KBASE_REGMAP_PERM_READ | 
KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[MMU_STAGE2__ST2MMU__AS15__FAULTEXTRA] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + + kbdev->regmap.regs[GPU_CONTROL__GPU_ID] = kbdev->reg + 0x0; + kbdev->regmap.regs[GPU_CONTROL__L2_FEATURES] = kbdev->reg + 0x4; + kbdev->regmap.regs[GPU_CONTROL__TILER_FEATURES] = kbdev->reg + 0xc; + kbdev->regmap.regs[GPU_CONTROL__MEM_FEATURES] = kbdev->reg + 0x10; + kbdev->regmap.regs[GPU_CONTROL__MMU_FEATURES] = kbdev->reg + 0x14; + kbdev->regmap.regs[GPU_CONTROL__AS_PRESENT] = kbdev->reg + 0x18; + kbdev->regmap.regs[GPU_CONTROL__JS_PRESENT] = kbdev->reg + 0x1c; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_RAWSTAT] = kbdev->reg + 0x20; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_CLEAR] = kbdev->reg + 0x24; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_MASK] = kbdev->reg + 0x28; + kbdev->regmap.regs[GPU_CONTROL__GPU_IRQ_STATUS] = kbdev->reg + 0x2c; + kbdev->regmap.regs[GPU_CONTROL__GPU_COMMAND] = kbdev->reg + 0x30; + kbdev->regmap.regs[GPU_CONTROL__GPU_STATUS] = kbdev->reg + 0x34; + kbdev->regmap.regs[GPU_CONTROL__LATEST_FLUSH] = kbdev->reg + 0x38; + kbdev->regmap.regs[GPU_CONTROL__GPU_FAULTSTATUS] = kbdev->reg + 0x3c; + kbdev->regmap.regs[GPU_CONTROL__GPU_FAULTADDRESS] = kbdev->reg + 0x40; + kbdev->regmap.regs[GPU_CONTROL__AFBC_FEATURES] = kbdev->reg + 0x4c; + kbdev->regmap.regs[GPU_CONTROL__PWR_KEY] = kbdev->reg + 0x50; + kbdev->regmap.regs[GPU_CONTROL__PWR_OVERRIDE0] = kbdev->reg + 0x54; + kbdev->regmap.regs[GPU_CONTROL__PWR_OVERRIDE1] = kbdev->reg + 0x58; + kbdev->regmap.regs[GPU_CONTROL__PRFCNT_BASE] = kbdev->reg + 0x60; + kbdev->regmap.regs[GPU_CONTROL__PRFCNT_CONFIG] = kbdev->reg + 0x68; + kbdev->regmap.regs[GPU_CONTROL__PRFCNT_JM_EN] = kbdev->reg + 0x6c; + kbdev->regmap.regs[GPU_CONTROL__PRFCNT_SHADER_EN] = kbdev->reg + 0x70; + kbdev->regmap.regs[GPU_CONTROL__PRFCNT_TILER_EN] = kbdev->reg + 0x74; + kbdev->regmap.regs[GPU_CONTROL__PRFCNT_MMU_L2_EN] = kbdev->reg + 0x7c; + kbdev->regmap.regs[GPU_CONTROL__CYCLE_COUNT] = 
kbdev->reg + 0x90; + kbdev->regmap.regs[GPU_CONTROL__TIMESTAMP] = kbdev->reg + 0x98; + kbdev->regmap.regs[GPU_CONTROL__THREAD_MAX_THREADS] = kbdev->reg + 0xa0; + kbdev->regmap.regs[GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE] = kbdev->reg + 0xa4; + kbdev->regmap.regs[GPU_CONTROL__THREAD_MAX_BARRIER_SIZE] = kbdev->reg + 0xa8; + kbdev->regmap.regs[GPU_CONTROL__THREAD_FEATURES] = kbdev->reg + 0xac; + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_0] = kbdev->reg + 0xb0; + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_1] = kbdev->reg + 0xb4; + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_2] = kbdev->reg + 0xb8; + kbdev->regmap.regs[GPU_CONTROL__JS0_FEATURES] = kbdev->reg + 0xc0; + kbdev->regmap.regs[GPU_CONTROL__JS1_FEATURES] = kbdev->reg + 0xc4; + kbdev->regmap.regs[GPU_CONTROL__JS2_FEATURES] = kbdev->reg + 0xc8; + kbdev->regmap.regs[GPU_CONTROL__JS3_FEATURES] = kbdev->reg + 0xcc; + kbdev->regmap.regs[GPU_CONTROL__JS4_FEATURES] = kbdev->reg + 0xd0; + kbdev->regmap.regs[GPU_CONTROL__JS5_FEATURES] = kbdev->reg + 0xd4; + kbdev->regmap.regs[GPU_CONTROL__JS6_FEATURES] = kbdev->reg + 0xd8; + kbdev->regmap.regs[GPU_CONTROL__JS7_FEATURES] = kbdev->reg + 0xdc; + kbdev->regmap.regs[GPU_CONTROL__JS8_FEATURES] = kbdev->reg + 0xe0; + kbdev->regmap.regs[GPU_CONTROL__JS9_FEATURES] = kbdev->reg + 0xe4; + kbdev->regmap.regs[GPU_CONTROL__JS10_FEATURES] = kbdev->reg + 0xe8; + kbdev->regmap.regs[GPU_CONTROL__JS11_FEATURES] = kbdev->reg + 0xec; + kbdev->regmap.regs[GPU_CONTROL__JS12_FEATURES] = kbdev->reg + 0xf0; + kbdev->regmap.regs[GPU_CONTROL__JS13_FEATURES] = kbdev->reg + 0xf4; + kbdev->regmap.regs[GPU_CONTROL__JS14_FEATURES] = kbdev->reg + 0xf8; + kbdev->regmap.regs[GPU_CONTROL__JS15_FEATURES] = kbdev->reg + 0xfc; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PRESENT] = kbdev->reg + 0x100; + kbdev->regmap.regs[GPU_CONTROL__TILER_PRESENT] = kbdev->reg + 0x110; + kbdev->regmap.regs[GPU_CONTROL__L2_PRESENT] = kbdev->reg + 0x120; + kbdev->regmap.regs[GPU_CONTROL__SHADER_READY] = 
kbdev->reg + 0x140; + kbdev->regmap.regs[GPU_CONTROL__TILER_READY] = kbdev->reg + 0x150; + kbdev->regmap.regs[GPU_CONTROL__L2_READY] = kbdev->reg + 0x160; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRON] = kbdev->reg + 0x180; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWRON] = kbdev->reg + 0x190; + kbdev->regmap.regs[GPU_CONTROL__L2_PWRON] = kbdev->reg + 0x1a0; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWROFF] = kbdev->reg + 0x1c0; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWROFF] = kbdev->reg + 0x1d0; + kbdev->regmap.regs[GPU_CONTROL__L2_PWROFF] = kbdev->reg + 0x1e0; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRTRANS] = kbdev->reg + 0x200; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWRTRANS] = kbdev->reg + 0x210; + kbdev->regmap.regs[GPU_CONTROL__L2_PWRTRANS] = kbdev->reg + 0x220; + kbdev->regmap.regs[GPU_CONTROL__SHADER_PWRACTIVE] = kbdev->reg + 0x240; + kbdev->regmap.regs[GPU_CONTROL__TILER_PWRACTIVE] = kbdev->reg + 0x250; + kbdev->regmap.regs[GPU_CONTROL__L2_PWRACTIVE] = kbdev->reg + 0x260; + kbdev->regmap.regs[GPU_CONTROL__COHERENCY_FEATURES] = kbdev->reg + 0x300; + kbdev->regmap.regs[GPU_CONTROL__COHERENCY_ENABLE] = kbdev->reg + 0x304; + kbdev->regmap.regs[GPU_CONTROL__GPU_USER_INn] = kbdev->reg + 0x400; + kbdev->regmap.regs[GPU_CONTROL__GPU_USER_OUTn] = kbdev->reg + 0x500; + kbdev->regmap.regs[GPU_CONTROL__JM_CONFIG] = kbdev->reg + 0xf00; + kbdev->regmap.regs[GPU_CONTROL__SHADER_CONFIG] = kbdev->reg + 0xf04; + kbdev->regmap.regs[GPU_CONTROL__TILER_CONFIG] = kbdev->reg + 0xf08; + kbdev->regmap.regs[GPU_CONTROL__L2_MMU_CONFIG] = kbdev->reg + 0xf0c; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_RAWSTAT] = kbdev->reg + 0x1000; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_CLEAR] = kbdev->reg + 0x1004; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_MASK] = kbdev->reg + 0x1008; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_STATUS] = kbdev->reg + 0x100c; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_JS_STATE] = kbdev->reg + 0x1010; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_THROTTLE] = kbdev->reg 
+ 0x1014; + kbdev->regmap.regs[JOB_CONTROL__EVENT_IRQ_RAWSTAT] = kbdev->reg + 0x1020; + kbdev->regmap.regs[JOB_CONTROL__EVENT_IRQ_CLEAR] = kbdev->reg + 0x1024; + kbdev->regmap.regs[JOB_CONTROL__EVENT_IRQ_MASK] = kbdev->reg + 0x1028; + kbdev->regmap.regs[JOB_CONTROL__EVENT_IRQ_STATUS] = kbdev->reg + 0x102c; + kbdev->regmap.regs[JOB_CONTROL__JS0__HEAD] = kbdev->reg + 0x1800; + kbdev->regmap.regs[JOB_CONTROL__JS0__TAIL] = kbdev->reg + 0x1808; + kbdev->regmap.regs[JOB_CONTROL__JS0__AFFINITY] = kbdev->reg + 0x1810; + kbdev->regmap.regs[JOB_CONTROL__JS0__CONFIG] = kbdev->reg + 0x1818; + kbdev->regmap.regs[JOB_CONTROL__JS0__XAFFINITY] = kbdev->reg + 0x181c; + kbdev->regmap.regs[JOB_CONTROL__JS0__COMMAND] = kbdev->reg + 0x1820; + kbdev->regmap.regs[JOB_CONTROL__JS0__STATUS] = kbdev->reg + 0x1824; + kbdev->regmap.regs[JOB_CONTROL__JS0__FLUSH_ID] = kbdev->reg + 0x1830; + kbdev->regmap.regs[JOB_CONTROL__JS0__EVENT_0] = kbdev->reg + 0x1834; + kbdev->regmap.regs[JOB_CONTROL__JS0__EVENT_1] = kbdev->reg + 0x1838; + kbdev->regmap.regs[JOB_CONTROL__JS0__EVENT_MASK] = kbdev->reg + 0x183c; + kbdev->regmap.regs[JOB_CONTROL__JS0__HEAD_NEXT] = kbdev->reg + 0x1840; + kbdev->regmap.regs[JOB_CONTROL__JS0__TAIL_NEXT] = kbdev->reg + 0x1848; + kbdev->regmap.regs[JOB_CONTROL__JS0__AFFINITY_NEXT] = kbdev->reg + 0x1850; + kbdev->regmap.regs[JOB_CONTROL__JS0__CONFIG_NEXT] = kbdev->reg + 0x1858; + kbdev->regmap.regs[JOB_CONTROL__JS0__XAFFINITY_NEXT] = kbdev->reg + 0x185c; + kbdev->regmap.regs[JOB_CONTROL__JS0__COMMAND_NEXT] = kbdev->reg + 0x1860; + kbdev->regmap.regs[JOB_CONTROL__JS0__FLUSH_ID_NEXT] = kbdev->reg + 0x1870; + kbdev->regmap.regs[JOB_CONTROL__JS0__EVENT_MASK_NEXT] = kbdev->reg + 0x187c; + kbdev->regmap.regs[JOB_CONTROL__JS1__HEAD] = kbdev->reg + 0x1880; + kbdev->regmap.regs[JOB_CONTROL__JS1__TAIL] = kbdev->reg + 0x1888; + kbdev->regmap.regs[JOB_CONTROL__JS1__AFFINITY] = kbdev->reg + 0x1890; + kbdev->regmap.regs[JOB_CONTROL__JS1__CONFIG] = kbdev->reg + 0x1898; + 
kbdev->regmap.regs[JOB_CONTROL__JS1__XAFFINITY] = kbdev->reg + 0x189c; + kbdev->regmap.regs[JOB_CONTROL__JS1__COMMAND] = kbdev->reg + 0x18a0; + kbdev->regmap.regs[JOB_CONTROL__JS1__STATUS] = kbdev->reg + 0x18a4; + kbdev->regmap.regs[JOB_CONTROL__JS1__FLUSH_ID] = kbdev->reg + 0x18b0; + kbdev->regmap.regs[JOB_CONTROL__JS1__EVENT_0] = kbdev->reg + 0x18b4; + kbdev->regmap.regs[JOB_CONTROL__JS1__EVENT_1] = kbdev->reg + 0x18b8; + kbdev->regmap.regs[JOB_CONTROL__JS1__EVENT_MASK] = kbdev->reg + 0x18bc; + kbdev->regmap.regs[JOB_CONTROL__JS1__HEAD_NEXT] = kbdev->reg + 0x18c0; + kbdev->regmap.regs[JOB_CONTROL__JS1__TAIL_NEXT] = kbdev->reg + 0x18c8; + kbdev->regmap.regs[JOB_CONTROL__JS1__AFFINITY_NEXT] = kbdev->reg + 0x18d0; + kbdev->regmap.regs[JOB_CONTROL__JS1__CONFIG_NEXT] = kbdev->reg + 0x18d8; + kbdev->regmap.regs[JOB_CONTROL__JS1__XAFFINITY_NEXT] = kbdev->reg + 0x18dc; + kbdev->regmap.regs[JOB_CONTROL__JS1__COMMAND_NEXT] = kbdev->reg + 0x18e0; + kbdev->regmap.regs[JOB_CONTROL__JS1__FLUSH_ID_NEXT] = kbdev->reg + 0x18f0; + kbdev->regmap.regs[JOB_CONTROL__JS1__EVENT_MASK_NEXT] = kbdev->reg + 0x18fc; + kbdev->regmap.regs[JOB_CONTROL__JS2__HEAD] = kbdev->reg + 0x1900; + kbdev->regmap.regs[JOB_CONTROL__JS2__TAIL] = kbdev->reg + 0x1908; + kbdev->regmap.regs[JOB_CONTROL__JS2__AFFINITY] = kbdev->reg + 0x1910; + kbdev->regmap.regs[JOB_CONTROL__JS2__CONFIG] = kbdev->reg + 0x1918; + kbdev->regmap.regs[JOB_CONTROL__JS2__XAFFINITY] = kbdev->reg + 0x191c; + kbdev->regmap.regs[JOB_CONTROL__JS2__COMMAND] = kbdev->reg + 0x1920; + kbdev->regmap.regs[JOB_CONTROL__JS2__STATUS] = kbdev->reg + 0x1924; + kbdev->regmap.regs[JOB_CONTROL__JS2__FLUSH_ID] = kbdev->reg + 0x1930; + kbdev->regmap.regs[JOB_CONTROL__JS2__EVENT_0] = kbdev->reg + 0x1934; + kbdev->regmap.regs[JOB_CONTROL__JS2__EVENT_1] = kbdev->reg + 0x1938; + kbdev->regmap.regs[JOB_CONTROL__JS2__EVENT_MASK] = kbdev->reg + 0x193c; + kbdev->regmap.regs[JOB_CONTROL__JS2__HEAD_NEXT] = kbdev->reg + 0x1940; + 
kbdev->regmap.regs[JOB_CONTROL__JS2__TAIL_NEXT] = kbdev->reg + 0x1948; + kbdev->regmap.regs[JOB_CONTROL__JS2__AFFINITY_NEXT] = kbdev->reg + 0x1950; + kbdev->regmap.regs[JOB_CONTROL__JS2__CONFIG_NEXT] = kbdev->reg + 0x1958; + kbdev->regmap.regs[JOB_CONTROL__JS2__XAFFINITY_NEXT] = kbdev->reg + 0x195c; + kbdev->regmap.regs[JOB_CONTROL__JS2__COMMAND_NEXT] = kbdev->reg + 0x1960; + kbdev->regmap.regs[JOB_CONTROL__JS2__FLUSH_ID_NEXT] = kbdev->reg + 0x1970; + kbdev->regmap.regs[JOB_CONTROL__JS2__EVENT_MASK_NEXT] = kbdev->reg + 0x197c; + kbdev->regmap.regs[JOB_CONTROL__JS3__HEAD] = kbdev->reg + 0x1980; + kbdev->regmap.regs[JOB_CONTROL__JS3__TAIL] = kbdev->reg + 0x1988; + kbdev->regmap.regs[JOB_CONTROL__JS3__AFFINITY] = kbdev->reg + 0x1990; + kbdev->regmap.regs[JOB_CONTROL__JS3__CONFIG] = kbdev->reg + 0x1998; + kbdev->regmap.regs[JOB_CONTROL__JS3__XAFFINITY] = kbdev->reg + 0x199c; + kbdev->regmap.regs[JOB_CONTROL__JS3__COMMAND] = kbdev->reg + 0x19a0; + kbdev->regmap.regs[JOB_CONTROL__JS3__STATUS] = kbdev->reg + 0x19a4; + kbdev->regmap.regs[JOB_CONTROL__JS3__FLUSH_ID] = kbdev->reg + 0x19b0; + kbdev->regmap.regs[JOB_CONTROL__JS3__EVENT_0] = kbdev->reg + 0x19b4; + kbdev->regmap.regs[JOB_CONTROL__JS3__EVENT_1] = kbdev->reg + 0x19b8; + kbdev->regmap.regs[JOB_CONTROL__JS3__EVENT_MASK] = kbdev->reg + 0x19bc; + kbdev->regmap.regs[JOB_CONTROL__JS3__HEAD_NEXT] = kbdev->reg + 0x19c0; + kbdev->regmap.regs[JOB_CONTROL__JS3__TAIL_NEXT] = kbdev->reg + 0x19c8; + kbdev->regmap.regs[JOB_CONTROL__JS3__AFFINITY_NEXT] = kbdev->reg + 0x19d0; + kbdev->regmap.regs[JOB_CONTROL__JS3__CONFIG_NEXT] = kbdev->reg + 0x19d8; + kbdev->regmap.regs[JOB_CONTROL__JS3__XAFFINITY_NEXT] = kbdev->reg + 0x19dc; + kbdev->regmap.regs[JOB_CONTROL__JS3__COMMAND_NEXT] = kbdev->reg + 0x19e0; + kbdev->regmap.regs[JOB_CONTROL__JS3__FLUSH_ID_NEXT] = kbdev->reg + 0x19f0; + kbdev->regmap.regs[JOB_CONTROL__JS3__EVENT_MASK_NEXT] = kbdev->reg + 0x19fc; + kbdev->regmap.regs[JOB_CONTROL__JS4__HEAD] = kbdev->reg + 0x1a00; + 
kbdev->regmap.regs[JOB_CONTROL__JS4__TAIL] = kbdev->reg + 0x1a08; + kbdev->regmap.regs[JOB_CONTROL__JS4__AFFINITY] = kbdev->reg + 0x1a10; + kbdev->regmap.regs[JOB_CONTROL__JS4__CONFIG] = kbdev->reg + 0x1a18; + kbdev->regmap.regs[JOB_CONTROL__JS4__XAFFINITY] = kbdev->reg + 0x1a1c; + kbdev->regmap.regs[JOB_CONTROL__JS4__COMMAND] = kbdev->reg + 0x1a20; + kbdev->regmap.regs[JOB_CONTROL__JS4__STATUS] = kbdev->reg + 0x1a24; + kbdev->regmap.regs[JOB_CONTROL__JS4__FLUSH_ID] = kbdev->reg + 0x1a30; + kbdev->regmap.regs[JOB_CONTROL__JS4__EVENT_0] = kbdev->reg + 0x1a34; + kbdev->regmap.regs[JOB_CONTROL__JS4__EVENT_1] = kbdev->reg + 0x1a38; + kbdev->regmap.regs[JOB_CONTROL__JS4__EVENT_MASK] = kbdev->reg + 0x1a3c; + kbdev->regmap.regs[JOB_CONTROL__JS4__HEAD_NEXT] = kbdev->reg + 0x1a40; + kbdev->regmap.regs[JOB_CONTROL__JS4__TAIL_NEXT] = kbdev->reg + 0x1a48; + kbdev->regmap.regs[JOB_CONTROL__JS4__AFFINITY_NEXT] = kbdev->reg + 0x1a50; + kbdev->regmap.regs[JOB_CONTROL__JS4__CONFIG_NEXT] = kbdev->reg + 0x1a58; + kbdev->regmap.regs[JOB_CONTROL__JS4__XAFFINITY_NEXT] = kbdev->reg + 0x1a5c; + kbdev->regmap.regs[JOB_CONTROL__JS4__COMMAND_NEXT] = kbdev->reg + 0x1a60; + kbdev->regmap.regs[JOB_CONTROL__JS4__FLUSH_ID_NEXT] = kbdev->reg + 0x1a70; + kbdev->regmap.regs[JOB_CONTROL__JS4__EVENT_MASK_NEXT] = kbdev->reg + 0x1a7c; + kbdev->regmap.regs[JOB_CONTROL__JS5__HEAD] = kbdev->reg + 0x1a80; + kbdev->regmap.regs[JOB_CONTROL__JS5__TAIL] = kbdev->reg + 0x1a88; + kbdev->regmap.regs[JOB_CONTROL__JS5__AFFINITY] = kbdev->reg + 0x1a90; + kbdev->regmap.regs[JOB_CONTROL__JS5__CONFIG] = kbdev->reg + 0x1a98; + kbdev->regmap.regs[JOB_CONTROL__JS5__XAFFINITY] = kbdev->reg + 0x1a9c; + kbdev->regmap.regs[JOB_CONTROL__JS5__COMMAND] = kbdev->reg + 0x1aa0; + kbdev->regmap.regs[JOB_CONTROL__JS5__STATUS] = kbdev->reg + 0x1aa4; + kbdev->regmap.regs[JOB_CONTROL__JS5__FLUSH_ID] = kbdev->reg + 0x1ab0; + kbdev->regmap.regs[JOB_CONTROL__JS5__EVENT_0] = kbdev->reg + 0x1ab4; + 
kbdev->regmap.regs[JOB_CONTROL__JS5__EVENT_1] = kbdev->reg + 0x1ab8; + kbdev->regmap.regs[JOB_CONTROL__JS5__EVENT_MASK] = kbdev->reg + 0x1abc; + kbdev->regmap.regs[JOB_CONTROL__JS5__HEAD_NEXT] = kbdev->reg + 0x1ac0; + kbdev->regmap.regs[JOB_CONTROL__JS5__TAIL_NEXT] = kbdev->reg + 0x1ac8; + kbdev->regmap.regs[JOB_CONTROL__JS5__AFFINITY_NEXT] = kbdev->reg + 0x1ad0; + kbdev->regmap.regs[JOB_CONTROL__JS5__CONFIG_NEXT] = kbdev->reg + 0x1ad8; + kbdev->regmap.regs[JOB_CONTROL__JS5__XAFFINITY_NEXT] = kbdev->reg + 0x1adc; + kbdev->regmap.regs[JOB_CONTROL__JS5__COMMAND_NEXT] = kbdev->reg + 0x1ae0; + kbdev->regmap.regs[JOB_CONTROL__JS5__FLUSH_ID_NEXT] = kbdev->reg + 0x1af0; + kbdev->regmap.regs[JOB_CONTROL__JS5__EVENT_MASK_NEXT] = kbdev->reg + 0x1afc; + kbdev->regmap.regs[JOB_CONTROL__JS6__HEAD] = kbdev->reg + 0x1b00; + kbdev->regmap.regs[JOB_CONTROL__JS6__TAIL] = kbdev->reg + 0x1b08; + kbdev->regmap.regs[JOB_CONTROL__JS6__AFFINITY] = kbdev->reg + 0x1b10; + kbdev->regmap.regs[JOB_CONTROL__JS6__CONFIG] = kbdev->reg + 0x1b18; + kbdev->regmap.regs[JOB_CONTROL__JS6__XAFFINITY] = kbdev->reg + 0x1b1c; + kbdev->regmap.regs[JOB_CONTROL__JS6__COMMAND] = kbdev->reg + 0x1b20; + kbdev->regmap.regs[JOB_CONTROL__JS6__STATUS] = kbdev->reg + 0x1b24; + kbdev->regmap.regs[JOB_CONTROL__JS6__FLUSH_ID] = kbdev->reg + 0x1b30; + kbdev->regmap.regs[JOB_CONTROL__JS6__EVENT_0] = kbdev->reg + 0x1b34; + kbdev->regmap.regs[JOB_CONTROL__JS6__EVENT_1] = kbdev->reg + 0x1b38; + kbdev->regmap.regs[JOB_CONTROL__JS6__EVENT_MASK] = kbdev->reg + 0x1b3c; + kbdev->regmap.regs[JOB_CONTROL__JS6__HEAD_NEXT] = kbdev->reg + 0x1b40; + kbdev->regmap.regs[JOB_CONTROL__JS6__TAIL_NEXT] = kbdev->reg + 0x1b48; + kbdev->regmap.regs[JOB_CONTROL__JS6__AFFINITY_NEXT] = kbdev->reg + 0x1b50; + kbdev->regmap.regs[JOB_CONTROL__JS6__CONFIG_NEXT] = kbdev->reg + 0x1b58; + kbdev->regmap.regs[JOB_CONTROL__JS6__XAFFINITY_NEXT] = kbdev->reg + 0x1b5c; + kbdev->regmap.regs[JOB_CONTROL__JS6__COMMAND_NEXT] = kbdev->reg + 0x1b60; + 
kbdev->regmap.regs[JOB_CONTROL__JS6__FLUSH_ID_NEXT] = kbdev->reg + 0x1b70; + kbdev->regmap.regs[JOB_CONTROL__JS6__EVENT_MASK_NEXT] = kbdev->reg + 0x1b7c; + kbdev->regmap.regs[JOB_CONTROL__JS7__HEAD] = kbdev->reg + 0x1b80; + kbdev->regmap.regs[JOB_CONTROL__JS7__TAIL] = kbdev->reg + 0x1b88; + kbdev->regmap.regs[JOB_CONTROL__JS7__AFFINITY] = kbdev->reg + 0x1b90; + kbdev->regmap.regs[JOB_CONTROL__JS7__CONFIG] = kbdev->reg + 0x1b98; + kbdev->regmap.regs[JOB_CONTROL__JS7__XAFFINITY] = kbdev->reg + 0x1b9c; + kbdev->regmap.regs[JOB_CONTROL__JS7__COMMAND] = kbdev->reg + 0x1ba0; + kbdev->regmap.regs[JOB_CONTROL__JS7__STATUS] = kbdev->reg + 0x1ba4; + kbdev->regmap.regs[JOB_CONTROL__JS7__FLUSH_ID] = kbdev->reg + 0x1bb0; + kbdev->regmap.regs[JOB_CONTROL__JS7__EVENT_0] = kbdev->reg + 0x1bb4; + kbdev->regmap.regs[JOB_CONTROL__JS7__EVENT_1] = kbdev->reg + 0x1bb8; + kbdev->regmap.regs[JOB_CONTROL__JS7__EVENT_MASK] = kbdev->reg + 0x1bbc; + kbdev->regmap.regs[JOB_CONTROL__JS7__HEAD_NEXT] = kbdev->reg + 0x1bc0; + kbdev->regmap.regs[JOB_CONTROL__JS7__TAIL_NEXT] = kbdev->reg + 0x1bc8; + kbdev->regmap.regs[JOB_CONTROL__JS7__AFFINITY_NEXT] = kbdev->reg + 0x1bd0; + kbdev->regmap.regs[JOB_CONTROL__JS7__CONFIG_NEXT] = kbdev->reg + 0x1bd8; + kbdev->regmap.regs[JOB_CONTROL__JS7__XAFFINITY_NEXT] = kbdev->reg + 0x1bdc; + kbdev->regmap.regs[JOB_CONTROL__JS7__COMMAND_NEXT] = kbdev->reg + 0x1be0; + kbdev->regmap.regs[JOB_CONTROL__JS7__FLUSH_ID_NEXT] = kbdev->reg + 0x1bf0; + kbdev->regmap.regs[JOB_CONTROL__JS7__EVENT_MASK_NEXT] = kbdev->reg + 0x1bfc; + kbdev->regmap.regs[JOB_CONTROL__JS8__HEAD] = kbdev->reg + 0x1c00; + kbdev->regmap.regs[JOB_CONTROL__JS8__TAIL] = kbdev->reg + 0x1c08; + kbdev->regmap.regs[JOB_CONTROL__JS8__AFFINITY] = kbdev->reg + 0x1c10; + kbdev->regmap.regs[JOB_CONTROL__JS8__CONFIG] = kbdev->reg + 0x1c18; + kbdev->regmap.regs[JOB_CONTROL__JS8__XAFFINITY] = kbdev->reg + 0x1c1c; + kbdev->regmap.regs[JOB_CONTROL__JS8__COMMAND] = kbdev->reg + 0x1c20; + 
kbdev->regmap.regs[JOB_CONTROL__JS8__STATUS] = kbdev->reg + 0x1c24; + kbdev->regmap.regs[JOB_CONTROL__JS8__FLUSH_ID] = kbdev->reg + 0x1c30; + kbdev->regmap.regs[JOB_CONTROL__JS8__EVENT_0] = kbdev->reg + 0x1c34; + kbdev->regmap.regs[JOB_CONTROL__JS8__EVENT_1] = kbdev->reg + 0x1c38; + kbdev->regmap.regs[JOB_CONTROL__JS8__EVENT_MASK] = kbdev->reg + 0x1c3c; + kbdev->regmap.regs[JOB_CONTROL__JS8__HEAD_NEXT] = kbdev->reg + 0x1c40; + kbdev->regmap.regs[JOB_CONTROL__JS8__TAIL_NEXT] = kbdev->reg + 0x1c48; + kbdev->regmap.regs[JOB_CONTROL__JS8__AFFINITY_NEXT] = kbdev->reg + 0x1c50; + kbdev->regmap.regs[JOB_CONTROL__JS8__CONFIG_NEXT] = kbdev->reg + 0x1c58; + kbdev->regmap.regs[JOB_CONTROL__JS8__XAFFINITY_NEXT] = kbdev->reg + 0x1c5c; + kbdev->regmap.regs[JOB_CONTROL__JS8__COMMAND_NEXT] = kbdev->reg + 0x1c60; + kbdev->regmap.regs[JOB_CONTROL__JS8__FLUSH_ID_NEXT] = kbdev->reg + 0x1c70; + kbdev->regmap.regs[JOB_CONTROL__JS8__EVENT_MASK_NEXT] = kbdev->reg + 0x1c7c; + kbdev->regmap.regs[JOB_CONTROL__JS9__HEAD] = kbdev->reg + 0x1c80; + kbdev->regmap.regs[JOB_CONTROL__JS9__TAIL] = kbdev->reg + 0x1c88; + kbdev->regmap.regs[JOB_CONTROL__JS9__AFFINITY] = kbdev->reg + 0x1c90; + kbdev->regmap.regs[JOB_CONTROL__JS9__CONFIG] = kbdev->reg + 0x1c98; + kbdev->regmap.regs[JOB_CONTROL__JS9__XAFFINITY] = kbdev->reg + 0x1c9c; + kbdev->regmap.regs[JOB_CONTROL__JS9__COMMAND] = kbdev->reg + 0x1ca0; + kbdev->regmap.regs[JOB_CONTROL__JS9__STATUS] = kbdev->reg + 0x1ca4; + kbdev->regmap.regs[JOB_CONTROL__JS9__FLUSH_ID] = kbdev->reg + 0x1cb0; + kbdev->regmap.regs[JOB_CONTROL__JS9__EVENT_0] = kbdev->reg + 0x1cb4; + kbdev->regmap.regs[JOB_CONTROL__JS9__EVENT_1] = kbdev->reg + 0x1cb8; + kbdev->regmap.regs[JOB_CONTROL__JS9__EVENT_MASK] = kbdev->reg + 0x1cbc; + kbdev->regmap.regs[JOB_CONTROL__JS9__HEAD_NEXT] = kbdev->reg + 0x1cc0; + kbdev->regmap.regs[JOB_CONTROL__JS9__TAIL_NEXT] = kbdev->reg + 0x1cc8; + kbdev->regmap.regs[JOB_CONTROL__JS9__AFFINITY_NEXT] = kbdev->reg + 0x1cd0; + 
kbdev->regmap.regs[JOB_CONTROL__JS9__CONFIG_NEXT] = kbdev->reg + 0x1cd8; + kbdev->regmap.regs[JOB_CONTROL__JS9__XAFFINITY_NEXT] = kbdev->reg + 0x1cdc; + kbdev->regmap.regs[JOB_CONTROL__JS9__COMMAND_NEXT] = kbdev->reg + 0x1ce0; + kbdev->regmap.regs[JOB_CONTROL__JS9__FLUSH_ID_NEXT] = kbdev->reg + 0x1cf0; + kbdev->regmap.regs[JOB_CONTROL__JS9__EVENT_MASK_NEXT] = kbdev->reg + 0x1cfc; + kbdev->regmap.regs[JOB_CONTROL__JS10__HEAD] = kbdev->reg + 0x1d00; + kbdev->regmap.regs[JOB_CONTROL__JS10__TAIL] = kbdev->reg + 0x1d08; + kbdev->regmap.regs[JOB_CONTROL__JS10__AFFINITY] = kbdev->reg + 0x1d10; + kbdev->regmap.regs[JOB_CONTROL__JS10__CONFIG] = kbdev->reg + 0x1d18; + kbdev->regmap.regs[JOB_CONTROL__JS10__XAFFINITY] = kbdev->reg + 0x1d1c; + kbdev->regmap.regs[JOB_CONTROL__JS10__COMMAND] = kbdev->reg + 0x1d20; + kbdev->regmap.regs[JOB_CONTROL__JS10__STATUS] = kbdev->reg + 0x1d24; + kbdev->regmap.regs[JOB_CONTROL__JS10__FLUSH_ID] = kbdev->reg + 0x1d30; + kbdev->regmap.regs[JOB_CONTROL__JS10__EVENT_0] = kbdev->reg + 0x1d34; + kbdev->regmap.regs[JOB_CONTROL__JS10__EVENT_1] = kbdev->reg + 0x1d38; + kbdev->regmap.regs[JOB_CONTROL__JS10__EVENT_MASK] = kbdev->reg + 0x1d3c; + kbdev->regmap.regs[JOB_CONTROL__JS10__HEAD_NEXT] = kbdev->reg + 0x1d40; + kbdev->regmap.regs[JOB_CONTROL__JS10__TAIL_NEXT] = kbdev->reg + 0x1d48; + kbdev->regmap.regs[JOB_CONTROL__JS10__AFFINITY_NEXT] = kbdev->reg + 0x1d50; + kbdev->regmap.regs[JOB_CONTROL__JS10__CONFIG_NEXT] = kbdev->reg + 0x1d58; + kbdev->regmap.regs[JOB_CONTROL__JS10__XAFFINITY_NEXT] = kbdev->reg + 0x1d5c; + kbdev->regmap.regs[JOB_CONTROL__JS10__COMMAND_NEXT] = kbdev->reg + 0x1d60; + kbdev->regmap.regs[JOB_CONTROL__JS10__FLUSH_ID_NEXT] = kbdev->reg + 0x1d70; + kbdev->regmap.regs[JOB_CONTROL__JS10__EVENT_MASK_NEXT] = kbdev->reg + 0x1d7c; + kbdev->regmap.regs[JOB_CONTROL__JS11__HEAD] = kbdev->reg + 0x1d80; + kbdev->regmap.regs[JOB_CONTROL__JS11__TAIL] = kbdev->reg + 0x1d88; + kbdev->regmap.regs[JOB_CONTROL__JS11__AFFINITY] = kbdev->reg + 
0x1d90; + kbdev->regmap.regs[JOB_CONTROL__JS11__CONFIG] = kbdev->reg + 0x1d98; + kbdev->regmap.regs[JOB_CONTROL__JS11__XAFFINITY] = kbdev->reg + 0x1d9c; + kbdev->regmap.regs[JOB_CONTROL__JS11__COMMAND] = kbdev->reg + 0x1da0; + kbdev->regmap.regs[JOB_CONTROL__JS11__STATUS] = kbdev->reg + 0x1da4; + kbdev->regmap.regs[JOB_CONTROL__JS11__FLUSH_ID] = kbdev->reg + 0x1db0; + kbdev->regmap.regs[JOB_CONTROL__JS11__EVENT_0] = kbdev->reg + 0x1db4; + kbdev->regmap.regs[JOB_CONTROL__JS11__EVENT_1] = kbdev->reg + 0x1db8; + kbdev->regmap.regs[JOB_CONTROL__JS11__EVENT_MASK] = kbdev->reg + 0x1dbc; + kbdev->regmap.regs[JOB_CONTROL__JS11__HEAD_NEXT] = kbdev->reg + 0x1dc0; + kbdev->regmap.regs[JOB_CONTROL__JS11__TAIL_NEXT] = kbdev->reg + 0x1dc8; + kbdev->regmap.regs[JOB_CONTROL__JS11__AFFINITY_NEXT] = kbdev->reg + 0x1dd0; + kbdev->regmap.regs[JOB_CONTROL__JS11__CONFIG_NEXT] = kbdev->reg + 0x1dd8; + kbdev->regmap.regs[JOB_CONTROL__JS11__XAFFINITY_NEXT] = kbdev->reg + 0x1ddc; + kbdev->regmap.regs[JOB_CONTROL__JS11__COMMAND_NEXT] = kbdev->reg + 0x1de0; + kbdev->regmap.regs[JOB_CONTROL__JS11__FLUSH_ID_NEXT] = kbdev->reg + 0x1df0; + kbdev->regmap.regs[JOB_CONTROL__JS11__EVENT_MASK_NEXT] = kbdev->reg + 0x1dfc; + kbdev->regmap.regs[JOB_CONTROL__JS12__HEAD] = kbdev->reg + 0x1e00; + kbdev->regmap.regs[JOB_CONTROL__JS12__TAIL] = kbdev->reg + 0x1e08; + kbdev->regmap.regs[JOB_CONTROL__JS12__AFFINITY] = kbdev->reg + 0x1e10; + kbdev->regmap.regs[JOB_CONTROL__JS12__CONFIG] = kbdev->reg + 0x1e18; + kbdev->regmap.regs[JOB_CONTROL__JS12__XAFFINITY] = kbdev->reg + 0x1e1c; + kbdev->regmap.regs[JOB_CONTROL__JS12__COMMAND] = kbdev->reg + 0x1e20; + kbdev->regmap.regs[JOB_CONTROL__JS12__STATUS] = kbdev->reg + 0x1e24; + kbdev->regmap.regs[JOB_CONTROL__JS12__FLUSH_ID] = kbdev->reg + 0x1e30; + kbdev->regmap.regs[JOB_CONTROL__JS12__EVENT_0] = kbdev->reg + 0x1e34; + kbdev->regmap.regs[JOB_CONTROL__JS12__EVENT_1] = kbdev->reg + 0x1e38; + kbdev->regmap.regs[JOB_CONTROL__JS12__EVENT_MASK] = kbdev->reg + 0x1e3c; + 
kbdev->regmap.regs[JOB_CONTROL__JS12__HEAD_NEXT] = kbdev->reg + 0x1e40; + kbdev->regmap.regs[JOB_CONTROL__JS12__TAIL_NEXT] = kbdev->reg + 0x1e48; + kbdev->regmap.regs[JOB_CONTROL__JS12__AFFINITY_NEXT] = kbdev->reg + 0x1e50; + kbdev->regmap.regs[JOB_CONTROL__JS12__CONFIG_NEXT] = kbdev->reg + 0x1e58; + kbdev->regmap.regs[JOB_CONTROL__JS12__XAFFINITY_NEXT] = kbdev->reg + 0x1e5c; + kbdev->regmap.regs[JOB_CONTROL__JS12__COMMAND_NEXT] = kbdev->reg + 0x1e60; + kbdev->regmap.regs[JOB_CONTROL__JS12__FLUSH_ID_NEXT] = kbdev->reg + 0x1e70; + kbdev->regmap.regs[JOB_CONTROL__JS12__EVENT_MASK_NEXT] = kbdev->reg + 0x1e7c; + kbdev->regmap.regs[JOB_CONTROL__JS13__HEAD] = kbdev->reg + 0x1e80; + kbdev->regmap.regs[JOB_CONTROL__JS13__TAIL] = kbdev->reg + 0x1e88; + kbdev->regmap.regs[JOB_CONTROL__JS13__AFFINITY] = kbdev->reg + 0x1e90; + kbdev->regmap.regs[JOB_CONTROL__JS13__CONFIG] = kbdev->reg + 0x1e98; + kbdev->regmap.regs[JOB_CONTROL__JS13__XAFFINITY] = kbdev->reg + 0x1e9c; + kbdev->regmap.regs[JOB_CONTROL__JS13__COMMAND] = kbdev->reg + 0x1ea0; + kbdev->regmap.regs[JOB_CONTROL__JS13__STATUS] = kbdev->reg + 0x1ea4; + kbdev->regmap.regs[JOB_CONTROL__JS13__FLUSH_ID] = kbdev->reg + 0x1eb0; + kbdev->regmap.regs[JOB_CONTROL__JS13__EVENT_0] = kbdev->reg + 0x1eb4; + kbdev->regmap.regs[JOB_CONTROL__JS13__EVENT_1] = kbdev->reg + 0x1eb8; + kbdev->regmap.regs[JOB_CONTROL__JS13__EVENT_MASK] = kbdev->reg + 0x1ebc; + kbdev->regmap.regs[JOB_CONTROL__JS13__HEAD_NEXT] = kbdev->reg + 0x1ec0; + kbdev->regmap.regs[JOB_CONTROL__JS13__TAIL_NEXT] = kbdev->reg + 0x1ec8; + kbdev->regmap.regs[JOB_CONTROL__JS13__AFFINITY_NEXT] = kbdev->reg + 0x1ed0; + kbdev->regmap.regs[JOB_CONTROL__JS13__CONFIG_NEXT] = kbdev->reg + 0x1ed8; + kbdev->regmap.regs[JOB_CONTROL__JS13__XAFFINITY_NEXT] = kbdev->reg + 0x1edc; + kbdev->regmap.regs[JOB_CONTROL__JS13__COMMAND_NEXT] = kbdev->reg + 0x1ee0; + kbdev->regmap.regs[JOB_CONTROL__JS13__FLUSH_ID_NEXT] = kbdev->reg + 0x1ef0; + kbdev->regmap.regs[JOB_CONTROL__JS13__EVENT_MASK_NEXT] = 
kbdev->reg + 0x1efc; + kbdev->regmap.regs[JOB_CONTROL__JS14__HEAD] = kbdev->reg + 0x1f00; + kbdev->regmap.regs[JOB_CONTROL__JS14__TAIL] = kbdev->reg + 0x1f08; + kbdev->regmap.regs[JOB_CONTROL__JS14__AFFINITY] = kbdev->reg + 0x1f10; + kbdev->regmap.regs[JOB_CONTROL__JS14__CONFIG] = kbdev->reg + 0x1f18; + kbdev->regmap.regs[JOB_CONTROL__JS14__XAFFINITY] = kbdev->reg + 0x1f1c; + kbdev->regmap.regs[JOB_CONTROL__JS14__COMMAND] = kbdev->reg + 0x1f20; + kbdev->regmap.regs[JOB_CONTROL__JS14__STATUS] = kbdev->reg + 0x1f24; + kbdev->regmap.regs[JOB_CONTROL__JS14__FLUSH_ID] = kbdev->reg + 0x1f30; + kbdev->regmap.regs[JOB_CONTROL__JS14__EVENT_0] = kbdev->reg + 0x1f34; + kbdev->regmap.regs[JOB_CONTROL__JS14__EVENT_1] = kbdev->reg + 0x1f38; + kbdev->regmap.regs[JOB_CONTROL__JS14__EVENT_MASK] = kbdev->reg + 0x1f3c; + kbdev->regmap.regs[JOB_CONTROL__JS14__HEAD_NEXT] = kbdev->reg + 0x1f40; + kbdev->regmap.regs[JOB_CONTROL__JS14__TAIL_NEXT] = kbdev->reg + 0x1f48; + kbdev->regmap.regs[JOB_CONTROL__JS14__AFFINITY_NEXT] = kbdev->reg + 0x1f50; + kbdev->regmap.regs[JOB_CONTROL__JS14__CONFIG_NEXT] = kbdev->reg + 0x1f58; + kbdev->regmap.regs[JOB_CONTROL__JS14__XAFFINITY_NEXT] = kbdev->reg + 0x1f5c; + kbdev->regmap.regs[JOB_CONTROL__JS14__COMMAND_NEXT] = kbdev->reg + 0x1f60; + kbdev->regmap.regs[JOB_CONTROL__JS14__FLUSH_ID_NEXT] = kbdev->reg + 0x1f70; + kbdev->regmap.regs[JOB_CONTROL__JS14__EVENT_MASK_NEXT] = kbdev->reg + 0x1f7c; + kbdev->regmap.regs[JOB_CONTROL__JS15__HEAD] = kbdev->reg + 0x1f80; + kbdev->regmap.regs[JOB_CONTROL__JS15__TAIL] = kbdev->reg + 0x1f88; + kbdev->regmap.regs[JOB_CONTROL__JS15__AFFINITY] = kbdev->reg + 0x1f90; + kbdev->regmap.regs[JOB_CONTROL__JS15__CONFIG] = kbdev->reg + 0x1f98; + kbdev->regmap.regs[JOB_CONTROL__JS15__XAFFINITY] = kbdev->reg + 0x1f9c; + kbdev->regmap.regs[JOB_CONTROL__JS15__COMMAND] = kbdev->reg + 0x1fa0; + kbdev->regmap.regs[JOB_CONTROL__JS15__STATUS] = kbdev->reg + 0x1fa4; + kbdev->regmap.regs[JOB_CONTROL__JS15__FLUSH_ID] = kbdev->reg + 0x1fb0; 
+ kbdev->regmap.regs[JOB_CONTROL__JS15__EVENT_0] = kbdev->reg + 0x1fb4; + kbdev->regmap.regs[JOB_CONTROL__JS15__EVENT_1] = kbdev->reg + 0x1fb8; + kbdev->regmap.regs[JOB_CONTROL__JS15__EVENT_MASK] = kbdev->reg + 0x1fbc; + kbdev->regmap.regs[JOB_CONTROL__JS15__HEAD_NEXT] = kbdev->reg + 0x1fc0; + kbdev->regmap.regs[JOB_CONTROL__JS15__TAIL_NEXT] = kbdev->reg + 0x1fc8; + kbdev->regmap.regs[JOB_CONTROL__JS15__AFFINITY_NEXT] = kbdev->reg + 0x1fd0; + kbdev->regmap.regs[JOB_CONTROL__JS15__CONFIG_NEXT] = kbdev->reg + 0x1fd8; + kbdev->regmap.regs[JOB_CONTROL__JS15__XAFFINITY_NEXT] = kbdev->reg + 0x1fdc; + kbdev->regmap.regs[JOB_CONTROL__JS15__COMMAND_NEXT] = kbdev->reg + 0x1fe0; + kbdev->regmap.regs[JOB_CONTROL__JS15__FLUSH_ID_NEXT] = kbdev->reg + 0x1ff0; + kbdev->regmap.regs[JOB_CONTROL__JS15__EVENT_MASK_NEXT] = kbdev->reg + 0x1ffc; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__IRQ_RAWSTAT] = kbdev->reg + 0x2000; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__IRQ_CLEAR] = kbdev->reg + 0x2004; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__IRQ_MASK] = kbdev->reg + 0x2008; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__IRQ_STATUS] = kbdev->reg + 0x200c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__TRANSTAB] = kbdev->reg + 0x2400; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__MEMATTR] = kbdev->reg + 0x2408; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__LOCKADDR] = kbdev->reg + 0x2410; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__COMMAND] = kbdev->reg + 0x2418; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__FAULTSTATUS] = kbdev->reg + 0x241c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__FAULTADDRESS] = kbdev->reg + 0x2420; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__STATUS] = kbdev->reg + 0x2428; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__TRANSCFG] = kbdev->reg + 0x2430; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS0__FAULTEXTRA] = kbdev->reg + 0x2438; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__TRANSTAB] = kbdev->reg + 0x2440; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__MEMATTR] = kbdev->reg 
+ 0x2448; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__LOCKADDR] = kbdev->reg + 0x2450; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__COMMAND] = kbdev->reg + 0x2458; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__FAULTSTATUS] = kbdev->reg + 0x245c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__FAULTADDRESS] = kbdev->reg + 0x2460; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__STATUS] = kbdev->reg + 0x2468; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__TRANSCFG] = kbdev->reg + 0x2470; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS1__FAULTEXTRA] = kbdev->reg + 0x2478; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__TRANSTAB] = kbdev->reg + 0x2480; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__MEMATTR] = kbdev->reg + 0x2488; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__LOCKADDR] = kbdev->reg + 0x2490; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__COMMAND] = kbdev->reg + 0x2498; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__FAULTSTATUS] = kbdev->reg + 0x249c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__FAULTADDRESS] = kbdev->reg + 0x24a0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__STATUS] = kbdev->reg + 0x24a8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__TRANSCFG] = kbdev->reg + 0x24b0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS2__FAULTEXTRA] = kbdev->reg + 0x24b8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__TRANSTAB] = kbdev->reg + 0x24c0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__MEMATTR] = kbdev->reg + 0x24c8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__LOCKADDR] = kbdev->reg + 0x24d0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__COMMAND] = kbdev->reg + 0x24d8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__FAULTSTATUS] = kbdev->reg + 0x24dc; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__FAULTADDRESS] = kbdev->reg + 0x24e0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__STATUS] = kbdev->reg + 0x24e8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__TRANSCFG] = kbdev->reg + 0x24f0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS3__FAULTEXTRA] = kbdev->reg + 
0x24f8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__TRANSTAB] = kbdev->reg + 0x2500; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__MEMATTR] = kbdev->reg + 0x2508; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__LOCKADDR] = kbdev->reg + 0x2510; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__COMMAND] = kbdev->reg + 0x2518; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__FAULTSTATUS] = kbdev->reg + 0x251c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__FAULTADDRESS] = kbdev->reg + 0x2520; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__STATUS] = kbdev->reg + 0x2528; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__TRANSCFG] = kbdev->reg + 0x2530; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS4__FAULTEXTRA] = kbdev->reg + 0x2538; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__TRANSTAB] = kbdev->reg + 0x2540; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__MEMATTR] = kbdev->reg + 0x2548; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__LOCKADDR] = kbdev->reg + 0x2550; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__COMMAND] = kbdev->reg + 0x2558; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__FAULTSTATUS] = kbdev->reg + 0x255c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__FAULTADDRESS] = kbdev->reg + 0x2560; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__STATUS] = kbdev->reg + 0x2568; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__TRANSCFG] = kbdev->reg + 0x2570; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS5__FAULTEXTRA] = kbdev->reg + 0x2578; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__TRANSTAB] = kbdev->reg + 0x2580; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__MEMATTR] = kbdev->reg + 0x2588; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__LOCKADDR] = kbdev->reg + 0x2590; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__COMMAND] = kbdev->reg + 0x2598; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__FAULTSTATUS] = kbdev->reg + 0x259c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__FAULTADDRESS] = kbdev->reg + 0x25a0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__STATUS] = kbdev->reg + 0x25a8; + 
kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__TRANSCFG] = kbdev->reg + 0x25b0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS6__FAULTEXTRA] = kbdev->reg + 0x25b8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__TRANSTAB] = kbdev->reg + 0x25c0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__MEMATTR] = kbdev->reg + 0x25c8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__LOCKADDR] = kbdev->reg + 0x25d0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__COMMAND] = kbdev->reg + 0x25d8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__FAULTSTATUS] = kbdev->reg + 0x25dc; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__FAULTADDRESS] = kbdev->reg + 0x25e0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__STATUS] = kbdev->reg + 0x25e8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__TRANSCFG] = kbdev->reg + 0x25f0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS7__FAULTEXTRA] = kbdev->reg + 0x25f8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__TRANSTAB] = kbdev->reg + 0x2600; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__MEMATTR] = kbdev->reg + 0x2608; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__LOCKADDR] = kbdev->reg + 0x2610; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__COMMAND] = kbdev->reg + 0x2618; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__FAULTSTATUS] = kbdev->reg + 0x261c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__FAULTADDRESS] = kbdev->reg + 0x2620; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__STATUS] = kbdev->reg + 0x2628; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__TRANSCFG] = kbdev->reg + 0x2630; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS8__FAULTEXTRA] = kbdev->reg + 0x2638; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__TRANSTAB] = kbdev->reg + 0x2640; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__MEMATTR] = kbdev->reg + 0x2648; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__LOCKADDR] = kbdev->reg + 0x2650; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__COMMAND] = kbdev->reg + 0x2658; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__FAULTSTATUS] = kbdev->reg + 0x265c; + 
kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__FAULTADDRESS] = kbdev->reg + 0x2660; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__STATUS] = kbdev->reg + 0x2668; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__TRANSCFG] = kbdev->reg + 0x2670; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS9__FAULTEXTRA] = kbdev->reg + 0x2678; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__TRANSTAB] = kbdev->reg + 0x2680; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__MEMATTR] = kbdev->reg + 0x2688; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__LOCKADDR] = kbdev->reg + 0x2690; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__COMMAND] = kbdev->reg + 0x2698; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__FAULTSTATUS] = kbdev->reg + 0x269c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__FAULTADDRESS] = kbdev->reg + 0x26a0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__STATUS] = kbdev->reg + 0x26a8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__TRANSCFG] = kbdev->reg + 0x26b0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS10__FAULTEXTRA] = kbdev->reg + 0x26b8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__TRANSTAB] = kbdev->reg + 0x26c0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__MEMATTR] = kbdev->reg + 0x26c8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__LOCKADDR] = kbdev->reg + 0x26d0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__COMMAND] = kbdev->reg + 0x26d8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__FAULTSTATUS] = kbdev->reg + 0x26dc; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__FAULTADDRESS] = kbdev->reg + 0x26e0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__STATUS] = kbdev->reg + 0x26e8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__TRANSCFG] = kbdev->reg + 0x26f0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS11__FAULTEXTRA] = kbdev->reg + 0x26f8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__TRANSTAB] = kbdev->reg + 0x2700; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__MEMATTR] = kbdev->reg + 0x2708; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__LOCKADDR] = kbdev->reg + 
0x2710; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__COMMAND] = kbdev->reg + 0x2718; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__FAULTSTATUS] = kbdev->reg + 0x271c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__FAULTADDRESS] = kbdev->reg + 0x2720; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__STATUS] = kbdev->reg + 0x2728; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__TRANSCFG] = kbdev->reg + 0x2730; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS12__FAULTEXTRA] = kbdev->reg + 0x2738; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__TRANSTAB] = kbdev->reg + 0x2740; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__MEMATTR] = kbdev->reg + 0x2748; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__LOCKADDR] = kbdev->reg + 0x2750; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__COMMAND] = kbdev->reg + 0x2758; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__FAULTSTATUS] = kbdev->reg + 0x275c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__FAULTADDRESS] = kbdev->reg + 0x2760; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__STATUS] = kbdev->reg + 0x2768; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__TRANSCFG] = kbdev->reg + 0x2770; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS13__FAULTEXTRA] = kbdev->reg + 0x2778; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__TRANSTAB] = kbdev->reg + 0x2780; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__MEMATTR] = kbdev->reg + 0x2788; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__LOCKADDR] = kbdev->reg + 0x2790; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__COMMAND] = kbdev->reg + 0x2798; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__FAULTSTATUS] = kbdev->reg + 0x279c; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__FAULTADDRESS] = kbdev->reg + 0x27a0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__STATUS] = kbdev->reg + 0x27a8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__TRANSCFG] = kbdev->reg + 0x27b0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS14__FAULTEXTRA] = kbdev->reg + 0x27b8; + 
kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__TRANSTAB] = kbdev->reg + 0x27c0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__MEMATTR] = kbdev->reg + 0x27c8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__LOCKADDR] = kbdev->reg + 0x27d0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__COMMAND] = kbdev->reg + 0x27d8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__FAULTSTATUS] = kbdev->reg + 0x27dc; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__FAULTADDRESS] = kbdev->reg + 0x27e0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__STATUS] = kbdev->reg + 0x27e8; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__TRANSCFG] = kbdev->reg + 0x27f0; + kbdev->regmap.regs[MMU_STAGE1__ST1MMU__AS15__FAULTEXTRA] = kbdev->reg + 0x27f8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__IRQ_RAWSTAT] = kbdev->reg + 0x10000; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__IRQ_CLEAR] = kbdev->reg + 0x10004; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__IRQ_MASK] = kbdev->reg + 0x10008; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__IRQ_STATUS] = kbdev->reg + 0x1000c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__TRANSTAB] = kbdev->reg + 0x10400; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__MEMATTR] = kbdev->reg + 0x10408; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__LOCKADDR] = kbdev->reg + 0x10410; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__COMMAND] = kbdev->reg + 0x10418; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__FAULTSTATUS] = kbdev->reg + 0x1041c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__FAULTADDRESS] = kbdev->reg + 0x10420; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__STATUS] = kbdev->reg + 0x10428; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__TRANSCFG] = kbdev->reg + 0x10430; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS0__FAULTEXTRA] = kbdev->reg + 0x10438; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__TRANSTAB] = kbdev->reg + 0x10440; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__MEMATTR] = kbdev->reg + 0x10448; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__LOCKADDR] = kbdev->reg + 0x10450; + 
kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__COMMAND] = kbdev->reg + 0x10458; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__FAULTSTATUS] = kbdev->reg + 0x1045c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__FAULTADDRESS] = kbdev->reg + 0x10460; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__STATUS] = kbdev->reg + 0x10468; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__TRANSCFG] = kbdev->reg + 0x10470; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS1__FAULTEXTRA] = kbdev->reg + 0x10478; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__TRANSTAB] = kbdev->reg + 0x10480; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__MEMATTR] = kbdev->reg + 0x10488; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__LOCKADDR] = kbdev->reg + 0x10490; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__COMMAND] = kbdev->reg + 0x10498; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__FAULTSTATUS] = kbdev->reg + 0x1049c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__FAULTADDRESS] = kbdev->reg + 0x104a0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__STATUS] = kbdev->reg + 0x104a8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__TRANSCFG] = kbdev->reg + 0x104b0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS2__FAULTEXTRA] = kbdev->reg + 0x104b8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__TRANSTAB] = kbdev->reg + 0x104c0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__MEMATTR] = kbdev->reg + 0x104c8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__LOCKADDR] = kbdev->reg + 0x104d0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__COMMAND] = kbdev->reg + 0x104d8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__FAULTSTATUS] = kbdev->reg + 0x104dc; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__FAULTADDRESS] = kbdev->reg + 0x104e0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__STATUS] = kbdev->reg + 0x104e8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__TRANSCFG] = kbdev->reg + 0x104f0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS3__FAULTEXTRA] = kbdev->reg + 0x104f8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__TRANSTAB] = 
kbdev->reg + 0x10500; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__MEMATTR] = kbdev->reg + 0x10508; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__LOCKADDR] = kbdev->reg + 0x10510; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__COMMAND] = kbdev->reg + 0x10518; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__FAULTSTATUS] = kbdev->reg + 0x1051c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__FAULTADDRESS] = kbdev->reg + 0x10520; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__STATUS] = kbdev->reg + 0x10528; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__TRANSCFG] = kbdev->reg + 0x10530; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS4__FAULTEXTRA] = kbdev->reg + 0x10538; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__TRANSTAB] = kbdev->reg + 0x10540; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__MEMATTR] = kbdev->reg + 0x10548; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__LOCKADDR] = kbdev->reg + 0x10550; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__COMMAND] = kbdev->reg + 0x10558; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__FAULTSTATUS] = kbdev->reg + 0x1055c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__FAULTADDRESS] = kbdev->reg + 0x10560; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__STATUS] = kbdev->reg + 0x10568; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__TRANSCFG] = kbdev->reg + 0x10570; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS5__FAULTEXTRA] = kbdev->reg + 0x10578; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__TRANSTAB] = kbdev->reg + 0x10580; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__MEMATTR] = kbdev->reg + 0x10588; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__LOCKADDR] = kbdev->reg + 0x10590; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__COMMAND] = kbdev->reg + 0x10598; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__FAULTSTATUS] = kbdev->reg + 0x1059c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__FAULTADDRESS] = kbdev->reg + 0x105a0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__STATUS] = kbdev->reg + 0x105a8; + 
kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__TRANSCFG] = kbdev->reg + 0x105b0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS6__FAULTEXTRA] = kbdev->reg + 0x105b8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__TRANSTAB] = kbdev->reg + 0x105c0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__MEMATTR] = kbdev->reg + 0x105c8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__LOCKADDR] = kbdev->reg + 0x105d0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__COMMAND] = kbdev->reg + 0x105d8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__FAULTSTATUS] = kbdev->reg + 0x105dc; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__FAULTADDRESS] = kbdev->reg + 0x105e0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__STATUS] = kbdev->reg + 0x105e8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__TRANSCFG] = kbdev->reg + 0x105f0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS7__FAULTEXTRA] = kbdev->reg + 0x105f8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__TRANSTAB] = kbdev->reg + 0x10600; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__MEMATTR] = kbdev->reg + 0x10608; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__LOCKADDR] = kbdev->reg + 0x10610; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__COMMAND] = kbdev->reg + 0x10618; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__FAULTSTATUS] = kbdev->reg + 0x1061c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__FAULTADDRESS] = kbdev->reg + 0x10620; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__STATUS] = kbdev->reg + 0x10628; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__TRANSCFG] = kbdev->reg + 0x10630; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS8__FAULTEXTRA] = kbdev->reg + 0x10638; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__TRANSTAB] = kbdev->reg + 0x10640; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__MEMATTR] = kbdev->reg + 0x10648; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__LOCKADDR] = kbdev->reg + 0x10650; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__COMMAND] = kbdev->reg + 0x10658; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__FAULTSTATUS] = kbdev->reg 
+ 0x1065c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__FAULTADDRESS] = kbdev->reg + 0x10660; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__STATUS] = kbdev->reg + 0x10668; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__TRANSCFG] = kbdev->reg + 0x10670; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS9__FAULTEXTRA] = kbdev->reg + 0x10678; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__TRANSTAB] = kbdev->reg + 0x10680; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__MEMATTR] = kbdev->reg + 0x10688; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__LOCKADDR] = kbdev->reg + 0x10690; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__COMMAND] = kbdev->reg + 0x10698; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__FAULTSTATUS] = kbdev->reg + 0x1069c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__FAULTADDRESS] = kbdev->reg + 0x106a0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__STATUS] = kbdev->reg + 0x106a8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__TRANSCFG] = kbdev->reg + 0x106b0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS10__FAULTEXTRA] = kbdev->reg + 0x106b8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__TRANSTAB] = kbdev->reg + 0x106c0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__MEMATTR] = kbdev->reg + 0x106c8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__LOCKADDR] = kbdev->reg + 0x106d0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__COMMAND] = kbdev->reg + 0x106d8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__FAULTSTATUS] = kbdev->reg + 0x106dc; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__FAULTADDRESS] = kbdev->reg + 0x106e0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__STATUS] = kbdev->reg + 0x106e8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__TRANSCFG] = kbdev->reg + 0x106f0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS11__FAULTEXTRA] = kbdev->reg + 0x106f8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__TRANSTAB] = kbdev->reg + 0x10700; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__MEMATTR] = kbdev->reg + 0x10708; + 
kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__LOCKADDR] = kbdev->reg + 0x10710; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__COMMAND] = kbdev->reg + 0x10718; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__FAULTSTATUS] = kbdev->reg + 0x1071c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__FAULTADDRESS] = kbdev->reg + 0x10720; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__STATUS] = kbdev->reg + 0x10728; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__TRANSCFG] = kbdev->reg + 0x10730; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS12__FAULTEXTRA] = kbdev->reg + 0x10738; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__TRANSTAB] = kbdev->reg + 0x10740; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__MEMATTR] = kbdev->reg + 0x10748; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__LOCKADDR] = kbdev->reg + 0x10750; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__COMMAND] = kbdev->reg + 0x10758; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__FAULTSTATUS] = kbdev->reg + 0x1075c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__FAULTADDRESS] = kbdev->reg + 0x10760; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__STATUS] = kbdev->reg + 0x10768; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__TRANSCFG] = kbdev->reg + 0x10770; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS13__FAULTEXTRA] = kbdev->reg + 0x10778; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__TRANSTAB] = kbdev->reg + 0x10780; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__MEMATTR] = kbdev->reg + 0x10788; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__LOCKADDR] = kbdev->reg + 0x10790; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__COMMAND] = kbdev->reg + 0x10798; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__FAULTSTATUS] = kbdev->reg + 0x1079c; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__FAULTADDRESS] = kbdev->reg + 0x107a0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__STATUS] = kbdev->reg + 0x107a8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__TRANSCFG] = kbdev->reg + 0x107b0; + 
kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS14__FAULTEXTRA] = kbdev->reg + 0x107b8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__TRANSTAB] = kbdev->reg + 0x107c0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__MEMATTR] = kbdev->reg + 0x107c8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__LOCKADDR] = kbdev->reg + 0x107d0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__COMMAND] = kbdev->reg + 0x107d8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__FAULTSTATUS] = kbdev->reg + 0x107dc; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__FAULTADDRESS] = kbdev->reg + 0x107e0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__STATUS] = kbdev->reg + 0x107e8; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__TRANSCFG] = kbdev->reg + 0x107f0; + kbdev->regmap.regs[MMU_STAGE2__ST2MMU__AS15__FAULTEXTRA] = kbdev->reg + 0x107f8; +} + +/* v6.2 register map. When neither lookup table exists yet, allocates both with NR_V6_2_REGS entries; when a caller has already allocated them (kbase_regmap_v7_0_init() does so before chaining here) the allocation is skipped. Warns and returns if either table is NULL. Otherwise fills in the v6.0 map via kbase_regmap_v6_0_init(), then adds the access flags and addresses of REVIDR and the STACK_* registers. */ static void kbase_regmap_v6_2_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V6_2_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v6_0_init(kbdev); + + kbdev->regmap.flags[GPU_CONTROL__REVIDR] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__STACK_PRESENT] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__STACK_PWROFF] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__STACK_PWRON] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_WRITE; + kbdev->regmap.flags[GPU_CONTROL__STACK_PWRTRANS] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__STACK_READY] = KBASE_REGMAP_WIDTH_64_BIT | + KBASE_REGMAP_PERM_READ; + + kbdev->regmap.regs[GPU_CONTROL__REVIDR] = kbdev->reg + 
0x280; + kbdev->regmap.regs[GPU_CONTROL__STACK_PRESENT] = kbdev->reg + 0xe00; + kbdev->regmap.regs[GPU_CONTROL__STACK_PWROFF] = kbdev->reg + 0xe30; + kbdev->regmap.regs[GPU_CONTROL__STACK_PWRON] = kbdev->reg + 0xe20; + kbdev->regmap.regs[GPU_CONTROL__STACK_PWRTRANS] = kbdev->reg + 0xe40; + kbdev->regmap.regs[GPU_CONTROL__STACK_READY] = kbdev->reg + 0xe10; +} + +/* v7.0 register map. Allocates both lookup tables with NR_V7_0_REGS entries if neither exists yet, warns and returns if either is NULL, then chains to kbase_regmap_v6_2_init() (which skips its own allocation because the tables now exist) and adds TEXTURE_FEATURES_3 as a 32-bit read-only register at offset 0xbc. */ static void kbase_regmap_v7_0_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V7_0_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v6_2_init(kbdev); + + kbdev->regmap.flags[GPU_CONTROL__TEXTURE_FEATURES_3] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + + kbdev->regmap.regs[GPU_CONTROL__TEXTURE_FEATURES_3] = kbdev->reg + 0xbc; +} + +/* v7.2 register map. Same allocate-if-missing and NULL-check preamble, sized by NR_V7_2_REGS; chains to kbase_regmap_v7_0_init() and then adds CORE_FEATURES (offset 0x8) and THREAD_TLS_ALLOC (offset 0x310), both 32-bit read-only. */ static void kbase_regmap_v7_2_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V7_2_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v7_0_init(kbdev); + + kbdev->regmap.flags[GPU_CONTROL__CORE_FEATURES] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + kbdev->regmap.flags[GPU_CONTROL__THREAD_TLS_ALLOC] = KBASE_REGMAP_WIDTH_32_BIT | + KBASE_REGMAP_PERM_READ; + + kbdev->regmap.regs[GPU_CONTROL__CORE_FEATURES] = kbdev->reg + 0x8; + kbdev->regmap.regs[GPU_CONTROL__THREAD_TLS_ALLOC] = kbdev->reg + 0x310; +} + +/* v9.0 register map. Same allocate-if-missing and NULL-check preamble, sized by NR_V9_0_REGS; chains to kbase_regmap_v7_2_init() and then resets the flags to 0 and the addresses to NULL for CORE_FEATURES, THREAD_TLS_ALLOC and JOB_IRQ_THROTTLE. NOTE(review): presumably these registers are absent on this architecture - confirm against the hardware documentation. */ static void kbase_regmap_v9_0_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL 
&& kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V9_0_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v7_2_init(kbdev); + + /* Undo what the chained v7.2 init set up for these entries: zero flags and a NULL address. */ kbdev->regmap.flags[GPU_CONTROL__CORE_FEATURES] = 0; + kbdev->regmap.flags[GPU_CONTROL__THREAD_TLS_ALLOC] = 0; + kbdev->regmap.flags[JOB_CONTROL__JOB_IRQ_THROTTLE] = 0; + + kbdev->regmap.regs[GPU_CONTROL__CORE_FEATURES] = NULL; + kbdev->regmap.regs[GPU_CONTROL__THREAD_TLS_ALLOC] = NULL; + kbdev->regmap.regs[JOB_CONTROL__JOB_IRQ_THROTTLE] = NULL; +} + +/* v9.2 register map. Allocates both lookup tables with NR_V9_2_REGS entries if neither exists yet, warns and returns if either is NULL, then chains to kbase_regmap_v9_0_init() and adds L2_CONFIG as a 32-bit read/write register at offset 0x48. */ static void kbase_regmap_v9_2_init(struct kbase_device *kbdev) +{ + if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) { + kbdev->regmap.size = NR_V9_2_REGS; + kbdev->regmap.regs = + kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL); + kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL); + } + + if (WARN_ON(kbdev->regmap.regs == NULL)) + return; + if (WARN_ON(kbdev->regmap.flags == NULL)) + return; + + kbase_regmap_v9_0_init(kbdev); + + kbdev->regmap.flags[GPU_CONTROL__L2_CONFIG] = + KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE; + + kbdev->regmap.regs[GPU_CONTROL__L2_CONFIG] = kbdev->reg + 0x48; +} + +/* Selects and runs the register-map init routine matching kbdev->gpu_props.gpu_id.arch_id: the newest table entry whose arch_id does not exceed the GPU's (the first entry is used for anything older than the table, the last for anything at or beyond it). Returns the arch_id of the entry that was run. The init routines return nothing, so this function does not report allocation failure; whether the tables were populated has to be checked separately. */ u32 kbase_regmap_backend_init(struct kbase_device *kbdev) +{ + int i = 0; + + /* Must stay sorted by ascending arch_id: the selection loop compares against the following entry. */ struct { + u32 arch_id; + void (*init)(struct kbase_device *kbdev); + } init_array[] = { + { GPU_ID_ARCH_MAKE(6, 0, 0), kbase_regmap_v6_0_init }, + { GPU_ID_ARCH_MAKE(6, 2, 0), kbase_regmap_v6_2_init }, + { GPU_ID_ARCH_MAKE(7, 0, 0), kbase_regmap_v7_0_init }, + { GPU_ID_ARCH_MAKE(7, 2, 0), kbase_regmap_v7_2_init }, + { GPU_ID_ARCH_MAKE(9, 0, 0), kbase_regmap_v9_0_init }, + { GPU_ID_ARCH_MAKE(9, 2, 0), kbase_regmap_v9_2_init }, + }; + + /* Stops one short of the end because each step looks at entry i + 1. */ for (i = 0; i < ARRAY_SIZE(init_array) - 1; 
i++) { + /* The next entry is newer than this GPU, so entry i is the one to use. */ if (kbdev->gpu_props.gpu_id.arch_id < init_array[i + 1].arch_id) { + init_array[i].init(kbdev); + return init_array[i].arch_id; + } + } + + /* arch_id is greater than or equal to the last entry in init_array: use that last entry */ + init_array[i].init(kbdev); + return init_array[i].arch_id; +} + +#ifdef CONFIG_MALI_BIFROST_DEBUG +/* Register names as strings, indexed by register enum value (each entry visible here maps an enum to its own spelling); only built when CONFIG_MALI_BIFROST_DEBUG is defined. */ static char *enum_strings[] = { + [GPU_CONTROL__GPU_ID] = "GPU_CONTROL__GPU_ID", + [GPU_CONTROL__L2_FEATURES] = "GPU_CONTROL__L2_FEATURES", + [GPU_CONTROL__TILER_FEATURES] = "GPU_CONTROL__TILER_FEATURES", + [GPU_CONTROL__MEM_FEATURES] = "GPU_CONTROL__MEM_FEATURES", + [GPU_CONTROL__MMU_FEATURES] = "GPU_CONTROL__MMU_FEATURES", + [GPU_CONTROL__AS_PRESENT] = "GPU_CONTROL__AS_PRESENT", + [GPU_CONTROL__JS_PRESENT] = "GPU_CONTROL__JS_PRESENT", + [GPU_CONTROL__GPU_IRQ_RAWSTAT] = "GPU_CONTROL__GPU_IRQ_RAWSTAT", + [GPU_CONTROL__GPU_IRQ_CLEAR] = "GPU_CONTROL__GPU_IRQ_CLEAR", + [GPU_CONTROL__GPU_IRQ_MASK] = "GPU_CONTROL__GPU_IRQ_MASK", + [GPU_CONTROL__GPU_IRQ_STATUS] = "GPU_CONTROL__GPU_IRQ_STATUS", + [GPU_CONTROL__GPU_COMMAND] = "GPU_CONTROL__GPU_COMMAND", + [GPU_CONTROL__GPU_STATUS] = "GPU_CONTROL__GPU_STATUS", + [GPU_CONTROL__LATEST_FLUSH] = "GPU_CONTROL__LATEST_FLUSH", + [GPU_CONTROL__GPU_FAULTSTATUS] = "GPU_CONTROL__GPU_FAULTSTATUS", + [GPU_CONTROL__GPU_FAULTADDRESS] = "GPU_CONTROL__GPU_FAULTADDRESS", + [GPU_CONTROL__AFBC_FEATURES] = "GPU_CONTROL__AFBC_FEATURES", + [GPU_CONTROL__PWR_KEY] = "GPU_CONTROL__PWR_KEY", + [GPU_CONTROL__PWR_OVERRIDE0] = "GPU_CONTROL__PWR_OVERRIDE0", + [GPU_CONTROL__PWR_OVERRIDE1] = "GPU_CONTROL__PWR_OVERRIDE1", + [GPU_CONTROL__PRFCNT_BASE] = "GPU_CONTROL__PRFCNT_BASE", + [GPU_CONTROL__PRFCNT_CONFIG] = "GPU_CONTROL__PRFCNT_CONFIG", + [GPU_CONTROL__PRFCNT_JM_EN] = "GPU_CONTROL__PRFCNT_JM_EN", + [GPU_CONTROL__PRFCNT_SHADER_EN] = "GPU_CONTROL__PRFCNT_SHADER_EN", + [GPU_CONTROL__PRFCNT_TILER_EN] = "GPU_CONTROL__PRFCNT_TILER_EN", + [GPU_CONTROL__PRFCNT_MMU_L2_EN] = "GPU_CONTROL__PRFCNT_MMU_L2_EN", + [GPU_CONTROL__CYCLE_COUNT] = "GPU_CONTROL__CYCLE_COUNT", 
+ [GPU_CONTROL__TIMESTAMP] = "GPU_CONTROL__TIMESTAMP", + [GPU_CONTROL__THREAD_MAX_THREADS] = "GPU_CONTROL__THREAD_MAX_THREADS", + [GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE] = "GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE", + [GPU_CONTROL__THREAD_MAX_BARRIER_SIZE] = "GPU_CONTROL__THREAD_MAX_BARRIER_SIZE", + [GPU_CONTROL__THREAD_FEATURES] = "GPU_CONTROL__THREAD_FEATURES", + [GPU_CONTROL__TEXTURE_FEATURES_0] = "GPU_CONTROL__TEXTURE_FEATURES_0", + [GPU_CONTROL__TEXTURE_FEATURES_1] = "GPU_CONTROL__TEXTURE_FEATURES_1", + [GPU_CONTROL__TEXTURE_FEATURES_2] = "GPU_CONTROL__TEXTURE_FEATURES_2", + [GPU_CONTROL__JS0_FEATURES] = "GPU_CONTROL__JS0_FEATURES", + [GPU_CONTROL__JS1_FEATURES] = "GPU_CONTROL__JS1_FEATURES", + [GPU_CONTROL__JS2_FEATURES] = "GPU_CONTROL__JS2_FEATURES", + [GPU_CONTROL__JS3_FEATURES] = "GPU_CONTROL__JS3_FEATURES", + [GPU_CONTROL__JS4_FEATURES] = "GPU_CONTROL__JS4_FEATURES", + [GPU_CONTROL__JS5_FEATURES] = "GPU_CONTROL__JS5_FEATURES", + [GPU_CONTROL__JS6_FEATURES] = "GPU_CONTROL__JS6_FEATURES", + [GPU_CONTROL__JS7_FEATURES] = "GPU_CONTROL__JS7_FEATURES", + [GPU_CONTROL__JS8_FEATURES] = "GPU_CONTROL__JS8_FEATURES", + [GPU_CONTROL__JS9_FEATURES] = "GPU_CONTROL__JS9_FEATURES", + [GPU_CONTROL__JS10_FEATURES] = "GPU_CONTROL__JS10_FEATURES", + [GPU_CONTROL__JS11_FEATURES] = "GPU_CONTROL__JS11_FEATURES", + [GPU_CONTROL__JS12_FEATURES] = "GPU_CONTROL__JS12_FEATURES", + [GPU_CONTROL__JS13_FEATURES] = "GPU_CONTROL__JS13_FEATURES", + [GPU_CONTROL__JS14_FEATURES] = "GPU_CONTROL__JS14_FEATURES", + [GPU_CONTROL__JS15_FEATURES] = "GPU_CONTROL__JS15_FEATURES", + [GPU_CONTROL__SHADER_PRESENT] = "GPU_CONTROL__SHADER_PRESENT", + [GPU_CONTROL__TILER_PRESENT] = "GPU_CONTROL__TILER_PRESENT", + [GPU_CONTROL__L2_PRESENT] = "GPU_CONTROL__L2_PRESENT", + [GPU_CONTROL__SHADER_READY] = "GPU_CONTROL__SHADER_READY", + [GPU_CONTROL__TILER_READY] = "GPU_CONTROL__TILER_READY", + [GPU_CONTROL__L2_READY] = "GPU_CONTROL__L2_READY", + [GPU_CONTROL__SHADER_PWRON] = "GPU_CONTROL__SHADER_PWRON", + 
[GPU_CONTROL__TILER_PWRON] = "GPU_CONTROL__TILER_PWRON", + [GPU_CONTROL__L2_PWRON] = "GPU_CONTROL__L2_PWRON", + [GPU_CONTROL__SHADER_PWROFF] = "GPU_CONTROL__SHADER_PWROFF", + [GPU_CONTROL__TILER_PWROFF] = "GPU_CONTROL__TILER_PWROFF", + [GPU_CONTROL__L2_PWROFF] = "GPU_CONTROL__L2_PWROFF", + [GPU_CONTROL__SHADER_PWRTRANS] = "GPU_CONTROL__SHADER_PWRTRANS", + [GPU_CONTROL__TILER_PWRTRANS] = "GPU_CONTROL__TILER_PWRTRANS", + [GPU_CONTROL__L2_PWRTRANS] = "GPU_CONTROL__L2_PWRTRANS", + [GPU_CONTROL__SHADER_PWRACTIVE] = "GPU_CONTROL__SHADER_PWRACTIVE", + [GPU_CONTROL__TILER_PWRACTIVE] = "GPU_CONTROL__TILER_PWRACTIVE", + [GPU_CONTROL__L2_PWRACTIVE] = "GPU_CONTROL__L2_PWRACTIVE", + [GPU_CONTROL__COHERENCY_FEATURES] = "GPU_CONTROL__COHERENCY_FEATURES", + [GPU_CONTROL__COHERENCY_ENABLE] = "GPU_CONTROL__COHERENCY_ENABLE", + [GPU_CONTROL__GPU_USER_INn] = "GPU_CONTROL__GPU_USER_INn", + [GPU_CONTROL__GPU_USER_OUTn] = "GPU_CONTROL__GPU_USER_OUTn", + [GPU_CONTROL__JM_CONFIG] = "GPU_CONTROL__JM_CONFIG", + [GPU_CONTROL__SHADER_CONFIG] = "GPU_CONTROL__SHADER_CONFIG", + [GPU_CONTROL__TILER_CONFIG] = "GPU_CONTROL__TILER_CONFIG", + [GPU_CONTROL__L2_MMU_CONFIG] = "GPU_CONTROL__L2_MMU_CONFIG", + [JOB_CONTROL__JOB_IRQ_RAWSTAT] = "JOB_CONTROL__JOB_IRQ_RAWSTAT", + [JOB_CONTROL__JOB_IRQ_CLEAR] = "JOB_CONTROL__JOB_IRQ_CLEAR", + [JOB_CONTROL__JOB_IRQ_MASK] = "JOB_CONTROL__JOB_IRQ_MASK", + [JOB_CONTROL__JOB_IRQ_STATUS] = "JOB_CONTROL__JOB_IRQ_STATUS", + [JOB_CONTROL__JOB_IRQ_JS_STATE] = "JOB_CONTROL__JOB_IRQ_JS_STATE", + [JOB_CONTROL__JOB_IRQ_THROTTLE] = "JOB_CONTROL__JOB_IRQ_THROTTLE", + [JOB_CONTROL__EVENT_IRQ_RAWSTAT] = "JOB_CONTROL__EVENT_IRQ_RAWSTAT", + [JOB_CONTROL__EVENT_IRQ_CLEAR] = "JOB_CONTROL__EVENT_IRQ_CLEAR", + [JOB_CONTROL__EVENT_IRQ_MASK] = "JOB_CONTROL__EVENT_IRQ_MASK", + [JOB_CONTROL__EVENT_IRQ_STATUS] = "JOB_CONTROL__EVENT_IRQ_STATUS", + [JOB_CONTROL__JS0__HEAD] = "JOB_CONTROL__JS0__HEAD", + [JOB_CONTROL__JS0__TAIL] = "JOB_CONTROL__JS0__TAIL", + [JOB_CONTROL__JS0__AFFINITY] = 
"JOB_CONTROL__JS0__AFFINITY", + [JOB_CONTROL__JS0__CONFIG] = "JOB_CONTROL__JS0__CONFIG", + [JOB_CONTROL__JS0__XAFFINITY] = "JOB_CONTROL__JS0__XAFFINITY", + [JOB_CONTROL__JS0__COMMAND] = "JOB_CONTROL__JS0__COMMAND", + [JOB_CONTROL__JS0__STATUS] = "JOB_CONTROL__JS0__STATUS", + [JOB_CONTROL__JS0__FLUSH_ID] = "JOB_CONTROL__JS0__FLUSH_ID", + [JOB_CONTROL__JS0__EVENT_0] = "JOB_CONTROL__JS0__EVENT_0", + [JOB_CONTROL__JS0__EVENT_1] = "JOB_CONTROL__JS0__EVENT_1", + [JOB_CONTROL__JS0__EVENT_MASK] = "JOB_CONTROL__JS0__EVENT_MASK", + [JOB_CONTROL__JS0__HEAD_NEXT] = "JOB_CONTROL__JS0__HEAD_NEXT", + [JOB_CONTROL__JS0__TAIL_NEXT] = "JOB_CONTROL__JS0__TAIL_NEXT", + [JOB_CONTROL__JS0__AFFINITY_NEXT] = "JOB_CONTROL__JS0__AFFINITY_NEXT", + [JOB_CONTROL__JS0__CONFIG_NEXT] = "JOB_CONTROL__JS0__CONFIG_NEXT", + [JOB_CONTROL__JS0__XAFFINITY_NEXT] = "JOB_CONTROL__JS0__XAFFINITY_NEXT", + [JOB_CONTROL__JS0__COMMAND_NEXT] = "JOB_CONTROL__JS0__COMMAND_NEXT", + [JOB_CONTROL__JS0__FLUSH_ID_NEXT] = "JOB_CONTROL__JS0__FLUSH_ID_NEXT", + [JOB_CONTROL__JS0__EVENT_MASK_NEXT] = "JOB_CONTROL__JS0__EVENT_MASK_NEXT", + [JOB_CONTROL__JS1__HEAD] = "JOB_CONTROL__JS1__HEAD", + [JOB_CONTROL__JS1__TAIL] = "JOB_CONTROL__JS1__TAIL", + [JOB_CONTROL__JS1__AFFINITY] = "JOB_CONTROL__JS1__AFFINITY", + [JOB_CONTROL__JS1__CONFIG] = "JOB_CONTROL__JS1__CONFIG", + [JOB_CONTROL__JS1__XAFFINITY] = "JOB_CONTROL__JS1__XAFFINITY", + [JOB_CONTROL__JS1__COMMAND] = "JOB_CONTROL__JS1__COMMAND", + [JOB_CONTROL__JS1__STATUS] = "JOB_CONTROL__JS1__STATUS", + [JOB_CONTROL__JS1__FLUSH_ID] = "JOB_CONTROL__JS1__FLUSH_ID", + [JOB_CONTROL__JS1__EVENT_0] = "JOB_CONTROL__JS1__EVENT_0", + [JOB_CONTROL__JS1__EVENT_1] = "JOB_CONTROL__JS1__EVENT_1", + [JOB_CONTROL__JS1__EVENT_MASK] = "JOB_CONTROL__JS1__EVENT_MASK", + [JOB_CONTROL__JS1__HEAD_NEXT] = "JOB_CONTROL__JS1__HEAD_NEXT", + [JOB_CONTROL__JS1__TAIL_NEXT] = "JOB_CONTROL__JS1__TAIL_NEXT", + [JOB_CONTROL__JS1__AFFINITY_NEXT] = "JOB_CONTROL__JS1__AFFINITY_NEXT", + [JOB_CONTROL__JS1__CONFIG_NEXT] 
= "JOB_CONTROL__JS1__CONFIG_NEXT", + [JOB_CONTROL__JS1__XAFFINITY_NEXT] = "JOB_CONTROL__JS1__XAFFINITY_NEXT", + [JOB_CONTROL__JS1__COMMAND_NEXT] = "JOB_CONTROL__JS1__COMMAND_NEXT", + [JOB_CONTROL__JS1__FLUSH_ID_NEXT] = "JOB_CONTROL__JS1__FLUSH_ID_NEXT", + [JOB_CONTROL__JS1__EVENT_MASK_NEXT] = "JOB_CONTROL__JS1__EVENT_MASK_NEXT", + [JOB_CONTROL__JS2__HEAD] = "JOB_CONTROL__JS2__HEAD", + [JOB_CONTROL__JS2__TAIL] = "JOB_CONTROL__JS2__TAIL", + [JOB_CONTROL__JS2__AFFINITY] = "JOB_CONTROL__JS2__AFFINITY", + [JOB_CONTROL__JS2__CONFIG] = "JOB_CONTROL__JS2__CONFIG", + [JOB_CONTROL__JS2__XAFFINITY] = "JOB_CONTROL__JS2__XAFFINITY", + [JOB_CONTROL__JS2__COMMAND] = "JOB_CONTROL__JS2__COMMAND", + [JOB_CONTROL__JS2__STATUS] = "JOB_CONTROL__JS2__STATUS", + [JOB_CONTROL__JS2__FLUSH_ID] = "JOB_CONTROL__JS2__FLUSH_ID", + [JOB_CONTROL__JS2__EVENT_0] = "JOB_CONTROL__JS2__EVENT_0", + [JOB_CONTROL__JS2__EVENT_1] = "JOB_CONTROL__JS2__EVENT_1", + [JOB_CONTROL__JS2__EVENT_MASK] = "JOB_CONTROL__JS2__EVENT_MASK", + [JOB_CONTROL__JS2__HEAD_NEXT] = "JOB_CONTROL__JS2__HEAD_NEXT", + [JOB_CONTROL__JS2__TAIL_NEXT] = "JOB_CONTROL__JS2__TAIL_NEXT", + [JOB_CONTROL__JS2__AFFINITY_NEXT] = "JOB_CONTROL__JS2__AFFINITY_NEXT", + [JOB_CONTROL__JS2__CONFIG_NEXT] = "JOB_CONTROL__JS2__CONFIG_NEXT", + [JOB_CONTROL__JS2__XAFFINITY_NEXT] = "JOB_CONTROL__JS2__XAFFINITY_NEXT", + [JOB_CONTROL__JS2__COMMAND_NEXT] = "JOB_CONTROL__JS2__COMMAND_NEXT", + [JOB_CONTROL__JS2__FLUSH_ID_NEXT] = "JOB_CONTROL__JS2__FLUSH_ID_NEXT", + [JOB_CONTROL__JS2__EVENT_MASK_NEXT] = "JOB_CONTROL__JS2__EVENT_MASK_NEXT", + [JOB_CONTROL__JS3__HEAD] = "JOB_CONTROL__JS3__HEAD", + [JOB_CONTROL__JS3__TAIL] = "JOB_CONTROL__JS3__TAIL", + [JOB_CONTROL__JS3__AFFINITY] = "JOB_CONTROL__JS3__AFFINITY", + [JOB_CONTROL__JS3__CONFIG] = "JOB_CONTROL__JS3__CONFIG", + [JOB_CONTROL__JS3__XAFFINITY] = "JOB_CONTROL__JS3__XAFFINITY", + [JOB_CONTROL__JS3__COMMAND] = "JOB_CONTROL__JS3__COMMAND", + [JOB_CONTROL__JS3__STATUS] = "JOB_CONTROL__JS3__STATUS", + 
[JOB_CONTROL__JS3__FLUSH_ID] = "JOB_CONTROL__JS3__FLUSH_ID", + [JOB_CONTROL__JS3__EVENT_0] = "JOB_CONTROL__JS3__EVENT_0", + [JOB_CONTROL__JS3__EVENT_1] = "JOB_CONTROL__JS3__EVENT_1", + [JOB_CONTROL__JS3__EVENT_MASK] = "JOB_CONTROL__JS3__EVENT_MASK", + [JOB_CONTROL__JS3__HEAD_NEXT] = "JOB_CONTROL__JS3__HEAD_NEXT", + [JOB_CONTROL__JS3__TAIL_NEXT] = "JOB_CONTROL__JS3__TAIL_NEXT", + [JOB_CONTROL__JS3__AFFINITY_NEXT] = "JOB_CONTROL__JS3__AFFINITY_NEXT", + [JOB_CONTROL__JS3__CONFIG_NEXT] = "JOB_CONTROL__JS3__CONFIG_NEXT", + [JOB_CONTROL__JS3__XAFFINITY_NEXT] = "JOB_CONTROL__JS3__XAFFINITY_NEXT", + [JOB_CONTROL__JS3__COMMAND_NEXT] = "JOB_CONTROL__JS3__COMMAND_NEXT", + [JOB_CONTROL__JS3__FLUSH_ID_NEXT] = "JOB_CONTROL__JS3__FLUSH_ID_NEXT", + [JOB_CONTROL__JS3__EVENT_MASK_NEXT] = "JOB_CONTROL__JS3__EVENT_MASK_NEXT", + [JOB_CONTROL__JS4__HEAD] = "JOB_CONTROL__JS4__HEAD", + [JOB_CONTROL__JS4__TAIL] = "JOB_CONTROL__JS4__TAIL", + [JOB_CONTROL__JS4__AFFINITY] = "JOB_CONTROL__JS4__AFFINITY", + [JOB_CONTROL__JS4__CONFIG] = "JOB_CONTROL__JS4__CONFIG", + [JOB_CONTROL__JS4__XAFFINITY] = "JOB_CONTROL__JS4__XAFFINITY", + [JOB_CONTROL__JS4__COMMAND] = "JOB_CONTROL__JS4__COMMAND", + [JOB_CONTROL__JS4__STATUS] = "JOB_CONTROL__JS4__STATUS", + [JOB_CONTROL__JS4__FLUSH_ID] = "JOB_CONTROL__JS4__FLUSH_ID", + [JOB_CONTROL__JS4__EVENT_0] = "JOB_CONTROL__JS4__EVENT_0", + [JOB_CONTROL__JS4__EVENT_1] = "JOB_CONTROL__JS4__EVENT_1", + [JOB_CONTROL__JS4__EVENT_MASK] = "JOB_CONTROL__JS4__EVENT_MASK", + [JOB_CONTROL__JS4__HEAD_NEXT] = "JOB_CONTROL__JS4__HEAD_NEXT", + [JOB_CONTROL__JS4__TAIL_NEXT] = "JOB_CONTROL__JS4__TAIL_NEXT", + [JOB_CONTROL__JS4__AFFINITY_NEXT] = "JOB_CONTROL__JS4__AFFINITY_NEXT", + [JOB_CONTROL__JS4__CONFIG_NEXT] = "JOB_CONTROL__JS4__CONFIG_NEXT", + [JOB_CONTROL__JS4__XAFFINITY_NEXT] = "JOB_CONTROL__JS4__XAFFINITY_NEXT", + [JOB_CONTROL__JS4__COMMAND_NEXT] = "JOB_CONTROL__JS4__COMMAND_NEXT", + [JOB_CONTROL__JS4__FLUSH_ID_NEXT] = "JOB_CONTROL__JS4__FLUSH_ID_NEXT", + 
[JOB_CONTROL__JS4__EVENT_MASK_NEXT] = "JOB_CONTROL__JS4__EVENT_MASK_NEXT", + [JOB_CONTROL__JS5__HEAD] = "JOB_CONTROL__JS5__HEAD", + [JOB_CONTROL__JS5__TAIL] = "JOB_CONTROL__JS5__TAIL", + [JOB_CONTROL__JS5__AFFINITY] = "JOB_CONTROL__JS5__AFFINITY", + [JOB_CONTROL__JS5__CONFIG] = "JOB_CONTROL__JS5__CONFIG", + [JOB_CONTROL__JS5__XAFFINITY] = "JOB_CONTROL__JS5__XAFFINITY", + [JOB_CONTROL__JS5__COMMAND] = "JOB_CONTROL__JS5__COMMAND", + [JOB_CONTROL__JS5__STATUS] = "JOB_CONTROL__JS5__STATUS", + [JOB_CONTROL__JS5__FLUSH_ID] = "JOB_CONTROL__JS5__FLUSH_ID", + [JOB_CONTROL__JS5__EVENT_0] = "JOB_CONTROL__JS5__EVENT_0", + [JOB_CONTROL__JS5__EVENT_1] = "JOB_CONTROL__JS5__EVENT_1", + [JOB_CONTROL__JS5__EVENT_MASK] = "JOB_CONTROL__JS5__EVENT_MASK", + [JOB_CONTROL__JS5__HEAD_NEXT] = "JOB_CONTROL__JS5__HEAD_NEXT", + [JOB_CONTROL__JS5__TAIL_NEXT] = "JOB_CONTROL__JS5__TAIL_NEXT", + [JOB_CONTROL__JS5__AFFINITY_NEXT] = "JOB_CONTROL__JS5__AFFINITY_NEXT", + [JOB_CONTROL__JS5__CONFIG_NEXT] = "JOB_CONTROL__JS5__CONFIG_NEXT", + [JOB_CONTROL__JS5__XAFFINITY_NEXT] = "JOB_CONTROL__JS5__XAFFINITY_NEXT", + [JOB_CONTROL__JS5__COMMAND_NEXT] = "JOB_CONTROL__JS5__COMMAND_NEXT", + [JOB_CONTROL__JS5__FLUSH_ID_NEXT] = "JOB_CONTROL__JS5__FLUSH_ID_NEXT", + [JOB_CONTROL__JS5__EVENT_MASK_NEXT] = "JOB_CONTROL__JS5__EVENT_MASK_NEXT", + [JOB_CONTROL__JS6__HEAD] = "JOB_CONTROL__JS6__HEAD", + [JOB_CONTROL__JS6__TAIL] = "JOB_CONTROL__JS6__TAIL", + [JOB_CONTROL__JS6__AFFINITY] = "JOB_CONTROL__JS6__AFFINITY", + [JOB_CONTROL__JS6__CONFIG] = "JOB_CONTROL__JS6__CONFIG", + [JOB_CONTROL__JS6__XAFFINITY] = "JOB_CONTROL__JS6__XAFFINITY", + [JOB_CONTROL__JS6__COMMAND] = "JOB_CONTROL__JS6__COMMAND", + [JOB_CONTROL__JS6__STATUS] = "JOB_CONTROL__JS6__STATUS", + [JOB_CONTROL__JS6__FLUSH_ID] = "JOB_CONTROL__JS6__FLUSH_ID", + [JOB_CONTROL__JS6__EVENT_0] = "JOB_CONTROL__JS6__EVENT_0", + [JOB_CONTROL__JS6__EVENT_1] = "JOB_CONTROL__JS6__EVENT_1", + [JOB_CONTROL__JS6__EVENT_MASK] = "JOB_CONTROL__JS6__EVENT_MASK", + 
[JOB_CONTROL__JS6__HEAD_NEXT] = "JOB_CONTROL__JS6__HEAD_NEXT", + [JOB_CONTROL__JS6__TAIL_NEXT] = "JOB_CONTROL__JS6__TAIL_NEXT", + [JOB_CONTROL__JS6__AFFINITY_NEXT] = "JOB_CONTROL__JS6__AFFINITY_NEXT", + [JOB_CONTROL__JS6__CONFIG_NEXT] = "JOB_CONTROL__JS6__CONFIG_NEXT", + [JOB_CONTROL__JS6__XAFFINITY_NEXT] = "JOB_CONTROL__JS6__XAFFINITY_NEXT", + [JOB_CONTROL__JS6__COMMAND_NEXT] = "JOB_CONTROL__JS6__COMMAND_NEXT", + [JOB_CONTROL__JS6__FLUSH_ID_NEXT] = "JOB_CONTROL__JS6__FLUSH_ID_NEXT", + [JOB_CONTROL__JS6__EVENT_MASK_NEXT] = "JOB_CONTROL__JS6__EVENT_MASK_NEXT", + [JOB_CONTROL__JS7__HEAD] = "JOB_CONTROL__JS7__HEAD", + [JOB_CONTROL__JS7__TAIL] = "JOB_CONTROL__JS7__TAIL", + [JOB_CONTROL__JS7__AFFINITY] = "JOB_CONTROL__JS7__AFFINITY", + [JOB_CONTROL__JS7__CONFIG] = "JOB_CONTROL__JS7__CONFIG", + [JOB_CONTROL__JS7__XAFFINITY] = "JOB_CONTROL__JS7__XAFFINITY", + [JOB_CONTROL__JS7__COMMAND] = "JOB_CONTROL__JS7__COMMAND", + [JOB_CONTROL__JS7__STATUS] = "JOB_CONTROL__JS7__STATUS", + [JOB_CONTROL__JS7__FLUSH_ID] = "JOB_CONTROL__JS7__FLUSH_ID", + [JOB_CONTROL__JS7__EVENT_0] = "JOB_CONTROL__JS7__EVENT_0", + [JOB_CONTROL__JS7__EVENT_1] = "JOB_CONTROL__JS7__EVENT_1", + [JOB_CONTROL__JS7__EVENT_MASK] = "JOB_CONTROL__JS7__EVENT_MASK", + [JOB_CONTROL__JS7__HEAD_NEXT] = "JOB_CONTROL__JS7__HEAD_NEXT", + [JOB_CONTROL__JS7__TAIL_NEXT] = "JOB_CONTROL__JS7__TAIL_NEXT", + [JOB_CONTROL__JS7__AFFINITY_NEXT] = "JOB_CONTROL__JS7__AFFINITY_NEXT", + [JOB_CONTROL__JS7__CONFIG_NEXT] = "JOB_CONTROL__JS7__CONFIG_NEXT", + [JOB_CONTROL__JS7__XAFFINITY_NEXT] = "JOB_CONTROL__JS7__XAFFINITY_NEXT", + [JOB_CONTROL__JS7__COMMAND_NEXT] = "JOB_CONTROL__JS7__COMMAND_NEXT", + [JOB_CONTROL__JS7__FLUSH_ID_NEXT] = "JOB_CONTROL__JS7__FLUSH_ID_NEXT", + [JOB_CONTROL__JS7__EVENT_MASK_NEXT] = "JOB_CONTROL__JS7__EVENT_MASK_NEXT", + [JOB_CONTROL__JS8__HEAD] = "JOB_CONTROL__JS8__HEAD", + [JOB_CONTROL__JS8__TAIL] = "JOB_CONTROL__JS8__TAIL", + [JOB_CONTROL__JS8__AFFINITY] = "JOB_CONTROL__JS8__AFFINITY", + 
[JOB_CONTROL__JS8__CONFIG] = "JOB_CONTROL__JS8__CONFIG", + [JOB_CONTROL__JS8__XAFFINITY] = "JOB_CONTROL__JS8__XAFFINITY", + [JOB_CONTROL__JS8__COMMAND] = "JOB_CONTROL__JS8__COMMAND", + [JOB_CONTROL__JS8__STATUS] = "JOB_CONTROL__JS8__STATUS", + [JOB_CONTROL__JS8__FLUSH_ID] = "JOB_CONTROL__JS8__FLUSH_ID", + [JOB_CONTROL__JS8__EVENT_0] = "JOB_CONTROL__JS8__EVENT_0", + [JOB_CONTROL__JS8__EVENT_1] = "JOB_CONTROL__JS8__EVENT_1", + [JOB_CONTROL__JS8__EVENT_MASK] = "JOB_CONTROL__JS8__EVENT_MASK", + [JOB_CONTROL__JS8__HEAD_NEXT] = "JOB_CONTROL__JS8__HEAD_NEXT", + [JOB_CONTROL__JS8__TAIL_NEXT] = "JOB_CONTROL__JS8__TAIL_NEXT", + [JOB_CONTROL__JS8__AFFINITY_NEXT] = "JOB_CONTROL__JS8__AFFINITY_NEXT", + [JOB_CONTROL__JS8__CONFIG_NEXT] = "JOB_CONTROL__JS8__CONFIG_NEXT", + [JOB_CONTROL__JS8__XAFFINITY_NEXT] = "JOB_CONTROL__JS8__XAFFINITY_NEXT", + [JOB_CONTROL__JS8__COMMAND_NEXT] = "JOB_CONTROL__JS8__COMMAND_NEXT", + [JOB_CONTROL__JS8__FLUSH_ID_NEXT] = "JOB_CONTROL__JS8__FLUSH_ID_NEXT", + [JOB_CONTROL__JS8__EVENT_MASK_NEXT] = "JOB_CONTROL__JS8__EVENT_MASK_NEXT", + [JOB_CONTROL__JS9__HEAD] = "JOB_CONTROL__JS9__HEAD", + [JOB_CONTROL__JS9__TAIL] = "JOB_CONTROL__JS9__TAIL", + [JOB_CONTROL__JS9__AFFINITY] = "JOB_CONTROL__JS9__AFFINITY", + [JOB_CONTROL__JS9__CONFIG] = "JOB_CONTROL__JS9__CONFIG", + [JOB_CONTROL__JS9__XAFFINITY] = "JOB_CONTROL__JS9__XAFFINITY", + [JOB_CONTROL__JS9__COMMAND] = "JOB_CONTROL__JS9__COMMAND", + [JOB_CONTROL__JS9__STATUS] = "JOB_CONTROL__JS9__STATUS", + [JOB_CONTROL__JS9__FLUSH_ID] = "JOB_CONTROL__JS9__FLUSH_ID", + [JOB_CONTROL__JS9__EVENT_0] = "JOB_CONTROL__JS9__EVENT_0", + [JOB_CONTROL__JS9__EVENT_1] = "JOB_CONTROL__JS9__EVENT_1", + [JOB_CONTROL__JS9__EVENT_MASK] = "JOB_CONTROL__JS9__EVENT_MASK", + [JOB_CONTROL__JS9__HEAD_NEXT] = "JOB_CONTROL__JS9__HEAD_NEXT", + [JOB_CONTROL__JS9__TAIL_NEXT] = "JOB_CONTROL__JS9__TAIL_NEXT", + [JOB_CONTROL__JS9__AFFINITY_NEXT] = "JOB_CONTROL__JS9__AFFINITY_NEXT", + [JOB_CONTROL__JS9__CONFIG_NEXT] = 
"JOB_CONTROL__JS9__CONFIG_NEXT", + [JOB_CONTROL__JS9__XAFFINITY_NEXT] = "JOB_CONTROL__JS9__XAFFINITY_NEXT", + [JOB_CONTROL__JS9__COMMAND_NEXT] = "JOB_CONTROL__JS9__COMMAND_NEXT", + [JOB_CONTROL__JS9__FLUSH_ID_NEXT] = "JOB_CONTROL__JS9__FLUSH_ID_NEXT", + [JOB_CONTROL__JS9__EVENT_MASK_NEXT] = "JOB_CONTROL__JS9__EVENT_MASK_NEXT", + [JOB_CONTROL__JS10__HEAD] = "JOB_CONTROL__JS10__HEAD", + [JOB_CONTROL__JS10__TAIL] = "JOB_CONTROL__JS10__TAIL", + [JOB_CONTROL__JS10__AFFINITY] = "JOB_CONTROL__JS10__AFFINITY", + [JOB_CONTROL__JS10__CONFIG] = "JOB_CONTROL__JS10__CONFIG", + [JOB_CONTROL__JS10__XAFFINITY] = "JOB_CONTROL__JS10__XAFFINITY", + [JOB_CONTROL__JS10__COMMAND] = "JOB_CONTROL__JS10__COMMAND", + [JOB_CONTROL__JS10__STATUS] = "JOB_CONTROL__JS10__STATUS", + [JOB_CONTROL__JS10__FLUSH_ID] = "JOB_CONTROL__JS10__FLUSH_ID", + [JOB_CONTROL__JS10__EVENT_0] = "JOB_CONTROL__JS10__EVENT_0", + [JOB_CONTROL__JS10__EVENT_1] = "JOB_CONTROL__JS10__EVENT_1", + [JOB_CONTROL__JS10__EVENT_MASK] = "JOB_CONTROL__JS10__EVENT_MASK", + [JOB_CONTROL__JS10__HEAD_NEXT] = "JOB_CONTROL__JS10__HEAD_NEXT", + [JOB_CONTROL__JS10__TAIL_NEXT] = "JOB_CONTROL__JS10__TAIL_NEXT", + [JOB_CONTROL__JS10__AFFINITY_NEXT] = "JOB_CONTROL__JS10__AFFINITY_NEXT", + [JOB_CONTROL__JS10__CONFIG_NEXT] = "JOB_CONTROL__JS10__CONFIG_NEXT", + [JOB_CONTROL__JS10__XAFFINITY_NEXT] = "JOB_CONTROL__JS10__XAFFINITY_NEXT", + [JOB_CONTROL__JS10__COMMAND_NEXT] = "JOB_CONTROL__JS10__COMMAND_NEXT", + [JOB_CONTROL__JS10__FLUSH_ID_NEXT] = "JOB_CONTROL__JS10__FLUSH_ID_NEXT", + [JOB_CONTROL__JS10__EVENT_MASK_NEXT] = "JOB_CONTROL__JS10__EVENT_MASK_NEXT", + [JOB_CONTROL__JS11__HEAD] = "JOB_CONTROL__JS11__HEAD", + [JOB_CONTROL__JS11__TAIL] = "JOB_CONTROL__JS11__TAIL", + [JOB_CONTROL__JS11__AFFINITY] = "JOB_CONTROL__JS11__AFFINITY", + [JOB_CONTROL__JS11__CONFIG] = "JOB_CONTROL__JS11__CONFIG", + [JOB_CONTROL__JS11__XAFFINITY] = "JOB_CONTROL__JS11__XAFFINITY", + [JOB_CONTROL__JS11__COMMAND] = "JOB_CONTROL__JS11__COMMAND", + 
[JOB_CONTROL__JS11__STATUS] = "JOB_CONTROL__JS11__STATUS", + [JOB_CONTROL__JS11__FLUSH_ID] = "JOB_CONTROL__JS11__FLUSH_ID", + [JOB_CONTROL__JS11__EVENT_0] = "JOB_CONTROL__JS11__EVENT_0", + [JOB_CONTROL__JS11__EVENT_1] = "JOB_CONTROL__JS11__EVENT_1", + [JOB_CONTROL__JS11__EVENT_MASK] = "JOB_CONTROL__JS11__EVENT_MASK", + [JOB_CONTROL__JS11__HEAD_NEXT] = "JOB_CONTROL__JS11__HEAD_NEXT", + [JOB_CONTROL__JS11__TAIL_NEXT] = "JOB_CONTROL__JS11__TAIL_NEXT", + [JOB_CONTROL__JS11__AFFINITY_NEXT] = "JOB_CONTROL__JS11__AFFINITY_NEXT", + [JOB_CONTROL__JS11__CONFIG_NEXT] = "JOB_CONTROL__JS11__CONFIG_NEXT", + [JOB_CONTROL__JS11__XAFFINITY_NEXT] = "JOB_CONTROL__JS11__XAFFINITY_NEXT", + [JOB_CONTROL__JS11__COMMAND_NEXT] = "JOB_CONTROL__JS11__COMMAND_NEXT", + [JOB_CONTROL__JS11__FLUSH_ID_NEXT] = "JOB_CONTROL__JS11__FLUSH_ID_NEXT", + [JOB_CONTROL__JS11__EVENT_MASK_NEXT] = "JOB_CONTROL__JS11__EVENT_MASK_NEXT", + [JOB_CONTROL__JS12__HEAD] = "JOB_CONTROL__JS12__HEAD", + [JOB_CONTROL__JS12__TAIL] = "JOB_CONTROL__JS12__TAIL", + [JOB_CONTROL__JS12__AFFINITY] = "JOB_CONTROL__JS12__AFFINITY", + [JOB_CONTROL__JS12__CONFIG] = "JOB_CONTROL__JS12__CONFIG", + [JOB_CONTROL__JS12__XAFFINITY] = "JOB_CONTROL__JS12__XAFFINITY", + [JOB_CONTROL__JS12__COMMAND] = "JOB_CONTROL__JS12__COMMAND", + [JOB_CONTROL__JS12__STATUS] = "JOB_CONTROL__JS12__STATUS", + [JOB_CONTROL__JS12__FLUSH_ID] = "JOB_CONTROL__JS12__FLUSH_ID", + [JOB_CONTROL__JS12__EVENT_0] = "JOB_CONTROL__JS12__EVENT_0", + [JOB_CONTROL__JS12__EVENT_1] = "JOB_CONTROL__JS12__EVENT_1", + [JOB_CONTROL__JS12__EVENT_MASK] = "JOB_CONTROL__JS12__EVENT_MASK", + [JOB_CONTROL__JS12__HEAD_NEXT] = "JOB_CONTROL__JS12__HEAD_NEXT", + [JOB_CONTROL__JS12__TAIL_NEXT] = "JOB_CONTROL__JS12__TAIL_NEXT", + [JOB_CONTROL__JS12__AFFINITY_NEXT] = "JOB_CONTROL__JS12__AFFINITY_NEXT", + [JOB_CONTROL__JS12__CONFIG_NEXT] = "JOB_CONTROL__JS12__CONFIG_NEXT", + [JOB_CONTROL__JS12__XAFFINITY_NEXT] = "JOB_CONTROL__JS12__XAFFINITY_NEXT", + [JOB_CONTROL__JS12__COMMAND_NEXT] = 
"JOB_CONTROL__JS12__COMMAND_NEXT", + [JOB_CONTROL__JS12__FLUSH_ID_NEXT] = "JOB_CONTROL__JS12__FLUSH_ID_NEXT", + [JOB_CONTROL__JS12__EVENT_MASK_NEXT] = "JOB_CONTROL__JS12__EVENT_MASK_NEXT", + [JOB_CONTROL__JS13__HEAD] = "JOB_CONTROL__JS13__HEAD", + [JOB_CONTROL__JS13__TAIL] = "JOB_CONTROL__JS13__TAIL", + [JOB_CONTROL__JS13__AFFINITY] = "JOB_CONTROL__JS13__AFFINITY", + [JOB_CONTROL__JS13__CONFIG] = "JOB_CONTROL__JS13__CONFIG", + [JOB_CONTROL__JS13__XAFFINITY] = "JOB_CONTROL__JS13__XAFFINITY", + [JOB_CONTROL__JS13__COMMAND] = "JOB_CONTROL__JS13__COMMAND", + [JOB_CONTROL__JS13__STATUS] = "JOB_CONTROL__JS13__STATUS", + [JOB_CONTROL__JS13__FLUSH_ID] = "JOB_CONTROL__JS13__FLUSH_ID", + [JOB_CONTROL__JS13__EVENT_0] = "JOB_CONTROL__JS13__EVENT_0", + [JOB_CONTROL__JS13__EVENT_1] = "JOB_CONTROL__JS13__EVENT_1", + [JOB_CONTROL__JS13__EVENT_MASK] = "JOB_CONTROL__JS13__EVENT_MASK", + [JOB_CONTROL__JS13__HEAD_NEXT] = "JOB_CONTROL__JS13__HEAD_NEXT", + [JOB_CONTROL__JS13__TAIL_NEXT] = "JOB_CONTROL__JS13__TAIL_NEXT", + [JOB_CONTROL__JS13__AFFINITY_NEXT] = "JOB_CONTROL__JS13__AFFINITY_NEXT", + [JOB_CONTROL__JS13__CONFIG_NEXT] = "JOB_CONTROL__JS13__CONFIG_NEXT", + [JOB_CONTROL__JS13__XAFFINITY_NEXT] = "JOB_CONTROL__JS13__XAFFINITY_NEXT", + [JOB_CONTROL__JS13__COMMAND_NEXT] = "JOB_CONTROL__JS13__COMMAND_NEXT", + [JOB_CONTROL__JS13__FLUSH_ID_NEXT] = "JOB_CONTROL__JS13__FLUSH_ID_NEXT", + [JOB_CONTROL__JS13__EVENT_MASK_NEXT] = "JOB_CONTROL__JS13__EVENT_MASK_NEXT", + [JOB_CONTROL__JS14__HEAD] = "JOB_CONTROL__JS14__HEAD", + [JOB_CONTROL__JS14__TAIL] = "JOB_CONTROL__JS14__TAIL", + [JOB_CONTROL__JS14__AFFINITY] = "JOB_CONTROL__JS14__AFFINITY", + [JOB_CONTROL__JS14__CONFIG] = "JOB_CONTROL__JS14__CONFIG", + [JOB_CONTROL__JS14__XAFFINITY] = "JOB_CONTROL__JS14__XAFFINITY", + [JOB_CONTROL__JS14__COMMAND] = "JOB_CONTROL__JS14__COMMAND", + [JOB_CONTROL__JS14__STATUS] = "JOB_CONTROL__JS14__STATUS", + [JOB_CONTROL__JS14__FLUSH_ID] = "JOB_CONTROL__JS14__FLUSH_ID", + [JOB_CONTROL__JS14__EVENT_0] = 
"JOB_CONTROL__JS14__EVENT_0", + [JOB_CONTROL__JS14__EVENT_1] = "JOB_CONTROL__JS14__EVENT_1", + [JOB_CONTROL__JS14__EVENT_MASK] = "JOB_CONTROL__JS14__EVENT_MASK", + [JOB_CONTROL__JS14__HEAD_NEXT] = "JOB_CONTROL__JS14__HEAD_NEXT", + [JOB_CONTROL__JS14__TAIL_NEXT] = "JOB_CONTROL__JS14__TAIL_NEXT", + [JOB_CONTROL__JS14__AFFINITY_NEXT] = "JOB_CONTROL__JS14__AFFINITY_NEXT", + [JOB_CONTROL__JS14__CONFIG_NEXT] = "JOB_CONTROL__JS14__CONFIG_NEXT", + [JOB_CONTROL__JS14__XAFFINITY_NEXT] = "JOB_CONTROL__JS14__XAFFINITY_NEXT", + [JOB_CONTROL__JS14__COMMAND_NEXT] = "JOB_CONTROL__JS14__COMMAND_NEXT", + [JOB_CONTROL__JS14__FLUSH_ID_NEXT] = "JOB_CONTROL__JS14__FLUSH_ID_NEXT", + [JOB_CONTROL__JS14__EVENT_MASK_NEXT] = "JOB_CONTROL__JS14__EVENT_MASK_NEXT", + [JOB_CONTROL__JS15__HEAD] = "JOB_CONTROL__JS15__HEAD", + [JOB_CONTROL__JS15__TAIL] = "JOB_CONTROL__JS15__TAIL", + [JOB_CONTROL__JS15__AFFINITY] = "JOB_CONTROL__JS15__AFFINITY", + [JOB_CONTROL__JS15__CONFIG] = "JOB_CONTROL__JS15__CONFIG", + [JOB_CONTROL__JS15__XAFFINITY] = "JOB_CONTROL__JS15__XAFFINITY", + [JOB_CONTROL__JS15__COMMAND] = "JOB_CONTROL__JS15__COMMAND", + [JOB_CONTROL__JS15__STATUS] = "JOB_CONTROL__JS15__STATUS", + [JOB_CONTROL__JS15__FLUSH_ID] = "JOB_CONTROL__JS15__FLUSH_ID", + [JOB_CONTROL__JS15__EVENT_0] = "JOB_CONTROL__JS15__EVENT_0", + [JOB_CONTROL__JS15__EVENT_1] = "JOB_CONTROL__JS15__EVENT_1", + [JOB_CONTROL__JS15__EVENT_MASK] = "JOB_CONTROL__JS15__EVENT_MASK", + [JOB_CONTROL__JS15__HEAD_NEXT] = "JOB_CONTROL__JS15__HEAD_NEXT", + [JOB_CONTROL__JS15__TAIL_NEXT] = "JOB_CONTROL__JS15__TAIL_NEXT", + [JOB_CONTROL__JS15__AFFINITY_NEXT] = "JOB_CONTROL__JS15__AFFINITY_NEXT", + [JOB_CONTROL__JS15__CONFIG_NEXT] = "JOB_CONTROL__JS15__CONFIG_NEXT", + [JOB_CONTROL__JS15__XAFFINITY_NEXT] = "JOB_CONTROL__JS15__XAFFINITY_NEXT", + [JOB_CONTROL__JS15__COMMAND_NEXT] = "JOB_CONTROL__JS15__COMMAND_NEXT", + [JOB_CONTROL__JS15__FLUSH_ID_NEXT] = "JOB_CONTROL__JS15__FLUSH_ID_NEXT", + [JOB_CONTROL__JS15__EVENT_MASK_NEXT] = 
"JOB_CONTROL__JS15__EVENT_MASK_NEXT", + [MMU_STAGE1__ST1MMU__IRQ_RAWSTAT] = "MMU_STAGE1__ST1MMU__IRQ_RAWSTAT", + [MMU_STAGE1__ST1MMU__IRQ_CLEAR] = "MMU_STAGE1__ST1MMU__IRQ_CLEAR", + [MMU_STAGE1__ST1MMU__IRQ_MASK] = "MMU_STAGE1__ST1MMU__IRQ_MASK", + [MMU_STAGE1__ST1MMU__IRQ_STATUS] = "MMU_STAGE1__ST1MMU__IRQ_STATUS", + [MMU_STAGE1__ST1MMU__AS0__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS0__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS0__MEMATTR] = "MMU_STAGE1__ST1MMU__AS0__MEMATTR", + [MMU_STAGE1__ST1MMU__AS0__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS0__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS0__COMMAND] = "MMU_STAGE1__ST1MMU__AS0__COMMAND", + [MMU_STAGE1__ST1MMU__AS0__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS0__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS0__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS0__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS0__STATUS] = "MMU_STAGE1__ST1MMU__AS0__STATUS", + [MMU_STAGE1__ST1MMU__AS0__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS0__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS0__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS0__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS1__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS1__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS1__MEMATTR] = "MMU_STAGE1__ST1MMU__AS1__MEMATTR", + [MMU_STAGE1__ST1MMU__AS1__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS1__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS1__COMMAND] = "MMU_STAGE1__ST1MMU__AS1__COMMAND", + [MMU_STAGE1__ST1MMU__AS1__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS1__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS1__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS1__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS1__STATUS] = "MMU_STAGE1__ST1MMU__AS1__STATUS", + [MMU_STAGE1__ST1MMU__AS1__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS1__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS1__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS1__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS2__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS2__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS2__MEMATTR] = "MMU_STAGE1__ST1MMU__AS2__MEMATTR", + [MMU_STAGE1__ST1MMU__AS2__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS2__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS2__COMMAND] = 
"MMU_STAGE1__ST1MMU__AS2__COMMAND", + [MMU_STAGE1__ST1MMU__AS2__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS2__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS2__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS2__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS2__STATUS] = "MMU_STAGE1__ST1MMU__AS2__STATUS", + [MMU_STAGE1__ST1MMU__AS2__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS2__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS2__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS2__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS3__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS3__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS3__MEMATTR] = "MMU_STAGE1__ST1MMU__AS3__MEMATTR", + [MMU_STAGE1__ST1MMU__AS3__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS3__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS3__COMMAND] = "MMU_STAGE1__ST1MMU__AS3__COMMAND", + [MMU_STAGE1__ST1MMU__AS3__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS3__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS3__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS3__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS3__STATUS] = "MMU_STAGE1__ST1MMU__AS3__STATUS", + [MMU_STAGE1__ST1MMU__AS3__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS3__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS3__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS3__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS4__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS4__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS4__MEMATTR] = "MMU_STAGE1__ST1MMU__AS4__MEMATTR", + [MMU_STAGE1__ST1MMU__AS4__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS4__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS4__COMMAND] = "MMU_STAGE1__ST1MMU__AS4__COMMAND", + [MMU_STAGE1__ST1MMU__AS4__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS4__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS4__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS4__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS4__STATUS] = "MMU_STAGE1__ST1MMU__AS4__STATUS", + [MMU_STAGE1__ST1MMU__AS4__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS4__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS4__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS4__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS5__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS5__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS5__MEMATTR] = "MMU_STAGE1__ST1MMU__AS5__MEMATTR", + 
[MMU_STAGE1__ST1MMU__AS5__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS5__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS5__COMMAND] = "MMU_STAGE1__ST1MMU__AS5__COMMAND", + [MMU_STAGE1__ST1MMU__AS5__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS5__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS5__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS5__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS5__STATUS] = "MMU_STAGE1__ST1MMU__AS5__STATUS", + [MMU_STAGE1__ST1MMU__AS5__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS5__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS5__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS5__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS6__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS6__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS6__MEMATTR] = "MMU_STAGE1__ST1MMU__AS6__MEMATTR", + [MMU_STAGE1__ST1MMU__AS6__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS6__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS6__COMMAND] = "MMU_STAGE1__ST1MMU__AS6__COMMAND", + [MMU_STAGE1__ST1MMU__AS6__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS6__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS6__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS6__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS6__STATUS] = "MMU_STAGE1__ST1MMU__AS6__STATUS", + [MMU_STAGE1__ST1MMU__AS6__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS6__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS6__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS6__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS7__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS7__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS7__MEMATTR] = "MMU_STAGE1__ST1MMU__AS7__MEMATTR", + [MMU_STAGE1__ST1MMU__AS7__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS7__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS7__COMMAND] = "MMU_STAGE1__ST1MMU__AS7__COMMAND", + [MMU_STAGE1__ST1MMU__AS7__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS7__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS7__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS7__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS7__STATUS] = "MMU_STAGE1__ST1MMU__AS7__STATUS", + [MMU_STAGE1__ST1MMU__AS7__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS7__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS7__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS7__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS8__TRANSTAB] = 
"MMU_STAGE1__ST1MMU__AS8__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS8__MEMATTR] = "MMU_STAGE1__ST1MMU__AS8__MEMATTR", + [MMU_STAGE1__ST1MMU__AS8__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS8__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS8__COMMAND] = "MMU_STAGE1__ST1MMU__AS8__COMMAND", + [MMU_STAGE1__ST1MMU__AS8__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS8__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS8__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS8__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS8__STATUS] = "MMU_STAGE1__ST1MMU__AS8__STATUS", + [MMU_STAGE1__ST1MMU__AS8__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS8__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS8__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS8__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS9__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS9__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS9__MEMATTR] = "MMU_STAGE1__ST1MMU__AS9__MEMATTR", + [MMU_STAGE1__ST1MMU__AS9__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS9__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS9__COMMAND] = "MMU_STAGE1__ST1MMU__AS9__COMMAND", + [MMU_STAGE1__ST1MMU__AS9__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS9__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS9__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS9__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS9__STATUS] = "MMU_STAGE1__ST1MMU__AS9__STATUS", + [MMU_STAGE1__ST1MMU__AS9__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS9__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS9__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS9__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS10__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS10__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS10__MEMATTR] = "MMU_STAGE1__ST1MMU__AS10__MEMATTR", + [MMU_STAGE1__ST1MMU__AS10__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS10__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS10__COMMAND] = "MMU_STAGE1__ST1MMU__AS10__COMMAND", + [MMU_STAGE1__ST1MMU__AS10__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS10__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS10__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS10__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS10__STATUS] = "MMU_STAGE1__ST1MMU__AS10__STATUS", + [MMU_STAGE1__ST1MMU__AS10__TRANSCFG] = 
"MMU_STAGE1__ST1MMU__AS10__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS10__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS10__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS11__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS11__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS11__MEMATTR] = "MMU_STAGE1__ST1MMU__AS11__MEMATTR", + [MMU_STAGE1__ST1MMU__AS11__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS11__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS11__COMMAND] = "MMU_STAGE1__ST1MMU__AS11__COMMAND", + [MMU_STAGE1__ST1MMU__AS11__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS11__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS11__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS11__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS11__STATUS] = "MMU_STAGE1__ST1MMU__AS11__STATUS", + [MMU_STAGE1__ST1MMU__AS11__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS11__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS11__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS11__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS12__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS12__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS12__MEMATTR] = "MMU_STAGE1__ST1MMU__AS12__MEMATTR", + [MMU_STAGE1__ST1MMU__AS12__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS12__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS12__COMMAND] = "MMU_STAGE1__ST1MMU__AS12__COMMAND", + [MMU_STAGE1__ST1MMU__AS12__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS12__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS12__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS12__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS12__STATUS] = "MMU_STAGE1__ST1MMU__AS12__STATUS", + [MMU_STAGE1__ST1MMU__AS12__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS12__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS12__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS12__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS13__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS13__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS13__MEMATTR] = "MMU_STAGE1__ST1MMU__AS13__MEMATTR", + [MMU_STAGE1__ST1MMU__AS13__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS13__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS13__COMMAND] = "MMU_STAGE1__ST1MMU__AS13__COMMAND", + [MMU_STAGE1__ST1MMU__AS13__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS13__FAULTSTATUS", + 
[MMU_STAGE1__ST1MMU__AS13__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS13__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS13__STATUS] = "MMU_STAGE1__ST1MMU__AS13__STATUS", + [MMU_STAGE1__ST1MMU__AS13__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS13__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS13__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS13__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS14__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS14__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS14__MEMATTR] = "MMU_STAGE1__ST1MMU__AS14__MEMATTR", + [MMU_STAGE1__ST1MMU__AS14__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS14__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS14__COMMAND] = "MMU_STAGE1__ST1MMU__AS14__COMMAND", + [MMU_STAGE1__ST1MMU__AS14__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS14__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS14__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS14__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS14__STATUS] = "MMU_STAGE1__ST1MMU__AS14__STATUS", + [MMU_STAGE1__ST1MMU__AS14__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS14__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS14__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS14__FAULTEXTRA", + [MMU_STAGE1__ST1MMU__AS15__TRANSTAB] = "MMU_STAGE1__ST1MMU__AS15__TRANSTAB", + [MMU_STAGE1__ST1MMU__AS15__MEMATTR] = "MMU_STAGE1__ST1MMU__AS15__MEMATTR", + [MMU_STAGE1__ST1MMU__AS15__LOCKADDR] = "MMU_STAGE1__ST1MMU__AS15__LOCKADDR", + [MMU_STAGE1__ST1MMU__AS15__COMMAND] = "MMU_STAGE1__ST1MMU__AS15__COMMAND", + [MMU_STAGE1__ST1MMU__AS15__FAULTSTATUS] = "MMU_STAGE1__ST1MMU__AS15__FAULTSTATUS", + [MMU_STAGE1__ST1MMU__AS15__FAULTADDRESS] = "MMU_STAGE1__ST1MMU__AS15__FAULTADDRESS", + [MMU_STAGE1__ST1MMU__AS15__STATUS] = "MMU_STAGE1__ST1MMU__AS15__STATUS", + [MMU_STAGE1__ST1MMU__AS15__TRANSCFG] = "MMU_STAGE1__ST1MMU__AS15__TRANSCFG", + [MMU_STAGE1__ST1MMU__AS15__FAULTEXTRA] = "MMU_STAGE1__ST1MMU__AS15__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__IRQ_RAWSTAT] = "MMU_STAGE2__ST2MMU__IRQ_RAWSTAT", + [MMU_STAGE2__ST2MMU__IRQ_CLEAR] = "MMU_STAGE2__ST2MMU__IRQ_CLEAR", + [MMU_STAGE2__ST2MMU__IRQ_MASK] = "MMU_STAGE2__ST2MMU__IRQ_MASK", + 
[MMU_STAGE2__ST2MMU__IRQ_STATUS] = "MMU_STAGE2__ST2MMU__IRQ_STATUS", + [MMU_STAGE2__ST2MMU__AS0__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS0__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS0__MEMATTR] = "MMU_STAGE2__ST2MMU__AS0__MEMATTR", + [MMU_STAGE2__ST2MMU__AS0__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS0__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS0__COMMAND] = "MMU_STAGE2__ST2MMU__AS0__COMMAND", + [MMU_STAGE2__ST2MMU__AS0__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS0__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS0__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS0__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS0__STATUS] = "MMU_STAGE2__ST2MMU__AS0__STATUS", + [MMU_STAGE2__ST2MMU__AS0__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS0__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS0__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS0__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS1__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS1__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS1__MEMATTR] = "MMU_STAGE2__ST2MMU__AS1__MEMATTR", + [MMU_STAGE2__ST2MMU__AS1__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS1__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS1__COMMAND] = "MMU_STAGE2__ST2MMU__AS1__COMMAND", + [MMU_STAGE2__ST2MMU__AS1__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS1__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS1__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS1__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS1__STATUS] = "MMU_STAGE2__ST2MMU__AS1__STATUS", + [MMU_STAGE2__ST2MMU__AS1__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS1__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS1__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS1__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS2__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS2__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS2__MEMATTR] = "MMU_STAGE2__ST2MMU__AS2__MEMATTR", + [MMU_STAGE2__ST2MMU__AS2__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS2__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS2__COMMAND] = "MMU_STAGE2__ST2MMU__AS2__COMMAND", + [MMU_STAGE2__ST2MMU__AS2__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS2__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS2__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS2__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS2__STATUS] = 
"MMU_STAGE2__ST2MMU__AS2__STATUS", + [MMU_STAGE2__ST2MMU__AS2__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS2__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS2__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS2__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS3__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS3__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS3__MEMATTR] = "MMU_STAGE2__ST2MMU__AS3__MEMATTR", + [MMU_STAGE2__ST2MMU__AS3__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS3__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS3__COMMAND] = "MMU_STAGE2__ST2MMU__AS3__COMMAND", + [MMU_STAGE2__ST2MMU__AS3__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS3__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS3__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS3__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS3__STATUS] = "MMU_STAGE2__ST2MMU__AS3__STATUS", + [MMU_STAGE2__ST2MMU__AS3__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS3__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS3__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS3__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS4__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS4__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS4__MEMATTR] = "MMU_STAGE2__ST2MMU__AS4__MEMATTR", + [MMU_STAGE2__ST2MMU__AS4__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS4__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS4__COMMAND] = "MMU_STAGE2__ST2MMU__AS4__COMMAND", + [MMU_STAGE2__ST2MMU__AS4__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS4__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS4__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS4__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS4__STATUS] = "MMU_STAGE2__ST2MMU__AS4__STATUS", + [MMU_STAGE2__ST2MMU__AS4__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS4__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS4__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS4__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS5__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS5__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS5__MEMATTR] = "MMU_STAGE2__ST2MMU__AS5__MEMATTR", + [MMU_STAGE2__ST2MMU__AS5__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS5__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS5__COMMAND] = "MMU_STAGE2__ST2MMU__AS5__COMMAND", + [MMU_STAGE2__ST2MMU__AS5__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS5__FAULTSTATUS", + 
[MMU_STAGE2__ST2MMU__AS5__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS5__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS5__STATUS] = "MMU_STAGE2__ST2MMU__AS5__STATUS", + [MMU_STAGE2__ST2MMU__AS5__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS5__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS5__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS5__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS6__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS6__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS6__MEMATTR] = "MMU_STAGE2__ST2MMU__AS6__MEMATTR", + [MMU_STAGE2__ST2MMU__AS6__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS6__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS6__COMMAND] = "MMU_STAGE2__ST2MMU__AS6__COMMAND", + [MMU_STAGE2__ST2MMU__AS6__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS6__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS6__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS6__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS6__STATUS] = "MMU_STAGE2__ST2MMU__AS6__STATUS", + [MMU_STAGE2__ST2MMU__AS6__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS6__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS6__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS6__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS7__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS7__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS7__MEMATTR] = "MMU_STAGE2__ST2MMU__AS7__MEMATTR", + [MMU_STAGE2__ST2MMU__AS7__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS7__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS7__COMMAND] = "MMU_STAGE2__ST2MMU__AS7__COMMAND", + [MMU_STAGE2__ST2MMU__AS7__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS7__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS7__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS7__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS7__STATUS] = "MMU_STAGE2__ST2MMU__AS7__STATUS", + [MMU_STAGE2__ST2MMU__AS7__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS7__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS7__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS7__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS8__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS8__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS8__MEMATTR] = "MMU_STAGE2__ST2MMU__AS8__MEMATTR", + [MMU_STAGE2__ST2MMU__AS8__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS8__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS8__COMMAND] = 
"MMU_STAGE2__ST2MMU__AS8__COMMAND", + [MMU_STAGE2__ST2MMU__AS8__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS8__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS8__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS8__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS8__STATUS] = "MMU_STAGE2__ST2MMU__AS8__STATUS", + [MMU_STAGE2__ST2MMU__AS8__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS8__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS8__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS8__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS9__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS9__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS9__MEMATTR] = "MMU_STAGE2__ST2MMU__AS9__MEMATTR", + [MMU_STAGE2__ST2MMU__AS9__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS9__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS9__COMMAND] = "MMU_STAGE2__ST2MMU__AS9__COMMAND", + [MMU_STAGE2__ST2MMU__AS9__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS9__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS9__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS9__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS9__STATUS] = "MMU_STAGE2__ST2MMU__AS9__STATUS", + [MMU_STAGE2__ST2MMU__AS9__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS9__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS9__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS9__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS10__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS10__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS10__MEMATTR] = "MMU_STAGE2__ST2MMU__AS10__MEMATTR", + [MMU_STAGE2__ST2MMU__AS10__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS10__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS10__COMMAND] = "MMU_STAGE2__ST2MMU__AS10__COMMAND", + [MMU_STAGE2__ST2MMU__AS10__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS10__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS10__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS10__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS10__STATUS] = "MMU_STAGE2__ST2MMU__AS10__STATUS", + [MMU_STAGE2__ST2MMU__AS10__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS10__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS10__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS10__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS11__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS11__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS11__MEMATTR] = 
"MMU_STAGE2__ST2MMU__AS11__MEMATTR", + [MMU_STAGE2__ST2MMU__AS11__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS11__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS11__COMMAND] = "MMU_STAGE2__ST2MMU__AS11__COMMAND", + [MMU_STAGE2__ST2MMU__AS11__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS11__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS11__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS11__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS11__STATUS] = "MMU_STAGE2__ST2MMU__AS11__STATUS", + [MMU_STAGE2__ST2MMU__AS11__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS11__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS11__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS11__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS12__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS12__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS12__MEMATTR] = "MMU_STAGE2__ST2MMU__AS12__MEMATTR", + [MMU_STAGE2__ST2MMU__AS12__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS12__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS12__COMMAND] = "MMU_STAGE2__ST2MMU__AS12__COMMAND", + [MMU_STAGE2__ST2MMU__AS12__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS12__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS12__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS12__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS12__STATUS] = "MMU_STAGE2__ST2MMU__AS12__STATUS", + [MMU_STAGE2__ST2MMU__AS12__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS12__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS12__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS12__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS13__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS13__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS13__MEMATTR] = "MMU_STAGE2__ST2MMU__AS13__MEMATTR", + [MMU_STAGE2__ST2MMU__AS13__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS13__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS13__COMMAND] = "MMU_STAGE2__ST2MMU__AS13__COMMAND", + [MMU_STAGE2__ST2MMU__AS13__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS13__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS13__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS13__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS13__STATUS] = "MMU_STAGE2__ST2MMU__AS13__STATUS", + [MMU_STAGE2__ST2MMU__AS13__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS13__TRANSCFG", + 
[MMU_STAGE2__ST2MMU__AS13__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS13__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS14__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS14__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS14__MEMATTR] = "MMU_STAGE2__ST2MMU__AS14__MEMATTR", + [MMU_STAGE2__ST2MMU__AS14__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS14__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS14__COMMAND] = "MMU_STAGE2__ST2MMU__AS14__COMMAND", + [MMU_STAGE2__ST2MMU__AS14__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS14__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS14__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS14__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS14__STATUS] = "MMU_STAGE2__ST2MMU__AS14__STATUS", + [MMU_STAGE2__ST2MMU__AS14__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS14__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS14__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS14__FAULTEXTRA", + [MMU_STAGE2__ST2MMU__AS15__TRANSTAB] = "MMU_STAGE2__ST2MMU__AS15__TRANSTAB", + [MMU_STAGE2__ST2MMU__AS15__MEMATTR] = "MMU_STAGE2__ST2MMU__AS15__MEMATTR", + [MMU_STAGE2__ST2MMU__AS15__LOCKADDR] = "MMU_STAGE2__ST2MMU__AS15__LOCKADDR", + [MMU_STAGE2__ST2MMU__AS15__COMMAND] = "MMU_STAGE2__ST2MMU__AS15__COMMAND", + [MMU_STAGE2__ST2MMU__AS15__FAULTSTATUS] = "MMU_STAGE2__ST2MMU__AS15__FAULTSTATUS", + [MMU_STAGE2__ST2MMU__AS15__FAULTADDRESS] = "MMU_STAGE2__ST2MMU__AS15__FAULTADDRESS", + [MMU_STAGE2__ST2MMU__AS15__STATUS] = "MMU_STAGE2__ST2MMU__AS15__STATUS", + [MMU_STAGE2__ST2MMU__AS15__TRANSCFG] = "MMU_STAGE2__ST2MMU__AS15__TRANSCFG", + [MMU_STAGE2__ST2MMU__AS15__FAULTEXTRA] = "MMU_STAGE2__ST2MMU__AS15__FAULTEXTRA", + [GPU_CONTROL__REVIDR] = "GPU_CONTROL__REVIDR", + [GPU_CONTROL__STACK_PRESENT] = "GPU_CONTROL__STACK_PRESENT", + [GPU_CONTROL__STACK_PWROFF] = "GPU_CONTROL__STACK_PWROFF", + [GPU_CONTROL__STACK_PWRON] = "GPU_CONTROL__STACK_PWRON", + [GPU_CONTROL__STACK_PWRTRANS] = "GPU_CONTROL__STACK_PWRTRANS", + [GPU_CONTROL__STACK_READY] = "GPU_CONTROL__STACK_READY", + [GPU_CONTROL__TEXTURE_FEATURES_3] = "GPU_CONTROL__TEXTURE_FEATURES_3", + [GPU_CONTROL__CORE_FEATURES] = 
"GPU_CONTROL__CORE_FEATURES", + [GPU_CONTROL__THREAD_TLS_ALLOC] = "GPU_CONTROL__THREAD_TLS_ALLOC", + [GPU_CONTROL__L2_CONFIG] = "GPU_CONTROL__L2_CONFIG", +}; +/* Return the enum_strings entry for reg_enum, or a fixed INVALID_REG string when it is out of range. */ +const char *kbase_reg_get_enum_string(u32 reg_enum) +{ + if (reg_enum >= ARRAY_SIZE(enum_strings)) /* index is past the end of the table */ + return "INVALID_REG"; + return enum_strings[reg_enum]; /* NOTE(review): a slot with no initializer would presumably be NULL - confirm the table is dense */ +} +#endif /* CONFIG_MALI_BIFROST_DEBUG */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h new file mode 100644 index 000000000000..f5618c4794db --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h @@ -0,0 +1,762 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This header is autogenerated. Avoid modifying this file unless absolutely + * necessary. 
+ */ + +#ifndef _MALI_KBASE_REGMAP_JM_ENUMS_H_ +#define _MALI_KBASE_REGMAP_JM_ENUMS_H_ + +#if MALI_USE_CSF +#error "Cannot be compiled with CSF" +#endif + +enum kbase_regmap_enum_v6_0 { + GPU_CONTROL__GPU_ID = 0, /* (RO) 32-bit 0x0 */ + GPU_CONTROL__L2_FEATURES, /* (RO) 32-bit 0x4 */ + GPU_CONTROL__TILER_FEATURES, /* (RO) 32-bit 0xC */ + GPU_CONTROL__MEM_FEATURES, /* (RO) 32-bit 0x10 */ + GPU_CONTROL__MMU_FEATURES, /* (RO) 32-bit 0x14 */ + GPU_CONTROL__AS_PRESENT, /* (RO) 32-bit 0x18 */ + GPU_CONTROL__JS_PRESENT, /* (RO) 32-bit 0x1C */ + GPU_CONTROL__GPU_IRQ_RAWSTAT, /* (RW) 32-bit 0x20 */ + GPU_CONTROL__GPU_IRQ_CLEAR, /* (WO) 32-bit 0x24 */ + GPU_CONTROL__GPU_IRQ_MASK, /* (RW) 32-bit 0x28 */ + GPU_CONTROL__GPU_IRQ_STATUS, /* (RO) 32-bit 0x2C */ + GPU_CONTROL__GPU_COMMAND, /* (WO) 32-bit 0x30 */ + GPU_CONTROL__GPU_STATUS, /* (RO) 32-bit 0x34 */ + GPU_CONTROL__LATEST_FLUSH, /* (RO) 32-bit 0x38 */ + GPU_CONTROL__GPU_FAULTSTATUS, /* (RO) 32-bit 0x3C */ + GPU_CONTROL__GPU_FAULTADDRESS, /* (RO) 64-bit 0x40 */ + GPU_CONTROL__AFBC_FEATURES, /* (RO) 32-bit 0x4C */ + GPU_CONTROL__PWR_KEY, /* (WO) 32-bit 0x50 */ + GPU_CONTROL__PWR_OVERRIDE0, /* (RW) 32-bit 0x54 */ + GPU_CONTROL__PWR_OVERRIDE1, /* (RW) 32-bit 0x58 */ + GPU_CONTROL__PRFCNT_BASE, /* (RW) 64-bit 0x60 */ + GPU_CONTROL__PRFCNT_CONFIG, /* (RW) 32-bit 0x68 */ + GPU_CONTROL__PRFCNT_JM_EN, /* (RW) 32-bit 0x6C */ + GPU_CONTROL__PRFCNT_SHADER_EN, /* (RW) 32-bit 0x70 */ + GPU_CONTROL__PRFCNT_TILER_EN, /* (RW) 32-bit 0x74 */ + GPU_CONTROL__PRFCNT_MMU_L2_EN, /* (RW) 32-bit 0x7C */ + GPU_CONTROL__CYCLE_COUNT, /* (RO) 64-bit 0x90 */ + GPU_CONTROL__TIMESTAMP, /* (RO) 64-bit 0x98 */ + GPU_CONTROL__THREAD_MAX_THREADS, /* (RO) 32-bit 0xA0 */ + GPU_CONTROL__THREAD_MAX_WORKGROUP_SIZE, /* (RO) 32-bit 0xA4 */ + GPU_CONTROL__THREAD_MAX_BARRIER_SIZE, /* (RO) 32-bit 0xA8 */ + GPU_CONTROL__THREAD_FEATURES, /* (RO) 32-bit 0xAC */ + GPU_CONTROL__TEXTURE_FEATURES_0, /* (RO) 32-bit 0xB0 */ + GPU_CONTROL__TEXTURE_FEATURES_1, /* (RO) 32-bit 
0xB4 */ + GPU_CONTROL__TEXTURE_FEATURES_2, /* (RO) 32-bit 0xB8 */ + GPU_CONTROL__JS0_FEATURES, /* (RO) 32-bit 0xC0 */ + GPU_CONTROL__JS1_FEATURES, /* (RO) 32-bit 0xC4 */ + GPU_CONTROL__JS2_FEATURES, /* (RO) 32-bit 0xC8 */ + GPU_CONTROL__JS3_FEATURES, /* (RO) 32-bit 0xCC */ + GPU_CONTROL__JS4_FEATURES, /* (RO) 32-bit 0xD0 */ + GPU_CONTROL__JS5_FEATURES, /* (RO) 32-bit 0xD4 */ + GPU_CONTROL__JS6_FEATURES, /* (RO) 32-bit 0xD8 */ + GPU_CONTROL__JS7_FEATURES, /* (RO) 32-bit 0xDC */ + GPU_CONTROL__JS8_FEATURES, /* (RO) 32-bit 0xE0 */ + GPU_CONTROL__JS9_FEATURES, /* (RO) 32-bit 0xE4 */ + GPU_CONTROL__JS10_FEATURES, /* (RO) 32-bit 0xE8 */ + GPU_CONTROL__JS11_FEATURES, /* (RO) 32-bit 0xEC */ + GPU_CONTROL__JS12_FEATURES, /* (RO) 32-bit 0xF0 */ + GPU_CONTROL__JS13_FEATURES, /* (RO) 32-bit 0xF4 */ + GPU_CONTROL__JS14_FEATURES, /* (RO) 32-bit 0xF8 */ + GPU_CONTROL__JS15_FEATURES, /* (RO) 32-bit 0xFC */ + GPU_CONTROL__SHADER_PRESENT, /* (RO) 64-bit 0x100 */ + GPU_CONTROL__TILER_PRESENT, /* (RO) 64-bit 0x110 */ + GPU_CONTROL__L2_PRESENT, /* (RO) 64-bit 0x120 */ + GPU_CONTROL__SHADER_READY, /* (RO) 64-bit 0x140 */ + GPU_CONTROL__TILER_READY, /* (RO) 64-bit 0x150 */ + GPU_CONTROL__L2_READY, /* (RO) 64-bit 0x160 */ + GPU_CONTROL__SHADER_PWRON, /* (WO) 64-bit 0x180 */ + GPU_CONTROL__TILER_PWRON, /* (WO) 64-bit 0x190 */ + GPU_CONTROL__L2_PWRON, /* (WO) 64-bit 0x1A0 */ + GPU_CONTROL__SHADER_PWROFF, /* (WO) 64-bit 0x1C0 */ + GPU_CONTROL__TILER_PWROFF, /* (WO) 64-bit 0x1D0 */ + GPU_CONTROL__L2_PWROFF, /* (WO) 64-bit 0x1E0 */ + GPU_CONTROL__SHADER_PWRTRANS, /* (RO) 64-bit 0x200 */ + GPU_CONTROL__TILER_PWRTRANS, /* (RO) 64-bit 0x210 */ + GPU_CONTROL__L2_PWRTRANS, /* (RO) 64-bit 0x220 */ + GPU_CONTROL__SHADER_PWRACTIVE, /* (RO) 64-bit 0x240 */ + GPU_CONTROL__TILER_PWRACTIVE, /* (RO) 64-bit 0x250 */ + GPU_CONTROL__L2_PWRACTIVE, /* (RO) 64-bit 0x260 */ + GPU_CONTROL__COHERENCY_FEATURES, /* (RO) 32-bit 0x300 */ + GPU_CONTROL__COHERENCY_ENABLE, /* (RW) 32-bit 0x304 */ + 
GPU_CONTROL__GPU_USER_INn, /* (RO) 32-bit 0x400 */ + GPU_CONTROL__GPU_USER_OUTn, /* (RW) 32-bit 0x500 */ + GPU_CONTROL__JM_CONFIG, /* (RW) 32-bit 0xF00 */ + GPU_CONTROL__SHADER_CONFIG, /* (RW) 32-bit 0xF04 */ + GPU_CONTROL__TILER_CONFIG, /* (RW) 32-bit 0xF08 */ + GPU_CONTROL__L2_MMU_CONFIG, /* (RW) 32-bit 0xF0C */ + JOB_CONTROL__JOB_IRQ_RAWSTAT, /* (RW) 32-bit 0x1000 */ + JOB_CONTROL__JOB_IRQ_CLEAR, /* (WO) 32-bit 0x1004 */ + JOB_CONTROL__JOB_IRQ_MASK, /* (RW) 32-bit 0x1008 */ + JOB_CONTROL__JOB_IRQ_STATUS, /* (RO) 32-bit 0x100C */ + JOB_CONTROL__JOB_IRQ_JS_STATE, /* (RO) 32-bit 0x1010 */ + JOB_CONTROL__JOB_IRQ_THROTTLE, /* (RW) 32-bit 0x1014 */ + JOB_CONTROL__EVENT_IRQ_RAWSTAT, /* (RW) 32-bit 0x1020 */ + JOB_CONTROL__EVENT_IRQ_CLEAR, /* (WO) 32-bit 0x1024 */ + JOB_CONTROL__EVENT_IRQ_MASK, /* (RW) 32-bit 0x1028 */ + JOB_CONTROL__EVENT_IRQ_STATUS, /* (RO) 32-bit 0x102C */ + JOB_CONTROL__JS0__HEAD, /* (RO) 64-bit 0x1800 */ + JOB_CONTROL__JS0__TAIL, /* (RO) 64-bit 0x1808 */ + JOB_CONTROL__JS0__AFFINITY, /* (RO) 64-bit 0x1810 */ + JOB_CONTROL__JS0__CONFIG, /* (RO) 32-bit 0x1818 */ + JOB_CONTROL__JS0__XAFFINITY, /* (RO) 32-bit 0x181C */ + JOB_CONTROL__JS0__COMMAND, /* (RW) 32-bit 0x1820 */ + JOB_CONTROL__JS0__STATUS, /* (RO) 32-bit 0x1824 */ + JOB_CONTROL__JS0__FLUSH_ID, /* (RO) 32-bit 0x1830 */ + JOB_CONTROL__JS0__EVENT_0, /* (RW) 32-bit 0x1834 */ + JOB_CONTROL__JS0__EVENT_1, /* (RW) 32-bit 0x1838 */ + JOB_CONTROL__JS0__EVENT_MASK, /* (RW) 32-bit 0x183C */ + JOB_CONTROL__JS0__HEAD_NEXT, /* (RW) 64-bit 0x1840 */ + JOB_CONTROL__JS0__TAIL_NEXT, /* (RW) 64-bit 0x1848 */ + JOB_CONTROL__JS0__AFFINITY_NEXT, /* (RW) 64-bit 0x1850 */ + JOB_CONTROL__JS0__CONFIG_NEXT, /* (RW) 32-bit 0x1858 */ + JOB_CONTROL__JS0__XAFFINITY_NEXT, /* (RW) 32-bit 0x185C */ + JOB_CONTROL__JS0__COMMAND_NEXT, /* (RW) 32-bit 0x1860 */ + JOB_CONTROL__JS0__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1870 */ + JOB_CONTROL__JS0__EVENT_MASK_NEXT, /* (RW) 32-bit 0x187C */ + JOB_CONTROL__JS1__HEAD, /* (RO) 64-bit 0x1880 */ 
+ JOB_CONTROL__JS1__TAIL, /* (RO) 64-bit 0x1888 */ + JOB_CONTROL__JS1__AFFINITY, /* (RO) 64-bit 0x1890 */ + JOB_CONTROL__JS1__CONFIG, /* (RO) 32-bit 0x1898 */ + JOB_CONTROL__JS1__XAFFINITY, /* (RO) 32-bit 0x189C */ + JOB_CONTROL__JS1__COMMAND, /* (RW) 32-bit 0x18A0 */ + JOB_CONTROL__JS1__STATUS, /* (RO) 32-bit 0x18A4 */ + JOB_CONTROL__JS1__FLUSH_ID, /* (RO) 32-bit 0x18B0 */ + JOB_CONTROL__JS1__EVENT_0, /* (RW) 32-bit 0x18B4 */ + JOB_CONTROL__JS1__EVENT_1, /* (RW) 32-bit 0x18B8 */ + JOB_CONTROL__JS1__EVENT_MASK, /* (RW) 32-bit 0x18BC */ + JOB_CONTROL__JS1__HEAD_NEXT, /* (RW) 64-bit 0x18C0 */ + JOB_CONTROL__JS1__TAIL_NEXT, /* (RW) 64-bit 0x18C8 */ + JOB_CONTROL__JS1__AFFINITY_NEXT, /* (RW) 64-bit 0x18D0 */ + JOB_CONTROL__JS1__CONFIG_NEXT, /* (RW) 32-bit 0x18D8 */ + JOB_CONTROL__JS1__XAFFINITY_NEXT, /* (RW) 32-bit 0x18DC */ + JOB_CONTROL__JS1__COMMAND_NEXT, /* (RW) 32-bit 0x18E0 */ + JOB_CONTROL__JS1__FLUSH_ID_NEXT, /* (RW) 32-bit 0x18F0 */ + JOB_CONTROL__JS1__EVENT_MASK_NEXT, /* (RW) 32-bit 0x18FC */ + JOB_CONTROL__JS2__HEAD, /* (RO) 64-bit 0x1900 */ + JOB_CONTROL__JS2__TAIL, /* (RO) 64-bit 0x1908 */ + JOB_CONTROL__JS2__AFFINITY, /* (RO) 64-bit 0x1910 */ + JOB_CONTROL__JS2__CONFIG, /* (RO) 32-bit 0x1918 */ + JOB_CONTROL__JS2__XAFFINITY, /* (RO) 32-bit 0x191C */ + JOB_CONTROL__JS2__COMMAND, /* (RW) 32-bit 0x1920 */ + JOB_CONTROL__JS2__STATUS, /* (RO) 32-bit 0x1924 */ + JOB_CONTROL__JS2__FLUSH_ID, /* (RO) 32-bit 0x1930 */ + JOB_CONTROL__JS2__EVENT_0, /* (RW) 32-bit 0x1934 */ + JOB_CONTROL__JS2__EVENT_1, /* (RW) 32-bit 0x1938 */ + JOB_CONTROL__JS2__EVENT_MASK, /* (RW) 32-bit 0x193C */ + JOB_CONTROL__JS2__HEAD_NEXT, /* (RW) 64-bit 0x1940 */ + JOB_CONTROL__JS2__TAIL_NEXT, /* (RW) 64-bit 0x1948 */ + JOB_CONTROL__JS2__AFFINITY_NEXT, /* (RW) 64-bit 0x1950 */ + JOB_CONTROL__JS2__CONFIG_NEXT, /* (RW) 32-bit 0x1958 */ + JOB_CONTROL__JS2__XAFFINITY_NEXT, /* (RW) 32-bit 0x195C */ + JOB_CONTROL__JS2__COMMAND_NEXT, /* (RW) 32-bit 0x1960 */ + JOB_CONTROL__JS2__FLUSH_ID_NEXT, /* (RW) 
32-bit 0x1970 */ + JOB_CONTROL__JS2__EVENT_MASK_NEXT, /* (RW) 32-bit 0x197C */ + JOB_CONTROL__JS3__HEAD, /* (RO) 64-bit 0x1980 */ + JOB_CONTROL__JS3__TAIL, /* (RO) 64-bit 0x1988 */ + JOB_CONTROL__JS3__AFFINITY, /* (RO) 64-bit 0x1990 */ + JOB_CONTROL__JS3__CONFIG, /* (RO) 32-bit 0x1998 */ + JOB_CONTROL__JS3__XAFFINITY, /* (RO) 32-bit 0x199C */ + JOB_CONTROL__JS3__COMMAND, /* (RW) 32-bit 0x19A0 */ + JOB_CONTROL__JS3__STATUS, /* (RO) 32-bit 0x19A4 */ + JOB_CONTROL__JS3__FLUSH_ID, /* (RO) 32-bit 0x19B0 */ + JOB_CONTROL__JS3__EVENT_0, /* (RW) 32-bit 0x19B4 */ + JOB_CONTROL__JS3__EVENT_1, /* (RW) 32-bit 0x19B8 */ + JOB_CONTROL__JS3__EVENT_MASK, /* (RW) 32-bit 0x19BC */ + JOB_CONTROL__JS3__HEAD_NEXT, /* (RW) 64-bit 0x19C0 */ + JOB_CONTROL__JS3__TAIL_NEXT, /* (RW) 64-bit 0x19C8 */ + JOB_CONTROL__JS3__AFFINITY_NEXT, /* (RW) 64-bit 0x19D0 */ + JOB_CONTROL__JS3__CONFIG_NEXT, /* (RW) 32-bit 0x19D8 */ + JOB_CONTROL__JS3__XAFFINITY_NEXT, /* (RW) 32-bit 0x19DC */ + JOB_CONTROL__JS3__COMMAND_NEXT, /* (RW) 32-bit 0x19E0 */ + JOB_CONTROL__JS3__FLUSH_ID_NEXT, /* (RW) 32-bit 0x19F0 */ + JOB_CONTROL__JS3__EVENT_MASK_NEXT, /* (RW) 32-bit 0x19FC */ + JOB_CONTROL__JS4__HEAD, /* (RO) 64-bit 0x1A00 */ + JOB_CONTROL__JS4__TAIL, /* (RO) 64-bit 0x1A08 */ + JOB_CONTROL__JS4__AFFINITY, /* (RO) 64-bit 0x1A10 */ + JOB_CONTROL__JS4__CONFIG, /* (RO) 32-bit 0x1A18 */ + JOB_CONTROL__JS4__XAFFINITY, /* (RO) 32-bit 0x1A1C */ + JOB_CONTROL__JS4__COMMAND, /* (RW) 32-bit 0x1A20 */ + JOB_CONTROL__JS4__STATUS, /* (RO) 32-bit 0x1A24 */ + JOB_CONTROL__JS4__FLUSH_ID, /* (RO) 32-bit 0x1A30 */ + JOB_CONTROL__JS4__EVENT_0, /* (RW) 32-bit 0x1A34 */ + JOB_CONTROL__JS4__EVENT_1, /* (RW) 32-bit 0x1A38 */ + JOB_CONTROL__JS4__EVENT_MASK, /* (RW) 32-bit 0x1A3C */ + JOB_CONTROL__JS4__HEAD_NEXT, /* (RW) 64-bit 0x1A40 */ + JOB_CONTROL__JS4__TAIL_NEXT, /* (RW) 64-bit 0x1A48 */ + JOB_CONTROL__JS4__AFFINITY_NEXT, /* (RW) 64-bit 0x1A50 */ + JOB_CONTROL__JS4__CONFIG_NEXT, /* (RW) 32-bit 0x1A58 */ + 
JOB_CONTROL__JS4__XAFFINITY_NEXT, /* (RW) 32-bit 0x1A5C */ + JOB_CONTROL__JS4__COMMAND_NEXT, /* (RW) 32-bit 0x1A60 */ + JOB_CONTROL__JS4__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1A70 */ + JOB_CONTROL__JS4__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1A7C */ + JOB_CONTROL__JS5__HEAD, /* (RO) 64-bit 0x1A80 */ + JOB_CONTROL__JS5__TAIL, /* (RO) 64-bit 0x1A88 */ + JOB_CONTROL__JS5__AFFINITY, /* (RO) 64-bit 0x1A90 */ + JOB_CONTROL__JS5__CONFIG, /* (RO) 32-bit 0x1A98 */ + JOB_CONTROL__JS5__XAFFINITY, /* (RO) 32-bit 0x1A9C */ + JOB_CONTROL__JS5__COMMAND, /* (RW) 32-bit 0x1AA0 */ + JOB_CONTROL__JS5__STATUS, /* (RO) 32-bit 0x1AA4 */ + JOB_CONTROL__JS5__FLUSH_ID, /* (RO) 32-bit 0x1AB0 */ + JOB_CONTROL__JS5__EVENT_0, /* (RW) 32-bit 0x1AB4 */ + JOB_CONTROL__JS5__EVENT_1, /* (RW) 32-bit 0x1AB8 */ + JOB_CONTROL__JS5__EVENT_MASK, /* (RW) 32-bit 0x1ABC */ + JOB_CONTROL__JS5__HEAD_NEXT, /* (RW) 64-bit 0x1AC0 */ + JOB_CONTROL__JS5__TAIL_NEXT, /* (RW) 64-bit 0x1AC8 */ + JOB_CONTROL__JS5__AFFINITY_NEXT, /* (RW) 64-bit 0x1AD0 */ + JOB_CONTROL__JS5__CONFIG_NEXT, /* (RW) 32-bit 0x1AD8 */ + JOB_CONTROL__JS5__XAFFINITY_NEXT, /* (RW) 32-bit 0x1ADC */ + JOB_CONTROL__JS5__COMMAND_NEXT, /* (RW) 32-bit 0x1AE0 */ + JOB_CONTROL__JS5__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1AF0 */ + JOB_CONTROL__JS5__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1AFC */ + JOB_CONTROL__JS6__HEAD, /* (RO) 64-bit 0x1B00 */ + JOB_CONTROL__JS6__TAIL, /* (RO) 64-bit 0x1B08 */ + JOB_CONTROL__JS6__AFFINITY, /* (RO) 64-bit 0x1B10 */ + JOB_CONTROL__JS6__CONFIG, /* (RO) 32-bit 0x1B18 */ + JOB_CONTROL__JS6__XAFFINITY, /* (RO) 32-bit 0x1B1C */ + JOB_CONTROL__JS6__COMMAND, /* (RW) 32-bit 0x1B20 */ + JOB_CONTROL__JS6__STATUS, /* (RO) 32-bit 0x1B24 */ + JOB_CONTROL__JS6__FLUSH_ID, /* (RO) 32-bit 0x1B30 */ + JOB_CONTROL__JS6__EVENT_0, /* (RW) 32-bit 0x1B34 */ + JOB_CONTROL__JS6__EVENT_1, /* (RW) 32-bit 0x1B38 */ + JOB_CONTROL__JS6__EVENT_MASK, /* (RW) 32-bit 0x1B3C */ + JOB_CONTROL__JS6__HEAD_NEXT, /* (RW) 64-bit 0x1B40 */ + JOB_CONTROL__JS6__TAIL_NEXT, /* (RW) 64-bit 
0x1B48 */ + JOB_CONTROL__JS6__AFFINITY_NEXT, /* (RW) 64-bit 0x1B50 */ + JOB_CONTROL__JS6__CONFIG_NEXT, /* (RW) 32-bit 0x1B58 */ + JOB_CONTROL__JS6__XAFFINITY_NEXT, /* (RW) 32-bit 0x1B5C */ + JOB_CONTROL__JS6__COMMAND_NEXT, /* (RW) 32-bit 0x1B60 */ + JOB_CONTROL__JS6__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1B70 */ + JOB_CONTROL__JS6__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1B7C */ + JOB_CONTROL__JS7__HEAD, /* (RO) 64-bit 0x1B80 */ + JOB_CONTROL__JS7__TAIL, /* (RO) 64-bit 0x1B88 */ + JOB_CONTROL__JS7__AFFINITY, /* (RO) 64-bit 0x1B90 */ + JOB_CONTROL__JS7__CONFIG, /* (RO) 32-bit 0x1B98 */ + JOB_CONTROL__JS7__XAFFINITY, /* (RO) 32-bit 0x1B9C */ + JOB_CONTROL__JS7__COMMAND, /* (RW) 32-bit 0x1BA0 */ + JOB_CONTROL__JS7__STATUS, /* (RO) 32-bit 0x1BA4 */ + JOB_CONTROL__JS7__FLUSH_ID, /* (RO) 32-bit 0x1BB0 */ + JOB_CONTROL__JS7__EVENT_0, /* (RW) 32-bit 0x1BB4 */ + JOB_CONTROL__JS7__EVENT_1, /* (RW) 32-bit 0x1BB8 */ + JOB_CONTROL__JS7__EVENT_MASK, /* (RW) 32-bit 0x1BBC */ + JOB_CONTROL__JS7__HEAD_NEXT, /* (RW) 64-bit 0x1BC0 */ + JOB_CONTROL__JS7__TAIL_NEXT, /* (RW) 64-bit 0x1BC8 */ + JOB_CONTROL__JS7__AFFINITY_NEXT, /* (RW) 64-bit 0x1BD0 */ + JOB_CONTROL__JS7__CONFIG_NEXT, /* (RW) 32-bit 0x1BD8 */ + JOB_CONTROL__JS7__XAFFINITY_NEXT, /* (RW) 32-bit 0x1BDC */ + JOB_CONTROL__JS7__COMMAND_NEXT, /* (RW) 32-bit 0x1BE0 */ + JOB_CONTROL__JS7__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1BF0 */ + JOB_CONTROL__JS7__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1BFC */ + JOB_CONTROL__JS8__HEAD, /* (RO) 64-bit 0x1C00 */ + JOB_CONTROL__JS8__TAIL, /* (RO) 64-bit 0x1C08 */ + JOB_CONTROL__JS8__AFFINITY, /* (RO) 64-bit 0x1C10 */ + JOB_CONTROL__JS8__CONFIG, /* (RO) 32-bit 0x1C18 */ + JOB_CONTROL__JS8__XAFFINITY, /* (RO) 32-bit 0x1C1C */ + JOB_CONTROL__JS8__COMMAND, /* (RW) 32-bit 0x1C20 */ + JOB_CONTROL__JS8__STATUS, /* (RO) 32-bit 0x1C24 */ + JOB_CONTROL__JS8__FLUSH_ID, /* (RO) 32-bit 0x1C30 */ + JOB_CONTROL__JS8__EVENT_0, /* (RW) 32-bit 0x1C34 */ + JOB_CONTROL__JS8__EVENT_1, /* (RW) 32-bit 0x1C38 */ + 
JOB_CONTROL__JS8__EVENT_MASK, /* (RW) 32-bit 0x1C3C */ + JOB_CONTROL__JS8__HEAD_NEXT, /* (RW) 64-bit 0x1C40 */ + JOB_CONTROL__JS8__TAIL_NEXT, /* (RW) 64-bit 0x1C48 */ + JOB_CONTROL__JS8__AFFINITY_NEXT, /* (RW) 64-bit 0x1C50 */ + JOB_CONTROL__JS8__CONFIG_NEXT, /* (RW) 32-bit 0x1C58 */ + JOB_CONTROL__JS8__XAFFINITY_NEXT, /* (RW) 32-bit 0x1C5C */ + JOB_CONTROL__JS8__COMMAND_NEXT, /* (RW) 32-bit 0x1C60 */ + JOB_CONTROL__JS8__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1C70 */ + JOB_CONTROL__JS8__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1C7C */ + JOB_CONTROL__JS9__HEAD, /* (RO) 64-bit 0x1C80 */ + JOB_CONTROL__JS9__TAIL, /* (RO) 64-bit 0x1C88 */ + JOB_CONTROL__JS9__AFFINITY, /* (RO) 64-bit 0x1C90 */ + JOB_CONTROL__JS9__CONFIG, /* (RO) 32-bit 0x1C98 */ + JOB_CONTROL__JS9__XAFFINITY, /* (RO) 32-bit 0x1C9C */ + JOB_CONTROL__JS9__COMMAND, /* (RW) 32-bit 0x1CA0 */ + JOB_CONTROL__JS9__STATUS, /* (RO) 32-bit 0x1CA4 */ + JOB_CONTROL__JS9__FLUSH_ID, /* (RO) 32-bit 0x1CB0 */ + JOB_CONTROL__JS9__EVENT_0, /* (RW) 32-bit 0x1CB4 */ + JOB_CONTROL__JS9__EVENT_1, /* (RW) 32-bit 0x1CB8 */ + JOB_CONTROL__JS9__EVENT_MASK, /* (RW) 32-bit 0x1CBC */ + JOB_CONTROL__JS9__HEAD_NEXT, /* (RW) 64-bit 0x1CC0 */ + JOB_CONTROL__JS9__TAIL_NEXT, /* (RW) 64-bit 0x1CC8 */ + JOB_CONTROL__JS9__AFFINITY_NEXT, /* (RW) 64-bit 0x1CD0 */ + JOB_CONTROL__JS9__CONFIG_NEXT, /* (RW) 32-bit 0x1CD8 */ + JOB_CONTROL__JS9__XAFFINITY_NEXT, /* (RW) 32-bit 0x1CDC */ + JOB_CONTROL__JS9__COMMAND_NEXT, /* (RW) 32-bit 0x1CE0 */ + JOB_CONTROL__JS9__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1CF0 */ + JOB_CONTROL__JS9__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1CFC */ + JOB_CONTROL__JS10__HEAD, /* (RO) 64-bit 0x1D00 */ + JOB_CONTROL__JS10__TAIL, /* (RO) 64-bit 0x1D08 */ + JOB_CONTROL__JS10__AFFINITY, /* (RO) 64-bit 0x1D10 */ + JOB_CONTROL__JS10__CONFIG, /* (RO) 32-bit 0x1D18 */ + JOB_CONTROL__JS10__XAFFINITY, /* (RO) 32-bit 0x1D1C */ + JOB_CONTROL__JS10__COMMAND, /* (RW) 32-bit 0x1D20 */ + JOB_CONTROL__JS10__STATUS, /* (RO) 32-bit 0x1D24 */ + 
JOB_CONTROL__JS10__FLUSH_ID, /* (RO) 32-bit 0x1D30 */ + JOB_CONTROL__JS10__EVENT_0, /* (RW) 32-bit 0x1D34 */ + JOB_CONTROL__JS10__EVENT_1, /* (RW) 32-bit 0x1D38 */ + JOB_CONTROL__JS10__EVENT_MASK, /* (RW) 32-bit 0x1D3C */ + JOB_CONTROL__JS10__HEAD_NEXT, /* (RW) 64-bit 0x1D40 */ + JOB_CONTROL__JS10__TAIL_NEXT, /* (RW) 64-bit 0x1D48 */ + JOB_CONTROL__JS10__AFFINITY_NEXT, /* (RW) 64-bit 0x1D50 */ + JOB_CONTROL__JS10__CONFIG_NEXT, /* (RW) 32-bit 0x1D58 */ + JOB_CONTROL__JS10__XAFFINITY_NEXT, /* (RW) 32-bit 0x1D5C */ + JOB_CONTROL__JS10__COMMAND_NEXT, /* (RW) 32-bit 0x1D60 */ + JOB_CONTROL__JS10__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1D70 */ + JOB_CONTROL__JS10__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1D7C */ + JOB_CONTROL__JS11__HEAD, /* (RO) 64-bit 0x1D80 */ + JOB_CONTROL__JS11__TAIL, /* (RO) 64-bit 0x1D88 */ + JOB_CONTROL__JS11__AFFINITY, /* (RO) 64-bit 0x1D90 */ + JOB_CONTROL__JS11__CONFIG, /* (RO) 32-bit 0x1D98 */ + JOB_CONTROL__JS11__XAFFINITY, /* (RO) 32-bit 0x1D9C */ + JOB_CONTROL__JS11__COMMAND, /* (RW) 32-bit 0x1DA0 */ + JOB_CONTROL__JS11__STATUS, /* (RO) 32-bit 0x1DA4 */ + JOB_CONTROL__JS11__FLUSH_ID, /* (RO) 32-bit 0x1DB0 */ + JOB_CONTROL__JS11__EVENT_0, /* (RW) 32-bit 0x1DB4 */ + JOB_CONTROL__JS11__EVENT_1, /* (RW) 32-bit 0x1DB8 */ + JOB_CONTROL__JS11__EVENT_MASK, /* (RW) 32-bit 0x1DBC */ + JOB_CONTROL__JS11__HEAD_NEXT, /* (RW) 64-bit 0x1DC0 */ + JOB_CONTROL__JS11__TAIL_NEXT, /* (RW) 64-bit 0x1DC8 */ + JOB_CONTROL__JS11__AFFINITY_NEXT, /* (RW) 64-bit 0x1DD0 */ + JOB_CONTROL__JS11__CONFIG_NEXT, /* (RW) 32-bit 0x1DD8 */ + JOB_CONTROL__JS11__XAFFINITY_NEXT, /* (RW) 32-bit 0x1DDC */ + JOB_CONTROL__JS11__COMMAND_NEXT, /* (RW) 32-bit 0x1DE0 */ + JOB_CONTROL__JS11__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1DF0 */ + JOB_CONTROL__JS11__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1DFC */ + JOB_CONTROL__JS12__HEAD, /* (RO) 64-bit 0x1E00 */ + JOB_CONTROL__JS12__TAIL, /* (RO) 64-bit 0x1E08 */ + JOB_CONTROL__JS12__AFFINITY, /* (RO) 64-bit 0x1E10 */ + JOB_CONTROL__JS12__CONFIG, /* (RO) 32-bit 0x1E18 */ 
+ JOB_CONTROL__JS12__XAFFINITY, /* (RO) 32-bit 0x1E1C */ + JOB_CONTROL__JS12__COMMAND, /* (RW) 32-bit 0x1E20 */ + JOB_CONTROL__JS12__STATUS, /* (RO) 32-bit 0x1E24 */ + JOB_CONTROL__JS12__FLUSH_ID, /* (RO) 32-bit 0x1E30 */ + JOB_CONTROL__JS12__EVENT_0, /* (RW) 32-bit 0x1E34 */ + JOB_CONTROL__JS12__EVENT_1, /* (RW) 32-bit 0x1E38 */ + JOB_CONTROL__JS12__EVENT_MASK, /* (RW) 32-bit 0x1E3C */ + JOB_CONTROL__JS12__HEAD_NEXT, /* (RW) 64-bit 0x1E40 */ + JOB_CONTROL__JS12__TAIL_NEXT, /* (RW) 64-bit 0x1E48 */ + JOB_CONTROL__JS12__AFFINITY_NEXT, /* (RW) 64-bit 0x1E50 */ + JOB_CONTROL__JS12__CONFIG_NEXT, /* (RW) 32-bit 0x1E58 */ + JOB_CONTROL__JS12__XAFFINITY_NEXT, /* (RW) 32-bit 0x1E5C */ + JOB_CONTROL__JS12__COMMAND_NEXT, /* (RW) 32-bit 0x1E60 */ + JOB_CONTROL__JS12__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1E70 */ + JOB_CONTROL__JS12__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1E7C */ + JOB_CONTROL__JS13__HEAD, /* (RO) 64-bit 0x1E80 */ + JOB_CONTROL__JS13__TAIL, /* (RO) 64-bit 0x1E88 */ + JOB_CONTROL__JS13__AFFINITY, /* (RO) 64-bit 0x1E90 */ + JOB_CONTROL__JS13__CONFIG, /* (RO) 32-bit 0x1E98 */ + JOB_CONTROL__JS13__XAFFINITY, /* (RO) 32-bit 0x1E9C */ + JOB_CONTROL__JS13__COMMAND, /* (RW) 32-bit 0x1EA0 */ + JOB_CONTROL__JS13__STATUS, /* (RO) 32-bit 0x1EA4 */ + JOB_CONTROL__JS13__FLUSH_ID, /* (RO) 32-bit 0x1EB0 */ + JOB_CONTROL__JS13__EVENT_0, /* (RW) 32-bit 0x1EB4 */ + JOB_CONTROL__JS13__EVENT_1, /* (RW) 32-bit 0x1EB8 */ + JOB_CONTROL__JS13__EVENT_MASK, /* (RW) 32-bit 0x1EBC */ + JOB_CONTROL__JS13__HEAD_NEXT, /* (RW) 64-bit 0x1EC0 */ + JOB_CONTROL__JS13__TAIL_NEXT, /* (RW) 64-bit 0x1EC8 */ + JOB_CONTROL__JS13__AFFINITY_NEXT, /* (RW) 64-bit 0x1ED0 */ + JOB_CONTROL__JS13__CONFIG_NEXT, /* (RW) 32-bit 0x1ED8 */ + JOB_CONTROL__JS13__XAFFINITY_NEXT, /* (RW) 32-bit 0x1EDC */ + JOB_CONTROL__JS13__COMMAND_NEXT, /* (RW) 32-bit 0x1EE0 */ + JOB_CONTROL__JS13__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1EF0 */ + JOB_CONTROL__JS13__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1EFC */ + JOB_CONTROL__JS14__HEAD, /* (RO) 64-bit 
0x1F00 */ + JOB_CONTROL__JS14__TAIL, /* (RO) 64-bit 0x1F08 */ + JOB_CONTROL__JS14__AFFINITY, /* (RO) 64-bit 0x1F10 */ + JOB_CONTROL__JS14__CONFIG, /* (RO) 32-bit 0x1F18 */ + JOB_CONTROL__JS14__XAFFINITY, /* (RO) 32-bit 0x1F1C */ + JOB_CONTROL__JS14__COMMAND, /* (RW) 32-bit 0x1F20 */ + JOB_CONTROL__JS14__STATUS, /* (RO) 32-bit 0x1F24 */ + JOB_CONTROL__JS14__FLUSH_ID, /* (RO) 32-bit 0x1F30 */ + JOB_CONTROL__JS14__EVENT_0, /* (RW) 32-bit 0x1F34 */ + JOB_CONTROL__JS14__EVENT_1, /* (RW) 32-bit 0x1F38 */ + JOB_CONTROL__JS14__EVENT_MASK, /* (RW) 32-bit 0x1F3C */ + JOB_CONTROL__JS14__HEAD_NEXT, /* (RW) 64-bit 0x1F40 */ + JOB_CONTROL__JS14__TAIL_NEXT, /* (RW) 64-bit 0x1F48 */ + JOB_CONTROL__JS14__AFFINITY_NEXT, /* (RW) 64-bit 0x1F50 */ + JOB_CONTROL__JS14__CONFIG_NEXT, /* (RW) 32-bit 0x1F58 */ + JOB_CONTROL__JS14__XAFFINITY_NEXT, /* (RW) 32-bit 0x1F5C */ + JOB_CONTROL__JS14__COMMAND_NEXT, /* (RW) 32-bit 0x1F60 */ + JOB_CONTROL__JS14__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1F70 */ + JOB_CONTROL__JS14__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1F7C */ + JOB_CONTROL__JS15__HEAD, /* (RO) 64-bit 0x1F80 */ + JOB_CONTROL__JS15__TAIL, /* (RO) 64-bit 0x1F88 */ + JOB_CONTROL__JS15__AFFINITY, /* (RO) 64-bit 0x1F90 */ + JOB_CONTROL__JS15__CONFIG, /* (RO) 32-bit 0x1F98 */ + JOB_CONTROL__JS15__XAFFINITY, /* (RO) 32-bit 0x1F9C */ + JOB_CONTROL__JS15__COMMAND, /* (RW) 32-bit 0x1FA0 */ + JOB_CONTROL__JS15__STATUS, /* (RO) 32-bit 0x1FA4 */ + JOB_CONTROL__JS15__FLUSH_ID, /* (RO) 32-bit 0x1FB0 */ + JOB_CONTROL__JS15__EVENT_0, /* (RW) 32-bit 0x1FB4 */ + JOB_CONTROL__JS15__EVENT_1, /* (RW) 32-bit 0x1FB8 */ + JOB_CONTROL__JS15__EVENT_MASK, /* (RW) 32-bit 0x1FBC */ + JOB_CONTROL__JS15__HEAD_NEXT, /* (RW) 64-bit 0x1FC0 */ + JOB_CONTROL__JS15__TAIL_NEXT, /* (RW) 64-bit 0x1FC8 */ + JOB_CONTROL__JS15__AFFINITY_NEXT, /* (RW) 64-bit 0x1FD0 */ + JOB_CONTROL__JS15__CONFIG_NEXT, /* (RW) 32-bit 0x1FD8 */ + JOB_CONTROL__JS15__XAFFINITY_NEXT, /* (RW) 32-bit 0x1FDC */ + JOB_CONTROL__JS15__COMMAND_NEXT, /* (RW) 32-bit 0x1FE0 
*/ + JOB_CONTROL__JS15__FLUSH_ID_NEXT, /* (RW) 32-bit 0x1FF0 */ + JOB_CONTROL__JS15__EVENT_MASK_NEXT, /* (RW) 32-bit 0x1FFC */ + MMU_STAGE1__ST1MMU__IRQ_RAWSTAT, /* (RW) 32-bit 0x2000 */ + MMU_STAGE1__ST1MMU__IRQ_CLEAR, /* (WO) 32-bit 0x2004 */ + MMU_STAGE1__ST1MMU__IRQ_MASK, /* (RW) 32-bit 0x2008 */ + MMU_STAGE1__ST1MMU__IRQ_STATUS, /* (RO) 32-bit 0x200C */ + MMU_STAGE1__ST1MMU__AS0__TRANSTAB, /* (RW) 64-bit 0x2400 */ + MMU_STAGE1__ST1MMU__AS0__MEMATTR, /* (RW) 64-bit 0x2408 */ + MMU_STAGE1__ST1MMU__AS0__LOCKADDR, /* (RW) 64-bit 0x2410 */ + MMU_STAGE1__ST1MMU__AS0__COMMAND, /* (WO) 32-bit 0x2418 */ + MMU_STAGE1__ST1MMU__AS0__FAULTSTATUS, /* (RO) 32-bit 0x241C */ + MMU_STAGE1__ST1MMU__AS0__FAULTADDRESS, /* (RO) 64-bit 0x2420 */ + MMU_STAGE1__ST1MMU__AS0__STATUS, /* (RO) 32-bit 0x2428 */ + MMU_STAGE1__ST1MMU__AS0__TRANSCFG, /* (RW) 64-bit 0x2430 */ + MMU_STAGE1__ST1MMU__AS0__FAULTEXTRA, /* (RO) 64-bit 0x2438 */ + MMU_STAGE1__ST1MMU__AS1__TRANSTAB, /* (RW) 64-bit 0x2440 */ + MMU_STAGE1__ST1MMU__AS1__MEMATTR, /* (RW) 64-bit 0x2448 */ + MMU_STAGE1__ST1MMU__AS1__LOCKADDR, /* (RW) 64-bit 0x2450 */ + MMU_STAGE1__ST1MMU__AS1__COMMAND, /* (WO) 32-bit 0x2458 */ + MMU_STAGE1__ST1MMU__AS1__FAULTSTATUS, /* (RO) 32-bit 0x245C */ + MMU_STAGE1__ST1MMU__AS1__FAULTADDRESS, /* (RO) 64-bit 0x2460 */ + MMU_STAGE1__ST1MMU__AS1__STATUS, /* (RO) 32-bit 0x2468 */ + MMU_STAGE1__ST1MMU__AS1__TRANSCFG, /* (RW) 64-bit 0x2470 */ + MMU_STAGE1__ST1MMU__AS1__FAULTEXTRA, /* (RO) 64-bit 0x2478 */ + MMU_STAGE1__ST1MMU__AS2__TRANSTAB, /* (RW) 64-bit 0x2480 */ + MMU_STAGE1__ST1MMU__AS2__MEMATTR, /* (RW) 64-bit 0x2488 */ + MMU_STAGE1__ST1MMU__AS2__LOCKADDR, /* (RW) 64-bit 0x2490 */ + MMU_STAGE1__ST1MMU__AS2__COMMAND, /* (WO) 32-bit 0x2498 */ + MMU_STAGE1__ST1MMU__AS2__FAULTSTATUS, /* (RO) 32-bit 0x249C */ + MMU_STAGE1__ST1MMU__AS2__FAULTADDRESS, /* (RO) 64-bit 0x24A0 */ + MMU_STAGE1__ST1MMU__AS2__STATUS, /* (RO) 32-bit 0x24A8 */ + MMU_STAGE1__ST1MMU__AS2__TRANSCFG, /* (RW) 64-bit 0x24B0 */ + 
MMU_STAGE1__ST1MMU__AS2__FAULTEXTRA, /* (RO) 64-bit 0x24B8 */ + MMU_STAGE1__ST1MMU__AS3__TRANSTAB, /* (RW) 64-bit 0x24C0 */ + MMU_STAGE1__ST1MMU__AS3__MEMATTR, /* (RW) 64-bit 0x24C8 */ + MMU_STAGE1__ST1MMU__AS3__LOCKADDR, /* (RW) 64-bit 0x24D0 */ + MMU_STAGE1__ST1MMU__AS3__COMMAND, /* (WO) 32-bit 0x24D8 */ + MMU_STAGE1__ST1MMU__AS3__FAULTSTATUS, /* (RO) 32-bit 0x24DC */ + MMU_STAGE1__ST1MMU__AS3__FAULTADDRESS, /* (RO) 64-bit 0x24E0 */ + MMU_STAGE1__ST1MMU__AS3__STATUS, /* (RO) 32-bit 0x24E8 */ + MMU_STAGE1__ST1MMU__AS3__TRANSCFG, /* (RW) 64-bit 0x24F0 */ + MMU_STAGE1__ST1MMU__AS3__FAULTEXTRA, /* (RO) 64-bit 0x24F8 */ + MMU_STAGE1__ST1MMU__AS4__TRANSTAB, /* (RW) 64-bit 0x2500 */ + MMU_STAGE1__ST1MMU__AS4__MEMATTR, /* (RW) 64-bit 0x2508 */ + MMU_STAGE1__ST1MMU__AS4__LOCKADDR, /* (RW) 64-bit 0x2510 */ + MMU_STAGE1__ST1MMU__AS4__COMMAND, /* (WO) 32-bit 0x2518 */ + MMU_STAGE1__ST1MMU__AS4__FAULTSTATUS, /* (RO) 32-bit 0x251C */ + MMU_STAGE1__ST1MMU__AS4__FAULTADDRESS, /* (RO) 64-bit 0x2520 */ + MMU_STAGE1__ST1MMU__AS4__STATUS, /* (RO) 32-bit 0x2528 */ + MMU_STAGE1__ST1MMU__AS4__TRANSCFG, /* (RW) 64-bit 0x2530 */ + MMU_STAGE1__ST1MMU__AS4__FAULTEXTRA, /* (RO) 64-bit 0x2538 */ + MMU_STAGE1__ST1MMU__AS5__TRANSTAB, /* (RW) 64-bit 0x2540 */ + MMU_STAGE1__ST1MMU__AS5__MEMATTR, /* (RW) 64-bit 0x2548 */ + MMU_STAGE1__ST1MMU__AS5__LOCKADDR, /* (RW) 64-bit 0x2550 */ + MMU_STAGE1__ST1MMU__AS5__COMMAND, /* (WO) 32-bit 0x2558 */ + MMU_STAGE1__ST1MMU__AS5__FAULTSTATUS, /* (RO) 32-bit 0x255C */ + MMU_STAGE1__ST1MMU__AS5__FAULTADDRESS, /* (RO) 64-bit 0x2560 */ + MMU_STAGE1__ST1MMU__AS5__STATUS, /* (RO) 32-bit 0x2568 */ + MMU_STAGE1__ST1MMU__AS5__TRANSCFG, /* (RW) 64-bit 0x2570 */ + MMU_STAGE1__ST1MMU__AS5__FAULTEXTRA, /* (RO) 64-bit 0x2578 */ + MMU_STAGE1__ST1MMU__AS6__TRANSTAB, /* (RW) 64-bit 0x2580 */ + MMU_STAGE1__ST1MMU__AS6__MEMATTR, /* (RW) 64-bit 0x2588 */ + MMU_STAGE1__ST1MMU__AS6__LOCKADDR, /* (RW) 64-bit 0x2590 */ + MMU_STAGE1__ST1MMU__AS6__COMMAND, /* (WO) 32-bit 0x2598 */ + 
MMU_STAGE1__ST1MMU__AS6__FAULTSTATUS, /* (RO) 32-bit 0x259C */ + MMU_STAGE1__ST1MMU__AS6__FAULTADDRESS, /* (RO) 64-bit 0x25A0 */ + MMU_STAGE1__ST1MMU__AS6__STATUS, /* (RO) 32-bit 0x25A8 */ + MMU_STAGE1__ST1MMU__AS6__TRANSCFG, /* (RW) 64-bit 0x25B0 */ + MMU_STAGE1__ST1MMU__AS6__FAULTEXTRA, /* (RO) 64-bit 0x25B8 */ + MMU_STAGE1__ST1MMU__AS7__TRANSTAB, /* (RW) 64-bit 0x25C0 */ + MMU_STAGE1__ST1MMU__AS7__MEMATTR, /* (RW) 64-bit 0x25C8 */ + MMU_STAGE1__ST1MMU__AS7__LOCKADDR, /* (RW) 64-bit 0x25D0 */ + MMU_STAGE1__ST1MMU__AS7__COMMAND, /* (WO) 32-bit 0x25D8 */ + MMU_STAGE1__ST1MMU__AS7__FAULTSTATUS, /* (RO) 32-bit 0x25DC */ + MMU_STAGE1__ST1MMU__AS7__FAULTADDRESS, /* (RO) 64-bit 0x25E0 */ + MMU_STAGE1__ST1MMU__AS7__STATUS, /* (RO) 32-bit 0x25E8 */ + MMU_STAGE1__ST1MMU__AS7__TRANSCFG, /* (RW) 64-bit 0x25F0 */ + MMU_STAGE1__ST1MMU__AS7__FAULTEXTRA, /* (RO) 64-bit 0x25F8 */ + MMU_STAGE1__ST1MMU__AS8__TRANSTAB, /* (RW) 64-bit 0x2600 */ + MMU_STAGE1__ST1MMU__AS8__MEMATTR, /* (RW) 64-bit 0x2608 */ + MMU_STAGE1__ST1MMU__AS8__LOCKADDR, /* (RW) 64-bit 0x2610 */ + MMU_STAGE1__ST1MMU__AS8__COMMAND, /* (WO) 32-bit 0x2618 */ + MMU_STAGE1__ST1MMU__AS8__FAULTSTATUS, /* (RO) 32-bit 0x261C */ + MMU_STAGE1__ST1MMU__AS8__FAULTADDRESS, /* (RO) 64-bit 0x2620 */ + MMU_STAGE1__ST1MMU__AS8__STATUS, /* (RO) 32-bit 0x2628 */ + MMU_STAGE1__ST1MMU__AS8__TRANSCFG, /* (RW) 64-bit 0x2630 */ + MMU_STAGE1__ST1MMU__AS8__FAULTEXTRA, /* (RO) 64-bit 0x2638 */ + MMU_STAGE1__ST1MMU__AS9__TRANSTAB, /* (RW) 64-bit 0x2640 */ + MMU_STAGE1__ST1MMU__AS9__MEMATTR, /* (RW) 64-bit 0x2648 */ + MMU_STAGE1__ST1MMU__AS9__LOCKADDR, /* (RW) 64-bit 0x2650 */ + MMU_STAGE1__ST1MMU__AS9__COMMAND, /* (WO) 32-bit 0x2658 */ + MMU_STAGE1__ST1MMU__AS9__FAULTSTATUS, /* (RO) 32-bit 0x265C */ + MMU_STAGE1__ST1MMU__AS9__FAULTADDRESS, /* (RO) 64-bit 0x2660 */ + MMU_STAGE1__ST1MMU__AS9__STATUS, /* (RO) 32-bit 0x2668 */ + MMU_STAGE1__ST1MMU__AS9__TRANSCFG, /* (RW) 64-bit 0x2670 */ + MMU_STAGE1__ST1MMU__AS9__FAULTEXTRA, /* (RO) 64-bit 
0x2678 */ + MMU_STAGE1__ST1MMU__AS10__TRANSTAB, /* (RW) 64-bit 0x2680 */ + MMU_STAGE1__ST1MMU__AS10__MEMATTR, /* (RW) 64-bit 0x2688 */ + MMU_STAGE1__ST1MMU__AS10__LOCKADDR, /* (RW) 64-bit 0x2690 */ + MMU_STAGE1__ST1MMU__AS10__COMMAND, /* (WO) 32-bit 0x2698 */ + MMU_STAGE1__ST1MMU__AS10__FAULTSTATUS, /* (RO) 32-bit 0x269C */ + MMU_STAGE1__ST1MMU__AS10__FAULTADDRESS, /* (RO) 64-bit 0x26A0 */ + MMU_STAGE1__ST1MMU__AS10__STATUS, /* (RO) 32-bit 0x26A8 */ + MMU_STAGE1__ST1MMU__AS10__TRANSCFG, /* (RW) 64-bit 0x26B0 */ + MMU_STAGE1__ST1MMU__AS10__FAULTEXTRA, /* (RO) 64-bit 0x26B8 */ + MMU_STAGE1__ST1MMU__AS11__TRANSTAB, /* (RW) 64-bit 0x26C0 */ + MMU_STAGE1__ST1MMU__AS11__MEMATTR, /* (RW) 64-bit 0x26C8 */ + MMU_STAGE1__ST1MMU__AS11__LOCKADDR, /* (RW) 64-bit 0x26D0 */ + MMU_STAGE1__ST1MMU__AS11__COMMAND, /* (WO) 32-bit 0x26D8 */ + MMU_STAGE1__ST1MMU__AS11__FAULTSTATUS, /* (RO) 32-bit 0x26DC */ + MMU_STAGE1__ST1MMU__AS11__FAULTADDRESS, /* (RO) 64-bit 0x26E0 */ + MMU_STAGE1__ST1MMU__AS11__STATUS, /* (RO) 32-bit 0x26E8 */ + MMU_STAGE1__ST1MMU__AS11__TRANSCFG, /* (RW) 64-bit 0x26F0 */ + MMU_STAGE1__ST1MMU__AS11__FAULTEXTRA, /* (RO) 64-bit 0x26F8 */ + MMU_STAGE1__ST1MMU__AS12__TRANSTAB, /* (RW) 64-bit 0x2700 */ + MMU_STAGE1__ST1MMU__AS12__MEMATTR, /* (RW) 64-bit 0x2708 */ + MMU_STAGE1__ST1MMU__AS12__LOCKADDR, /* (RW) 64-bit 0x2710 */ + MMU_STAGE1__ST1MMU__AS12__COMMAND, /* (WO) 32-bit 0x2718 */ + MMU_STAGE1__ST1MMU__AS12__FAULTSTATUS, /* (RO) 32-bit 0x271C */ + MMU_STAGE1__ST1MMU__AS12__FAULTADDRESS, /* (RO) 64-bit 0x2720 */ + MMU_STAGE1__ST1MMU__AS12__STATUS, /* (RO) 32-bit 0x2728 */ + MMU_STAGE1__ST1MMU__AS12__TRANSCFG, /* (RW) 64-bit 0x2730 */ + MMU_STAGE1__ST1MMU__AS12__FAULTEXTRA, /* (RO) 64-bit 0x2738 */ + MMU_STAGE1__ST1MMU__AS13__TRANSTAB, /* (RW) 64-bit 0x2740 */ + MMU_STAGE1__ST1MMU__AS13__MEMATTR, /* (RW) 64-bit 0x2748 */ + MMU_STAGE1__ST1MMU__AS13__LOCKADDR, /* (RW) 64-bit 0x2750 */ + MMU_STAGE1__ST1MMU__AS13__COMMAND, /* (WO) 32-bit 0x2758 */ + 
MMU_STAGE1__ST1MMU__AS13__FAULTSTATUS, /* (RO) 32-bit 0x275C */ + MMU_STAGE1__ST1MMU__AS13__FAULTADDRESS, /* (RO) 64-bit 0x2760 */ + MMU_STAGE1__ST1MMU__AS13__STATUS, /* (RO) 32-bit 0x2768 */ + MMU_STAGE1__ST1MMU__AS13__TRANSCFG, /* (RW) 64-bit 0x2770 */ + MMU_STAGE1__ST1MMU__AS13__FAULTEXTRA, /* (RO) 64-bit 0x2778 */ + MMU_STAGE1__ST1MMU__AS14__TRANSTAB, /* (RW) 64-bit 0x2780 */ + MMU_STAGE1__ST1MMU__AS14__MEMATTR, /* (RW) 64-bit 0x2788 */ + MMU_STAGE1__ST1MMU__AS14__LOCKADDR, /* (RW) 64-bit 0x2790 */ + MMU_STAGE1__ST1MMU__AS14__COMMAND, /* (WO) 32-bit 0x2798 */ + MMU_STAGE1__ST1MMU__AS14__FAULTSTATUS, /* (RO) 32-bit 0x279C */ + MMU_STAGE1__ST1MMU__AS14__FAULTADDRESS, /* (RO) 64-bit 0x27A0 */ + MMU_STAGE1__ST1MMU__AS14__STATUS, /* (RO) 32-bit 0x27A8 */ + MMU_STAGE1__ST1MMU__AS14__TRANSCFG, /* (RW) 64-bit 0x27B0 */ + MMU_STAGE1__ST1MMU__AS14__FAULTEXTRA, /* (RO) 64-bit 0x27B8 */ + MMU_STAGE1__ST1MMU__AS15__TRANSTAB, /* (RW) 64-bit 0x27C0 */ + MMU_STAGE1__ST1MMU__AS15__MEMATTR, /* (RW) 64-bit 0x27C8 */ + MMU_STAGE1__ST1MMU__AS15__LOCKADDR, /* (RW) 64-bit 0x27D0 */ + MMU_STAGE1__ST1MMU__AS15__COMMAND, /* (WO) 32-bit 0x27D8 */ + MMU_STAGE1__ST1MMU__AS15__FAULTSTATUS, /* (RO) 32-bit 0x27DC */ + MMU_STAGE1__ST1MMU__AS15__FAULTADDRESS, /* (RO) 64-bit 0x27E0 */ + MMU_STAGE1__ST1MMU__AS15__STATUS, /* (RO) 32-bit 0x27E8 */ + MMU_STAGE1__ST1MMU__AS15__TRANSCFG, /* (RW) 64-bit 0x27F0 */ + MMU_STAGE1__ST1MMU__AS15__FAULTEXTRA, /* (RO) 64-bit 0x27F8 */ + MMU_STAGE2__ST2MMU__IRQ_RAWSTAT, /* (RW) 32-bit 0x10000 */ + MMU_STAGE2__ST2MMU__IRQ_CLEAR, /* (WO) 32-bit 0x10004 */ + MMU_STAGE2__ST2MMU__IRQ_MASK, /* (RW) 32-bit 0x10008 */ + MMU_STAGE2__ST2MMU__IRQ_STATUS, /* (RO) 32-bit 0x1000C */ + MMU_STAGE2__ST2MMU__AS0__TRANSTAB, /* (RW) 64-bit 0x10400 */ + MMU_STAGE2__ST2MMU__AS0__MEMATTR, /* (RW) 64-bit 0x10408 */ + MMU_STAGE2__ST2MMU__AS0__LOCKADDR, /* (RW) 64-bit 0x10410 */ + MMU_STAGE2__ST2MMU__AS0__COMMAND, /* (WO) 32-bit 0x10418 */ + MMU_STAGE2__ST2MMU__AS0__FAULTSTATUS, /* (RO) 
32-bit 0x1041C */ + MMU_STAGE2__ST2MMU__AS0__FAULTADDRESS, /* (RO) 64-bit 0x10420 */ + MMU_STAGE2__ST2MMU__AS0__STATUS, /* (RO) 32-bit 0x10428 */ + MMU_STAGE2__ST2MMU__AS0__TRANSCFG, /* (RW) 64-bit 0x10430 */ + MMU_STAGE2__ST2MMU__AS0__FAULTEXTRA, /* (RO) 64-bit 0x10438 */ + MMU_STAGE2__ST2MMU__AS1__TRANSTAB, /* (RW) 64-bit 0x10440 */ + MMU_STAGE2__ST2MMU__AS1__MEMATTR, /* (RW) 64-bit 0x10448 */ + MMU_STAGE2__ST2MMU__AS1__LOCKADDR, /* (RW) 64-bit 0x10450 */ + MMU_STAGE2__ST2MMU__AS1__COMMAND, /* (WO) 32-bit 0x10458 */ + MMU_STAGE2__ST2MMU__AS1__FAULTSTATUS, /* (RO) 32-bit 0x1045C */ + MMU_STAGE2__ST2MMU__AS1__FAULTADDRESS, /* (RO) 64-bit 0x10460 */ + MMU_STAGE2__ST2MMU__AS1__STATUS, /* (RO) 32-bit 0x10468 */ + MMU_STAGE2__ST2MMU__AS1__TRANSCFG, /* (RW) 64-bit 0x10470 */ + MMU_STAGE2__ST2MMU__AS1__FAULTEXTRA, /* (RO) 64-bit 0x10478 */ + MMU_STAGE2__ST2MMU__AS2__TRANSTAB, /* (RW) 64-bit 0x10480 */ + MMU_STAGE2__ST2MMU__AS2__MEMATTR, /* (RW) 64-bit 0x10488 */ + MMU_STAGE2__ST2MMU__AS2__LOCKADDR, /* (RW) 64-bit 0x10490 */ + MMU_STAGE2__ST2MMU__AS2__COMMAND, /* (WO) 32-bit 0x10498 */ + MMU_STAGE2__ST2MMU__AS2__FAULTSTATUS, /* (RO) 32-bit 0x1049C */ + MMU_STAGE2__ST2MMU__AS2__FAULTADDRESS, /* (RO) 64-bit 0x104A0 */ + MMU_STAGE2__ST2MMU__AS2__STATUS, /* (RO) 32-bit 0x104A8 */ + MMU_STAGE2__ST2MMU__AS2__TRANSCFG, /* (RW) 64-bit 0x104B0 */ + MMU_STAGE2__ST2MMU__AS2__FAULTEXTRA, /* (RO) 64-bit 0x104B8 */ + MMU_STAGE2__ST2MMU__AS3__TRANSTAB, /* (RW) 64-bit 0x104C0 */ + MMU_STAGE2__ST2MMU__AS3__MEMATTR, /* (RW) 64-bit 0x104C8 */ + MMU_STAGE2__ST2MMU__AS3__LOCKADDR, /* (RW) 64-bit 0x104D0 */ + MMU_STAGE2__ST2MMU__AS3__COMMAND, /* (WO) 32-bit 0x104D8 */ + MMU_STAGE2__ST2MMU__AS3__FAULTSTATUS, /* (RO) 32-bit 0x104DC */ + MMU_STAGE2__ST2MMU__AS3__FAULTADDRESS, /* (RO) 64-bit 0x104E0 */ + MMU_STAGE2__ST2MMU__AS3__STATUS, /* (RO) 32-bit 0x104E8 */ + MMU_STAGE2__ST2MMU__AS3__TRANSCFG, /* (RW) 64-bit 0x104F0 */ + MMU_STAGE2__ST2MMU__AS3__FAULTEXTRA, /* (RO) 64-bit 0x104F8 */ + 
MMU_STAGE2__ST2MMU__AS4__TRANSTAB, /* (RW) 64-bit 0x10500 */ + MMU_STAGE2__ST2MMU__AS4__MEMATTR, /* (RW) 64-bit 0x10508 */ + MMU_STAGE2__ST2MMU__AS4__LOCKADDR, /* (RW) 64-bit 0x10510 */ + MMU_STAGE2__ST2MMU__AS4__COMMAND, /* (WO) 32-bit 0x10518 */ + MMU_STAGE2__ST2MMU__AS4__FAULTSTATUS, /* (RO) 32-bit 0x1051C */ + MMU_STAGE2__ST2MMU__AS4__FAULTADDRESS, /* (RO) 64-bit 0x10520 */ + MMU_STAGE2__ST2MMU__AS4__STATUS, /* (RO) 32-bit 0x10528 */ + MMU_STAGE2__ST2MMU__AS4__TRANSCFG, /* (RW) 64-bit 0x10530 */ + MMU_STAGE2__ST2MMU__AS4__FAULTEXTRA, /* (RO) 64-bit 0x10538 */ + MMU_STAGE2__ST2MMU__AS5__TRANSTAB, /* (RW) 64-bit 0x10540 */ + MMU_STAGE2__ST2MMU__AS5__MEMATTR, /* (RW) 64-bit 0x10548 */ + MMU_STAGE2__ST2MMU__AS5__LOCKADDR, /* (RW) 64-bit 0x10550 */ + MMU_STAGE2__ST2MMU__AS5__COMMAND, /* (WO) 32-bit 0x10558 */ + MMU_STAGE2__ST2MMU__AS5__FAULTSTATUS, /* (RO) 32-bit 0x1055C */ + MMU_STAGE2__ST2MMU__AS5__FAULTADDRESS, /* (RO) 64-bit 0x10560 */ + MMU_STAGE2__ST2MMU__AS5__STATUS, /* (RO) 32-bit 0x10568 */ + MMU_STAGE2__ST2MMU__AS5__TRANSCFG, /* (RW) 64-bit 0x10570 */ + MMU_STAGE2__ST2MMU__AS5__FAULTEXTRA, /* (RO) 64-bit 0x10578 */ + MMU_STAGE2__ST2MMU__AS6__TRANSTAB, /* (RW) 64-bit 0x10580 */ + MMU_STAGE2__ST2MMU__AS6__MEMATTR, /* (RW) 64-bit 0x10588 */ + MMU_STAGE2__ST2MMU__AS6__LOCKADDR, /* (RW) 64-bit 0x10590 */ + MMU_STAGE2__ST2MMU__AS6__COMMAND, /* (WO) 32-bit 0x10598 */ + MMU_STAGE2__ST2MMU__AS6__FAULTSTATUS, /* (RO) 32-bit 0x1059C */ + MMU_STAGE2__ST2MMU__AS6__FAULTADDRESS, /* (RO) 64-bit 0x105A0 */ + MMU_STAGE2__ST2MMU__AS6__STATUS, /* (RO) 32-bit 0x105A8 */ + MMU_STAGE2__ST2MMU__AS6__TRANSCFG, /* (RW) 64-bit 0x105B0 */ + MMU_STAGE2__ST2MMU__AS6__FAULTEXTRA, /* (RO) 64-bit 0x105B8 */ + MMU_STAGE2__ST2MMU__AS7__TRANSTAB, /* (RW) 64-bit 0x105C0 */ + MMU_STAGE2__ST2MMU__AS7__MEMATTR, /* (RW) 64-bit 0x105C8 */ + MMU_STAGE2__ST2MMU__AS7__LOCKADDR, /* (RW) 64-bit 0x105D0 */ + MMU_STAGE2__ST2MMU__AS7__COMMAND, /* (WO) 32-bit 0x105D8 */ + 
MMU_STAGE2__ST2MMU__AS7__FAULTSTATUS, /* (RO) 32-bit 0x105DC */ + MMU_STAGE2__ST2MMU__AS7__FAULTADDRESS, /* (RO) 64-bit 0x105E0 */ + MMU_STAGE2__ST2MMU__AS7__STATUS, /* (RO) 32-bit 0x105E8 */ + MMU_STAGE2__ST2MMU__AS7__TRANSCFG, /* (RW) 64-bit 0x105F0 */ + MMU_STAGE2__ST2MMU__AS7__FAULTEXTRA, /* (RO) 64-bit 0x105F8 */ + MMU_STAGE2__ST2MMU__AS8__TRANSTAB, /* (RW) 64-bit 0x10600 */ + MMU_STAGE2__ST2MMU__AS8__MEMATTR, /* (RW) 64-bit 0x10608 */ + MMU_STAGE2__ST2MMU__AS8__LOCKADDR, /* (RW) 64-bit 0x10610 */ + MMU_STAGE2__ST2MMU__AS8__COMMAND, /* (WO) 32-bit 0x10618 */ + MMU_STAGE2__ST2MMU__AS8__FAULTSTATUS, /* (RO) 32-bit 0x1061C */ + MMU_STAGE2__ST2MMU__AS8__FAULTADDRESS, /* (RO) 64-bit 0x10620 */ + MMU_STAGE2__ST2MMU__AS8__STATUS, /* (RO) 32-bit 0x10628 */ + MMU_STAGE2__ST2MMU__AS8__TRANSCFG, /* (RW) 64-bit 0x10630 */ + MMU_STAGE2__ST2MMU__AS8__FAULTEXTRA, /* (RO) 64-bit 0x10638 */ + MMU_STAGE2__ST2MMU__AS9__TRANSTAB, /* (RW) 64-bit 0x10640 */ + MMU_STAGE2__ST2MMU__AS9__MEMATTR, /* (RW) 64-bit 0x10648 */ + MMU_STAGE2__ST2MMU__AS9__LOCKADDR, /* (RW) 64-bit 0x10650 */ + MMU_STAGE2__ST2MMU__AS9__COMMAND, /* (WO) 32-bit 0x10658 */ + MMU_STAGE2__ST2MMU__AS9__FAULTSTATUS, /* (RO) 32-bit 0x1065C */ + MMU_STAGE2__ST2MMU__AS9__FAULTADDRESS, /* (RO) 64-bit 0x10660 */ + MMU_STAGE2__ST2MMU__AS9__STATUS, /* (RO) 32-bit 0x10668 */ + MMU_STAGE2__ST2MMU__AS9__TRANSCFG, /* (RW) 64-bit 0x10670 */ + MMU_STAGE2__ST2MMU__AS9__FAULTEXTRA, /* (RO) 64-bit 0x10678 */ + MMU_STAGE2__ST2MMU__AS10__TRANSTAB, /* (RW) 64-bit 0x10680 */ + MMU_STAGE2__ST2MMU__AS10__MEMATTR, /* (RW) 64-bit 0x10688 */ + MMU_STAGE2__ST2MMU__AS10__LOCKADDR, /* (RW) 64-bit 0x10690 */ + MMU_STAGE2__ST2MMU__AS10__COMMAND, /* (WO) 32-bit 0x10698 */ + MMU_STAGE2__ST2MMU__AS10__FAULTSTATUS, /* (RO) 32-bit 0x1069C */ + MMU_STAGE2__ST2MMU__AS10__FAULTADDRESS, /* (RO) 64-bit 0x106A0 */ + MMU_STAGE2__ST2MMU__AS10__STATUS, /* (RO) 32-bit 0x106A8 */ + MMU_STAGE2__ST2MMU__AS10__TRANSCFG, /* (RW) 64-bit 0x106B0 */ + 
MMU_STAGE2__ST2MMU__AS10__FAULTEXTRA, /* (RO) 64-bit 0x106B8 */ + MMU_STAGE2__ST2MMU__AS11__TRANSTAB, /* (RW) 64-bit 0x106C0 */ + MMU_STAGE2__ST2MMU__AS11__MEMATTR, /* (RW) 64-bit 0x106C8 */ + MMU_STAGE2__ST2MMU__AS11__LOCKADDR, /* (RW) 64-bit 0x106D0 */ + MMU_STAGE2__ST2MMU__AS11__COMMAND, /* (WO) 32-bit 0x106D8 */ + MMU_STAGE2__ST2MMU__AS11__FAULTSTATUS, /* (RO) 32-bit 0x106DC */ + MMU_STAGE2__ST2MMU__AS11__FAULTADDRESS, /* (RO) 64-bit 0x106E0 */ + MMU_STAGE2__ST2MMU__AS11__STATUS, /* (RO) 32-bit 0x106E8 */ + MMU_STAGE2__ST2MMU__AS11__TRANSCFG, /* (RW) 64-bit 0x106F0 */ + MMU_STAGE2__ST2MMU__AS11__FAULTEXTRA, /* (RO) 64-bit 0x106F8 */ + MMU_STAGE2__ST2MMU__AS12__TRANSTAB, /* (RW) 64-bit 0x10700 */ + MMU_STAGE2__ST2MMU__AS12__MEMATTR, /* (RW) 64-bit 0x10708 */ + MMU_STAGE2__ST2MMU__AS12__LOCKADDR, /* (RW) 64-bit 0x10710 */ + MMU_STAGE2__ST2MMU__AS12__COMMAND, /* (WO) 32-bit 0x10718 */ + MMU_STAGE2__ST2MMU__AS12__FAULTSTATUS, /* (RO) 32-bit 0x1071C */ + MMU_STAGE2__ST2MMU__AS12__FAULTADDRESS, /* (RO) 64-bit 0x10720 */ + MMU_STAGE2__ST2MMU__AS12__STATUS, /* (RO) 32-bit 0x10728 */ + MMU_STAGE2__ST2MMU__AS12__TRANSCFG, /* (RW) 64-bit 0x10730 */ + MMU_STAGE2__ST2MMU__AS12__FAULTEXTRA, /* (RO) 64-bit 0x10738 */ + MMU_STAGE2__ST2MMU__AS13__TRANSTAB, /* (RW) 64-bit 0x10740 */ + MMU_STAGE2__ST2MMU__AS13__MEMATTR, /* (RW) 64-bit 0x10748 */ + MMU_STAGE2__ST2MMU__AS13__LOCKADDR, /* (RW) 64-bit 0x10750 */ + MMU_STAGE2__ST2MMU__AS13__COMMAND, /* (WO) 32-bit 0x10758 */ + MMU_STAGE2__ST2MMU__AS13__FAULTSTATUS, /* (RO) 32-bit 0x1075C */ + MMU_STAGE2__ST2MMU__AS13__FAULTADDRESS, /* (RO) 64-bit 0x10760 */ + MMU_STAGE2__ST2MMU__AS13__STATUS, /* (RO) 32-bit 0x10768 */ + MMU_STAGE2__ST2MMU__AS13__TRANSCFG, /* (RW) 64-bit 0x10770 */ + MMU_STAGE2__ST2MMU__AS13__FAULTEXTRA, /* (RO) 64-bit 0x10778 */ + MMU_STAGE2__ST2MMU__AS14__TRANSTAB, /* (RW) 64-bit 0x10780 */ + MMU_STAGE2__ST2MMU__AS14__MEMATTR, /* (RW) 64-bit 0x10788 */ + MMU_STAGE2__ST2MMU__AS14__LOCKADDR, /* (RW) 64-bit 0x10790 */ + 
MMU_STAGE2__ST2MMU__AS14__COMMAND, /* (WO) 32-bit 0x10798 */ + MMU_STAGE2__ST2MMU__AS14__FAULTSTATUS, /* (RO) 32-bit 0x1079C */ + MMU_STAGE2__ST2MMU__AS14__FAULTADDRESS, /* (RO) 64-bit 0x107A0 */ + MMU_STAGE2__ST2MMU__AS14__STATUS, /* (RO) 32-bit 0x107A8 */ + MMU_STAGE2__ST2MMU__AS14__TRANSCFG, /* (RW) 64-bit 0x107B0 */ + MMU_STAGE2__ST2MMU__AS14__FAULTEXTRA, /* (RO) 64-bit 0x107B8 */ + MMU_STAGE2__ST2MMU__AS15__TRANSTAB, /* (RW) 64-bit 0x107C0 */ + MMU_STAGE2__ST2MMU__AS15__MEMATTR, /* (RW) 64-bit 0x107C8 */ + MMU_STAGE2__ST2MMU__AS15__LOCKADDR, /* (RW) 64-bit 0x107D0 */ + MMU_STAGE2__ST2MMU__AS15__COMMAND, /* (WO) 32-bit 0x107D8 */ + MMU_STAGE2__ST2MMU__AS15__FAULTSTATUS, /* (RO) 32-bit 0x107DC */ + MMU_STAGE2__ST2MMU__AS15__FAULTADDRESS, /* (RO) 64-bit 0x107E0 */ + MMU_STAGE2__ST2MMU__AS15__STATUS, /* (RO) 32-bit 0x107E8 */ + MMU_STAGE2__ST2MMU__AS15__TRANSCFG, /* (RW) 64-bit 0x107F0 */ + MMU_STAGE2__ST2MMU__AS15__FAULTEXTRA, /* (RO) 64-bit 0x107F8 */ + NR_V6_0_REGS, +}; + +enum kbase_regmap_enum_v6_2 { + GPU_CONTROL__REVIDR = NR_V6_0_REGS, /* (RO) 32-bit 0x280 */ + GPU_CONTROL__STACK_PRESENT, /* (RO) 64-bit 0xE00 */ + GPU_CONTROL__STACK_PWROFF, /* (WO) 64-bit 0xE30 */ + GPU_CONTROL__STACK_PWRON, /* (WO) 64-bit 0xE20 */ + GPU_CONTROL__STACK_PWRTRANS, /* (RO) 64-bit 0xE40 */ + GPU_CONTROL__STACK_READY, /* (RO) 64-bit 0xE10 */ + NR_V6_2_REGS, /* one past the last v6.2 entry; the v7.0 enum starts numbering here */ +}; + +enum kbase_regmap_enum_v7_0 { + GPU_CONTROL__TEXTURE_FEATURES_3 = NR_V6_2_REGS, /* (RO) 32-bit 0xBC */ + NR_V7_0_REGS, /* one past the last v7.0 entry; the v7.2 enum starts numbering here */ +}; + +enum kbase_regmap_enum_v7_2 { + GPU_CONTROL__CORE_FEATURES = NR_V7_0_REGS, /* (RO) 32-bit 0x8 */ + GPU_CONTROL__THREAD_TLS_ALLOC, /* (RO) 32-bit 0x310 */ + NR_V7_2_REGS, /* one past the last v7.2 entry; reused unchanged as NR_V9_0_REGS */ +}; + +enum kbase_regmap_enum_v9_0 { + NR_V9_0_REGS = NR_V7_2_REGS, /* this enum adds no entries of its own */ +}; + +/* + * V9_0_REMOVED_REGS: + * GPU_CONTROL__CORE_FEATURES + * GPU_CONTROL__THREAD_TLS_ALLOC + * JOB_CONTROL__JOB_IRQ_THROTTLE + */ + +enum kbase_regmap_enum_v9_2 { + GPU_CONTROL__L2_CONFIG = NR_V9_0_REGS, /* (RW) 32-bit 0x48 */ + NR_V9_2_REGS, /* one past the last v9.2 entry */ +}; + 
+#endif /* _MALI_KBASE_REGMAP_JM_ENUMS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h new file mode 100644 index 000000000000..1cdd215735eb --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_KBASE_REGMAP_JM_MACROS_H_ +#define _MALI_KBASE_REGMAP_JM_MACROS_H_ + +#if MALI_USE_CSF +#error "Cannot be compiled with CSF" +#endif + +#define ENUM_OFFSET(_index, _base, _next) (_base + _index * (_next - _base)) + +#define GPU_CONTROL_ENUM(regname) GPU_CONTROL__##regname +#define GPU_TEXTURE_FEATURES_ENUM(n) GPU_CONTROL_ENUM(TEXTURE_FEATURES_##n) +#define GPU_TEXTURE_FEATURES_OFFSET(n) (GPU_TEXTURE_FEATURES_ENUM(0) + n) +#define GPU_JS_FEATURES_ENUM(n) GPU_CONTROL_ENUM(JS##n##_FEATURES) +#define GPU_JS_FEATURES_OFFSET(n) (GPU_JS_FEATURES_ENUM(0) + n) + +#define JOB_CONTROL_ENUM(regname) JOB_CONTROL__##regname +#define JOB_SLOT_ENUM(n, regname) JOB_CONTROL_ENUM(JS##n##__##regname) +#define JOB_SLOT_BASE_ENUM(n) JOB_SLOT_ENUM(n, HEAD) +#define JOB_SLOT_OFFSET(n, regname) \ + ENUM_OFFSET(n, JOB_SLOT_ENUM(0, regname), JOB_SLOT_ENUM(1, regname)) +#define JOB_SLOT_BASE_OFFSET(n) JOB_SLOT_OFFSET(n, HEAD) + +#define MMU_CONTROL_ENUM(regname) MMU_STAGE1__ST1MMU__##regname +#define MMU_AS_ENUM(n, regname) MMU_CONTROL_ENUM(AS##n##__##regname) +#define MMU_AS_BASE_ENUM(n) MMU_AS_ENUM(n, TRANSTAB) +#define MMU_AS_OFFSET(n, regname) ENUM_OFFSET(n, MMU_AS_ENUM(0, regname), MMU_AS_ENUM(1, regname)) +#define MMU_AS_BASE_OFFSET(n) MMU_AS_OFFSET(n, TRANSTAB) + +/* register value macros */ +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. 
+ */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 + +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define AS_TRANSTAB_BASE_SHIFT GPU_U(4) +#define AS_TRANSTAB_BASE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFF) << AS_TRANSTAB_BASE_SHIFT) +#define AS_TRANSTAB_BASE_GET(reg_val) (((reg_val)&AS_TRANSTAB_BASE_MASK) >> AS_TRANSTAB_BASE_SHIFT) +#define AS_TRANSTAB_BASE_SET(reg_val, value) \ + (~(~(reg_val) | AS_TRANSTAB_BASE_MASK) | \ + (((uint64_t)(value) << AS_TRANSTAB_BASE_SHIFT) & AS_TRANSTAB_BASE_MASK)) + +#define AS_FAULTSTATUS_EXCEPTION_TYPE_OK 0x0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_DONE 0x1 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_STOPPED 0x3 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TERMINATED 0x4 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_KABOOM 0x5 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_EUREKA 0x6 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACTIVE 0x8 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_JOB_CONFIG_FAULT 0x40 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_JOB_POWER_FAULT 0x41 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_JOB_READ_FAULT 0x42 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_JOB_WRITE_FAULT 0x43 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_JOB_AFFINITY_FAULT 0x44 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_JOB_BUS_FAULT 0x48 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A +#define AS_FAULTSTATUS_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B +#define AS_FAULTSTATUS_EXCEPTION_TYPE_OUT_OF_MEMORY 0x60 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_UNKNOWN 0x7F +#define AS_FAULTSTATUS_EXCEPTION_TYPE_DELAYED_BUS_FAULT 0x80 +#define 
AS_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_1 0xC1 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_2 0xC2 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_3 0xC3 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_4 0xC4 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_IDENTITY 0xC7 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA +#define AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_0 0xD0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_1 0xD1 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_2 0xD2 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_3 0xD3 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_0 0xD8 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN0 0xE0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN1 0xE1 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN2 0xE2 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN3 0xE3 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT0 0xE4 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT1 0xE5 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT2 0xE6 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT3 0xE7 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8 +#define 
AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_0 0xEC +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_1 0xED +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_2 0xEE +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_3 0xEF + +/* No JM-specific MMU control registers */ +/* No JM-specific MMU address space control registers */ + +/* JS_COMMAND register commands */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) +#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU (1u << 10) +#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) +#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION +#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define 
JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + +/* JS_STATUS register values */ + +/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h. + * The values are kept separate to avoid a dependency between userspace and kernel code. + */ + +/* Group of values representing the job status instead of a particular fault */ +#define JS_STATUS_NO_EXCEPTION_BASE 0x00 +#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ +#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ +#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + +/* General fault values */ +#define JS_STATUS_FAULT_BASE 0x40 +#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ +#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ +#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ +#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ +#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ +#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + +/* Instruction or data faults */ +#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 +#define JS_STATUS_INSTR_INVALID_PC \ + (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ +#define JS_STATUS_INSTR_INVALID_ENC \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ +#define JS_STATUS_INSTR_TYPE_MISMATCH \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ +#define JS_STATUS_INSTR_OPERAND_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE 
+ 0x03) /* 0x53 means INSTR OPERAND FAULT */ +#define JS_STATUS_INSTR_TLS_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ +#define JS_STATUS_INSTR_BARRIER_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ +#define JS_STATUS_INSTR_ALIGN_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ +/* NOTE: No fault with 0x57 code defined in spec. */ +#define JS_STATUS_DATA_INVALID_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ +#define JS_STATUS_TILE_RANGE_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ +#define JS_STATUS_ADDRESS_RANGE_FAULT \ + (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + +/* Other faults */ +#define JS_STATUS_MEMORY_FAULT_BASE 0x60 +#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ +#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + +/* JS_FEATURES register */ +#define JS_FEATURE_NULL_JOB (1u << 1) +#define JS_FEATURE_SET_VALUE_JOB (1u << 2) +#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JS_FEATURE_COMPUTE_JOB (1u << 4) +#define JS_FEATURE_VERTEX_JOB (1u << 5) +#define JS_FEATURE_GEOMETRY_JOB (1u << 6) +#define JS_FEATURE_TILER_JOB (1u << 7) +#define JS_FEATURE_FUSED_JOB (1u << 8) +#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE (1ul << 0) +#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) +#define JM_JOB_THROTTLE_ENABLE (1ul << 2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) +#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) +#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) + +/* GPU_COMMAND values */ +#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_SOFT_RESET \ + 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. 
*/ +#define GPU_COMMAND_PRFCNT_CLEAR \ + 0x03 /* Clear all performance counters, setting them all to zero. */ +#define GPU_COMMAND_PRFCNT_SAMPLE \ + 0x04 /* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_CYCLE_COUNT_START \ + 0x05 /* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP \ + 0x06 /* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + +/* GPU_COMMAND cache flush alias to CSF command payload */ +#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES + +/* Merge cache flush commands */ +#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define FLUSH_PA_RANGE_COMPLETED \ + (1 << 20) /* Set when a physical range cache clean operation has completed. 
*/ + +/* + * In Debug build, + * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable + * interrupt sources of GPU_IRQ by writing it to GPU_IRQ_CLEAR/MASK registers. + * + * In Release build, + * GPU_IRQ_REG_COMMON is used. + * + * Note: + * CLEAN_CACHES_COMPLETED - Used separately for cache operation. + */ +#define GPU_IRQ_REG_COMMON \ + (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED | POWER_CHANGED_ALL | \ + PRFCNT_SAMPLE_COMPLETED) + +#endif /* _MALI_KBASE_REGMAP_JM_MACROS_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_csf.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_csf.h new file mode 100644 index 000000000000..0e3a56e9d664 --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_csf.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_KBASE_REGMAP_LEGACY_CSF_H_ +#define _MALI_KBASE_REGMAP_LEGACY_CSF_H_ + +#if !MALI_USE_CSF && defined(__KERNEL__) +#error "Cannot be compiled with JM" +#endif +/* GPU control registers */ +#define MCU_CONTROL 0x700 + +/* GPU_CONTROL_MCU base address */ +#define GPU_CONTROL_MCU_BASE 0x3000 + +/* MCU_SUBSYSTEM base address */ +#define MCU_SUBSYSTEM_BASE 0x20000 + +/* IPA control registers */ +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) + +#define COMMAND 0x000 /* (WO) Command register */ +#define STATUS 0x004 /* (RO) Status register */ +#define TIMER 0x008 /* (RW) Timer control register */ + +#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ +#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ +#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ +#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ +#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ +#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ +#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ +#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ + +/* Accumulated counter values for CS hardware */ +#define VALUE_CSHW_BASE 0x100 +#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_CSHW_REG_HI(n) \ + (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +/* Accumulated counter values for memory system */ +#define VALUE_MEMSYS_BASE 0x140 +#define VALUE_MEMSYS_REG_LO(n) \ + (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_MEMSYS_REG_HI(n) \ + (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#define VALUE_TILER_BASE 0x180 +#define 
VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_TILER_REG_HI(n) \ + (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#define VALUE_SHADER_BASE 0x1C0 +#define VALUE_SHADER_REG_LO(n) \ + (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_SHADER_REG_HI(n) \ + (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +/* Configuration bits for the CSF. */ +#define CSF_CONFIG 0xF00 + +/* GPU control registers */ +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define MCU_STATUS 0x704 + +#endif /* _MALI_KBASE_REGMAP_LEGACY_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_jm.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_jm.h new file mode 100644 index 000000000000..c1a54991f4c6 --- /dev/null +++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_legacy_jm.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_KBASE_REGMAP_LEGACY_JM_H_ +#define _MALI_KBASE_REGMAP_LEGACY_JM_H_ + +#if MALI_USE_CSF && defined(__KERNEL__) +#error "Cannot be compiled with CSF" +#endif + +/* GPU control registers */ +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define JS_PRESENT 0x01C /* (RO) Job slots present */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ + +#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ +#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ +#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ +#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ +#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ +#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ +#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ +#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ +#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ +#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ +#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ +#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ +#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ +#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ +#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ +#define 
JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + +#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + +#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ + +/* Job control registers */ +/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_JS_STATE 0x010 +/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS + * is NOT affected by this, just the delivery of the interrupt. + */ +#define JOB_IRQ_THROTTLE 0x014 + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +/* JM Job control register definitions for mali_kbase_debug_job_fault */ +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define 
JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + +#endif /* _MALI_KBASE_REGMAP_LEGACY_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/Kbuild b/drivers/gpu/arm/bifrost/hwcnt/Kbuild index c1a381b24593..959c1f6134f1 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/Kbuild +++ b/drivers/gpu/arm/bifrost/hwcnt/Kbuild @@ -21,7 +21,6 @@ bifrost_kbase-y += \ hwcnt/mali_kbase_hwcnt.o \ hwcnt/mali_kbase_hwcnt_gpu.o \ - hwcnt/mali_kbase_hwcnt_gpu_narrow.o \ hwcnt/mali_kbase_hwcnt_types.o \ hwcnt/mali_kbase_hwcnt_virtualizer.o \ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h index 
6cfa6f5ee6f4..cc3ba98ab6fe 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -123,11 +123,21 @@ kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend, * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with * the backend. * @backend: Non-NULL pointer to backend. + * @dump_buffer: Pointer to an accumulated dump buffer to update or NULL. + * @enable_map: Pointer to enable map specifying enabled counters. Must be NULL if no @dump_buffer * * If the backend is already disabled, does nothing. - * Any undumped counter values since the last dump get will be lost. + * + * Any undumped counter values since the last dump get will be lost. However, undumped block state + * can be retained by the backend. + * + * @dump_buffer and @enable_map give the backend an opportunity to update an existing accumulated + * buffer with state information, and for the caller to take ownership of it. In particular, the + * caller can use this when they require such information whilst the counter dumps are disabled. 
*/ -typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); +typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map); /** * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index 4a429a6cd1ae..f23a5aacdbfd 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -172,15 +172,16 @@ struct kbase_hwcnt_backend_csf_info { /** * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout - * information. - * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are - * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. - * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. + * information, as defined by the spec. * @fe_cnt: Front end block count. * @tiler_cnt: Tiler block count. * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. * @shader_cnt: Shader Core block count. - * @fw_block_cnt: Total number of firmware counters blocks. + * @fw_block_cnt: Total number of firmware counter blocks, with a single + * global FW block and a block per CSG. + * @hw_block_cnt: Total number of hardware counter blocks. The hw counters blocks are + * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. + * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt). * @shader_avail_mask: Bitmap of all shader cores in the system. 
* @enable_mask_offset: Offset in array elements of enable mask in each block @@ -190,12 +191,12 @@ struct kbase_hwcnt_backend_csf_info { * @values_per_block: For any block, the number of counters in total (header + payload). */ struct kbase_hwcnt_csf_physical_layout { - u8 hw_block_cnt; u8 fe_cnt; u8 tiler_cnt; u8 mmu_l2_cnt; u8 shader_cnt; u8 fw_block_cnt; + u8 hw_block_cnt; u8 block_cnt; u64 shader_avail_mask; size_t enable_mask_offset; @@ -220,6 +221,13 @@ struct kbase_hwcnt_csf_physical_layout { * @old_sample_buf: HWC sample buffer to save the previous values * for delta calculation, size * prfcnt_info.dump_bytes. + * @block_states: Pointer to array of block_state values for all + * blocks. + * @to_user_block_states: Block state buffer for client user. + * @accum_all_blk_stt: Block state to accumulate for all known blocks + * on next sample. + * @sampled_all_blk_stt: Block State to accumulate for all known blocks + * into the current sample. * @watchdog_last_seen_insert_idx: The insert index which watchdog has last * seen, to check any new firmware automatic * samples generated during the watchdog @@ -243,6 +251,8 @@ struct kbase_hwcnt_csf_physical_layout { * @hwc_dump_work: Worker to accumulate samples. * @hwc_threshold_work: Worker for consuming available samples when * threshold interrupt raised. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. 
*/ struct kbase_hwcnt_backend_csf { struct kbase_hwcnt_backend_csf_info *info; @@ -253,6 +263,10 @@ struct kbase_hwcnt_backend_csf { u64 *to_user_buf; u64 *accum_buf; u32 *old_sample_buf; + blk_stt_t *block_states; + blk_stt_t *to_user_block_states; + blk_stt_t accum_all_blk_stt; + blk_stt_t sampled_all_blk_stt; u32 watchdog_last_seen_insert_idx; struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; void *ring_buf_cpu_base; @@ -265,15 +279,45 @@ struct kbase_hwcnt_backend_csf { struct workqueue_struct *hwc_dump_workq; struct work_struct hwc_dump_work; struct work_struct hwc_threshold_work; + size_t num_l2_slices; + u64 shader_present_bitmap; }; static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) { - WARN_ON(!csf_info); + if (WARN_ON(!csf_info)) + return false; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); return (csf_info->backend != NULL); } +void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, + size_t num_l2_slices, u64 shader_present_bitmap) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + if (!iface) + return; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + /* Early out if the backend does not exist. */ + if (!csf_info || !csf_info->backend) + return; + + if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)) + return; + + if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) || + WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) != + shader_present_bitmap)) + return; + + csf_info->backend->num_l2_slices = num_l2_slices; + csf_info->backend->shader_present_bitmap = shader_present_bitmap; +} + /** * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count * tracking. 
@@ -352,8 +396,7 @@ kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_m /* Enable header if any counter is required from user, the header is * controlled by bit 0 of the enable mask. */ - if (phys_enable_map->fe_bm) - phys_enable_map->fe_bm |= 1; + phys_enable_map->fe_bm |= 1; if (phys_enable_map->tiler_bm) phys_enable_map->tiler_bm |= 1; @@ -363,6 +406,13 @@ kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_m if (phys_enable_map->shader_bm) phys_enable_map->shader_bm |= 1; + + if (phys_enable_map->fw_bm) + phys_enable_map->fw_bm |= 1; + + if (phys_enable_map->csg_bm) + phys_enable_map->csg_bm |= 1; + } static void kbasep_hwcnt_backend_csf_init_layout( @@ -371,32 +421,35 @@ static void kbasep_hwcnt_backend_csf_init_layout( { size_t shader_core_cnt; size_t values_per_block; - size_t fw_blocks_count; - size_t hw_blocks_count; + size_t fw_block_cnt; + size_t hw_block_cnt; + size_t core_cnt; + WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); shader_core_cnt = fls64(prfcnt_info->core_mask); values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; - fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); - hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); + hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + + core_cnt = shader_core_cnt; /* The number of hardware counters reported by the GPU matches the legacy guess-work we * have done in the past */ - WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT + - KBASE_HWCNT_V5_TILER_BLOCK_COUNT + - prfcnt_info->l2_count + shader_core_cnt); + WARN_ON(hw_block_cnt != KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + + prfcnt_info->l2_count + core_cnt); *phys_layout = (struct kbase_hwcnt_csf_physical_layout){ .fe_cnt = 
KBASE_HWCNT_V5_FE_BLOCK_COUNT, .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, .mmu_l2_cnt = prfcnt_info->l2_count, .shader_cnt = shader_core_cnt, - .fw_block_cnt = fw_blocks_count, - .hw_block_cnt = hw_blocks_count, - .block_cnt = fw_blocks_count + hw_blocks_count, + .fw_block_cnt = fw_block_cnt, + .hw_block_cnt = hw_block_cnt, + .block_cnt = fw_block_cnt + hw_block_cnt, .shader_avail_mask = prfcnt_info->core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = values_per_block, @@ -409,10 +462,14 @@ static void kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + size_t block_state_bytes = backend_csf->phys_layout.block_cnt * + KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE; memset(backend_csf->to_user_buf, 0, user_buf_bytes); memset(backend_csf->accum_buf, 0, user_buf_bytes); memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->block_states, 0, block_state_bytes); + memset(backend_csf->to_user_block_states, 0, block_state_bytes); } static void @@ -450,38 +507,127 @@ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_cs static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + size_t block_state_bytes = backend_csf->phys_layout.block_cnt * + KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE; /* Copy the data into the sample and wait for the user to get it. */ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes); + memcpy(backend_csf->to_user_block_states, backend_csf->block_states, block_state_bytes); /* After copied data into user sample, clear the accumulator values to * prepare for the next accumulator, such as the next request or * threshold. 
*/ memset(backend_csf->accum_buf, 0, user_buf_bytes); + memset(backend_csf->block_states, 0, block_state_bytes); +} + +/** + * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with + * information from a sample. + * @phys_layout: Physical memory layout information of HWC + * sample buffer. + * @enable_mask: Counter enable mask for the block whose state is being updated. + * @enable_state: The CSF backend internal enabled state. + * @exiting_protm: Whether or not the sample is taken when the GPU is exiting + * protected mode. + * @block_idx: Index of block within the ringbuffer. + * @block_state: Pointer to existing block state of the block whose state is being + * updated. + * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. + */ +static void kbasep_hwcnt_backend_csf_update_block_state( + const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask, + enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode) +{ + /* Offset of shader core blocks from the start of the HW blocks in the sample */ + size_t shader_core_block_offset = phys_layout->hw_block_cnt - phys_layout->shader_cnt; + bool is_shader_core_block; + + is_shader_core_block = block_idx >= shader_core_block_offset; + + /* Set power bits for the block state for the block, for the sample */ + switch (enable_state) { + /* Disabled states */ + case KBASE_HWCNT_BACKEND_CSF_DISABLED: + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: + case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_OFF); + break; + /* Enabled states */ + case KBASE_HWCNT_BACKEND_CSF_ENABLED: + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: + if (!is_shader_core_block) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_ON); + else if (!exiting_protm) { + /* When not 
exiting protected mode, a zero enable mask on a shader core + * counter block indicates the block was powered off for the sample, and + * a non-zero counter enable mask indicates the block was powered on for + * the sample. + */ + kbase_hwcnt_block_state_append(block_state, + (enable_mask ? KBASE_HWCNT_STATE_ON : + KBASE_HWCNT_STATE_OFF)); + } + break; + /* Error states */ + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: + default: + /* Do nothing */ + break; + } + + /* The following four cases apply to a block state in either normal mode or protected mode: + * 1. GPU executing in normal mode: Only set normal mode bit. + * 2. First sample request after GPU enters protected mode: Set both normal mode and + * protected mode bit. In this case, there will at least be one sample to accumulate + * in the ring buffer which was automatically triggered before GPU entered protected + * mode. + * 3. Subsequent sample requests while GPU remains in protected mode: Only set protected + * mode bit. In this case, the ring buffer should be empty and dump should return 0s but + * block state should be updated accordingly. This case is not handled here. + * 4. Samples requested after GPU exits protected mode: Set both protected mode and normal + * mode bits. 
+ */ + if (exiting_protm || fw_in_protected_mode) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_PROTECTED | + KBASE_HWCNT_STATE_NORMAL); + else + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL); } static void kbasep_hwcnt_backend_csf_accumulate_sample( const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, - u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples) + u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, + blk_stt_t *const block_states, bool clearing_samples, + enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode) { size_t block_idx; const u32 *old_block = old_sample_buf; const u32 *new_block = new_sample_buf; u64 *acc_block = accum_buf; + /* Flag to indicate whether current sample is when exiting protected mode. */ + bool exiting_protm = false; const size_t values_per_block = phys_layout->values_per_block; - /* Performance counter blocks for firmware are stored before blocks for hardware. - * We skip over the firmware's performance counter blocks (counters dumping is not - * supported for firmware blocks, only hardware ones). + /* The block pointers now point to the first HW block, which is always a CSHW/front-end + * block. The counter enable mask for this block can be checked to determine whether this + * sample is taken after leaving protected mode - this is the only scenario where the CSHW + * block counter enable mask is all-zero. In this case, the values in this sample would not + * be meaningful, so they don't need to be accumulated. 
*/ - old_block += values_per_block * phys_layout->fw_block_cnt; - new_block += values_per_block * phys_layout->fw_block_cnt; + exiting_protm = !new_block[phys_layout->enable_mask_offset]; - for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; - block_idx++) { + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; + /* Update block state with information of the current sample */ + kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask, + enable_state, exiting_protm, block_idx, + &block_states[block_idx], + fw_in_protected_mode); if (new_enable_mask == 0) { /* Hardware block was unavailable or we didn't turn on @@ -492,7 +638,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( * enabled. We need to update the accumulation buffer. */ size_t ctr_idx; - /* Unconditionally copy the headers. */ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) { acc_block[ctr_idx] = new_block[ctr_idx]; @@ -518,7 +663,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( */ if (!clearing_samples) { if (old_enable_mask == 0) { - /* Hardware block was previously + /* Block was previously * unavailable. Accumulate the new * counters only, as we know previous * values are zeroes. 
@@ -545,15 +690,14 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( } } } + old_block += values_per_block; new_block += values_per_block; acc_block += values_per_block; } - WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); - WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - - (values_per_block * phys_layout->fw_block_cnt)); + WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); (void)dump_bytes; } @@ -569,10 +713,23 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf = old_sample_buf; + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout; - if (extract_index_to_start == insert_index_to_stop) - /* No samples to accumulate. Early out. */ + if (extract_index_to_start == insert_index_to_stop) { + /* No samples to accumulate but block states need to be updated for dump. */ + size_t block_idx; + + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + /* Set protected mode bit for block state if GPU is in protected mode, + * otherwise set the normal mode bit. + */ + kbase_hwcnt_block_state_append(&backend_csf->block_states[block_idx], + backend_csf->info->fw_in_protected_mode ? + KBASE_HWCNT_STATE_PROTECTED : + KBASE_HWCNT_STATE_NORMAL); + } return; + } /* Sync all the buffers to CPU side before read the data. 
*/ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, @@ -587,11 +744,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - - kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout, - buf_dump_bytes, backend_csf->accum_buf, - old_sample_buf, new_sample_buf, - clearing_samples); + kbasep_hwcnt_backend_csf_accumulate_sample( + phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf, + new_sample_buf, backend_csf->block_states, clearing_samples, + backend_csf->enable_state, backend_csf->info->fw_in_protected_mode); old_sample_buf = new_sample_buf; } @@ -875,6 +1031,8 @@ kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *bac enable->shader_bm = phys_enable_map.shader_bm; enable->tiler_bm = phys_enable_map.tiler_bm; enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; + enable->fw_bm = phys_enable_map.fw_bm; + enable->csg_bm = phys_enable_map.csg_bm; enable->counter_set = phys_counter_set; enable->clk_enable_map = enable_map->clk_enable_map; } @@ -893,6 +1051,17 @@ kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend, backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + /* Enabling counters is an indication that the power may have previously been off for all + * blocks. + * + * In any case, the counters would not have been counting recently, so an 'off' block state + * is an approximation for this. + * + * This will be transferred to the dump only after a dump_wait(), or dump_disable() in + * cases where the caller requested such information. This is to handle when a + * dump_enable() happens in between dump_wait() and dump_get(). 
+ */ + kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); /* enable_state should be DISABLED before we transfer it to enabled */ @@ -956,13 +1125,19 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( } /* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) +static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map) { unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_disable = false; - WARN_ON(!backend_csf); + if (WARN_ON(!backend_csf || + (dump_buffer && (backend_csf->info->metadata != dump_buffer->metadata)) || + (enable_map && (backend_csf->info->metadata != enable_map->metadata)) || + (dump_buffer && !enable_map))) + return; backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); @@ -1048,6 +1223,42 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba * for next enable. */ kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); + + /* Disabling HWCNT is an indication that blocks have been powered off. This is important to + * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can + * know if they are being powered off. + * + * In any case, even if they weren't really powered off, we won't be counting whilst + * disabled. + * + * Update the block state information in the block state accumulator to show this, so that + * in the next dump blocks will have been seen as powered off for some of the time. 
+ */ + kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); + + if (dump_buffer) { + /* In some use-cases, the caller will need the information whilst the counters are + * disabled, but will not be able to call into the backend to dump them. Instead, + * they have an opportunity here to request them to be accumulated into their + * buffer immediately. + * + * This consists of taking a sample of the accumulated block state (as though a + * real dump_get() had happened), then transfer ownership of that to the caller + * (i.e. erasing our copy of it). + */ + kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt, + &backend_csf->accum_all_blk_stt); + kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map, + backend_csf->sampled_all_blk_stt); + /* Now the block state has been passed out into the caller's own accumulation + * buffer, clear our own accumulated and sampled block state - ownership has been + * transferred. + */ + kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + } } /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ @@ -1183,6 +1394,16 @@ static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backen backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + /* Now that we've completed a sample, also sample+clear the accumulated block state. + * + * This is to ensure that a dump_enable() that happens in between dump_wait() and + * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block + * state is reported at the actual time that counters are being sampled. 
+ */ + kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt, + &backend_csf->accum_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + return errcode; } @@ -1238,7 +1459,20 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend * as it is undefined to call this function without a prior succeeding * one to dump_wait(). */ - ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); + ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, + backend_csf->to_user_block_states, dst_enable_map, + backend_csf->num_l2_slices, + backend_csf->shader_present_bitmap, accumulate); + + /* If no error occurred (zero ret value), then update block state for all blocks in the + * accumulation with the current sample's block state. + */ + if (!ret) { + kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map, + backend_csf->sampled_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + } return ret; } @@ -1269,6 +1503,12 @@ static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *bac kfree(backend_csf->to_user_buf); backend_csf->to_user_buf = NULL; + kfree(backend_csf->block_states); + backend_csf->block_states = NULL; + + kfree(backend_csf->to_user_block_states); + backend_csf->to_user_block_states = NULL; + kfree(backend_csf); } @@ -1285,6 +1525,7 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info * { struct kbase_hwcnt_backend_csf *backend_csf = NULL; int errcode = -ENOMEM; + size_t block_state_bytes; WARN_ON(!csf_info); WARN_ON(!out_backend); @@ -1308,6 +1549,17 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info * if (!backend_csf->to_user_buf) goto err_alloc_user_sample_buf; + /* Allocate space to store block state values for each block */ + block_state_bytes = backend_csf->phys_layout.block_cnt * 
KBASE_HWCNT_BLOCK_STATE_BYTES * + KBASE_HWCNT_BLOCK_STATE_STRIDE; + backend_csf->block_states = kzalloc(block_state_bytes, GFP_KERNEL); + if (!backend_csf->block_states) + goto err_alloc_block_states_buf; + + backend_csf->to_user_block_states = kzalloc(block_state_bytes, GFP_KERNEL); + if (!backend_csf->to_user_block_states) + goto err_alloc_user_block_state_buf; + errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt, &backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf); @@ -1343,6 +1595,8 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info * complete_all(&backend_csf->dump_completed); backend_csf->user_requested = false; backend_csf->watchdog_last_seen_insert_idx = 0; + kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); *out_backend = backend_csf; return 0; @@ -1351,6 +1605,12 @@ err_alloc_workqueue: backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, backend_csf->ring_buf); err_ring_buf_alloc: + kfree(backend_csf->to_user_block_states); + backend_csf->to_user_block_states = NULL; +err_alloc_user_block_state_buf: + kfree(backend_csf->block_states); + backend_csf->block_states = NULL; +err_alloc_block_states_buf: kfree(backend_csf->to_user_buf); backend_csf->to_user_buf = NULL; err_alloc_user_sample_buf: @@ -1417,7 +1677,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) if (!backend) return; - kbasep_hwcnt_backend_csf_dump_disable(backend); + kbasep_hwcnt_backend_csf_dump_disable(backend, NULL, NULL); /* Set the backend in csf_info to NULL so we won't handle any external * notification anymore since we are terminating. 
@@ -1828,7 +2088,21 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface * if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) return -EIO; + /* We should reject initializing the metadata for any malformed + * firmware size. The legitimate firmware sizes are as follows: + * 1. fw_size == 0 on older GPUs + * 2. fw_size == block_size on GPUs that support FW counters but not CSG counters + * 3. fw_size == (1 + #CSG) * block size on GPUs that support CSG counters + */ + if ((csf_info->prfcnt_info.prfcnt_fw_size != 0) && + (csf_info->prfcnt_info.prfcnt_fw_size != csf_info->prfcnt_info.prfcnt_block_size) && + (csf_info->prfcnt_info.prfcnt_fw_size != + ((csf_info->prfcnt_info.csg_count + 1) * csf_info->prfcnt_info.prfcnt_block_size))) + return -EINVAL; + + gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0; gpu_info.l2_count = csf_info->prfcnt_info.l2_count; + gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count; gpu_info.core_mask = csf_info->prfcnt_info.core_mask; gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; gpu_info.prfcnt_values_per_block = diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h index 9c5a5c996ebd..da78c1f76aae 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -114,6 +114,19 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i */ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface); +/** + * kbase_hwcnt_backend_csf_set_hw_availability() - CSF HWC backend function to + * set current HW configuration. + * HWC must be disabled before + * this function is called. + * @iface: Non-NULL pointer to HWC backend interface. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + */ +void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, + size_t num_l2_slices, + uint64_t shader_present_bitmap); + /** * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample * complete interrupt handler. diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h index 382a3adaa127..65bb965bcf9c 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,8 @@ struct kbase_hwcnt_backend_csf_if_ring_buf; * @shader_bm: Shader counters selection bitmask. * @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. 
+ * @fw_bm: FW counters selection bitmask + * @csg_bm: FW CSG counters selection bitmask. * @counter_set: The performance counter set to enable. * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle * counter for a given clock domain. @@ -48,6 +50,8 @@ struct kbase_hwcnt_backend_csf_if_enable { u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; + u32 fw_bm; + u32 csg_bm; u8 counter_set; u64 clk_enable_map; }; @@ -63,6 +67,7 @@ struct kbase_hwcnt_backend_csf_if_enable { * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size. * @prfcnt_block_size: Bytes of each performance counter block. * @l2_count: The MMU L2 cache count. + * @csg_count: The total number of CSGs in the system * @core_mask: Shader core mask. * @clk_cnt: Clock domain count in the system. * @clearing_samples: Indicates whether counters are cleared after each sample @@ -74,6 +79,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { size_t dump_bytes; size_t prfcnt_block_size; size_t l2_count; + u32 csg_count; u64 core_mask; u8 clk_cnt; bool clearing_samples; @@ -85,8 +91,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { * held. * @ctx: Non-NULL pointer to a CSF context. */ -typedef void -kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. @@ -95,8 +101,8 @@ kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if * @flags: Pointer to the memory location that would store the previous * interrupt state. */ -typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long *flags); +typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags); /** * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. 
@@ -105,8 +111,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_i * @flags: Previously stored interrupt state when Scheduler interrupt * spinlock was acquired. */ -typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long flags); +typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long flags); /** * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance @@ -115,7 +121,7 @@ typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf * @prfcnt_info: Non-NULL pointer to struct where performance counter * information should be stored. */ -typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( +typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); @@ -135,10 +141,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( * * Return: 0 on success, else error code. */ -typedef int -kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u32 buf_count, void **cpu_dump_base, - struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); +typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers @@ -157,10 +162,10 @@ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_c * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU * are correctly observed. 
*/ -typedef void -kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - u32 buf_index_first, u32 buf_index_last, bool for_cpu); +typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, u32 buf_index_first, + u32 buf_index_last, bool for_cpu); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for @@ -169,9 +174,9 @@ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ct * @ctx: Non-NULL pointer to a CSF interface context. * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. */ -typedef void -kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); +typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_free_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); /** * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current @@ -181,7 +186,8 @@ kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ct * * Return: CSF interface timestamp in nanoseconds. */ -typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware @@ -192,10 +198,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backen * * Requires lock to be taken before calling. 
*/ -typedef void -kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable); +typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable); /** * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter @@ -204,7 +210,8 @@ kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. @@ -213,7 +220,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backe * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and @@ -226,8 +234,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backe * * Requires lock to be taken before calling. 
*/ -typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u32 *extract_index, u32 *insert_index); +typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, u32 *insert_index); /** * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract @@ -239,9 +247,8 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backen * * Requires lock to be taken before calling. */ -typedef void -kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u32 extract_index); +typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); /** * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current @@ -255,9 +262,8 @@ kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_i * * Requires lock to be taken before calling. 
*/ -typedef void -kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u64 *cycle_counts, u64 clk_enable_map); +typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, u64 clk_enable_map); /** * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual @@ -283,20 +289,20 @@ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf */ struct kbase_hwcnt_backend_csf_if { struct kbase_hwcnt_backend_csf_if_ctx *ctx; - kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; - kbase_hwcnt_backend_csf_if_lock_fn *lock; - kbase_hwcnt_backend_csf_if_unlock_fn *unlock; - kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; - kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; - kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; - kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; - kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; - kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; - kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; - kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; - kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; - kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; - kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; + kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held; + kbase_hwcnt_backend_csf_if_lock_fn lock; + kbase_hwcnt_backend_csf_if_unlock_fn unlock; + kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info; + kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc; + kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync; + kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free; + kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns; + kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable; + kbase_hwcnt_backend_csf_if_dump_disable_fn 
dump_disable; + kbase_hwcnt_backend_csf_if_dump_request_fn dump_request; + kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes; + kbase_hwcnt_backend_csf_if_set_extract_index_fn set_extract_index; + kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn get_gpu_cycle_count; }; #endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index b11f3a4e50e2..a44651949abf 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include "hwcnt/mali_kbase_hwcnt_gpu.h" #include "hwcnt/mali_kbase_hwcnt_types.h" @@ -39,7 +39,6 @@ #include #include "mali_kbase_ccswe.h" - /* Ring buffer virtual address start at 4GB */ #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) @@ -206,6 +205,20 @@ kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_c kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener); } +#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +/** + * kbasep_hwcnt_backend_csf_core_mask() - Obtain Core Mask - MAX Core ID + * + * @gpu_props: gpu_props structure + * + * Return: calculated core mask (maximum Core ID) + */ +static u64 kbasep_hwcnt_backend_csf_core_mask(struct kbase_gpu_props *gpu_props) +{ + return gpu_props->coherency_info.group.core_mask; +} +#endif + static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( struct 
kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) @@ -234,6 +247,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( u32 prfcnt_size; u32 prfcnt_hw_size; u32 prfcnt_fw_size; + u32 csg_count; + u32 fw_block_count = 0; u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; @@ -242,28 +257,41 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; kbdev = fw_ctx->kbdev; + csg_count = kbdev->csf.global_iface.group_num; prfcnt_size = kbdev->csf.global_iface.prfcnt_size; prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size); prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size); - fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; /* Read the block size if the GPU has the register PRFCNT_FEATURES * which was introduced in architecture version 11.x.7. */ - if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >= - GPU_ID2_PRODUCT_TTUX) { - prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET( - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(PRFCNT_FEATURES))) { + prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(KBASE_REG_READ( + kbdev, GPU_CONTROL_ENUM(PRFCNT_FEATURES))) << 8; } + /* Extra sanity check to ensure that we support two different configurations: + * a global FW block without CSG blocks and a global FW block with CSG blocks. 
+ */ + if (!prfcnt_fw_size) + fw_block_count = 0; + else if (prfcnt_fw_size == prfcnt_block_size) + fw_block_count = 1; + else if (prfcnt_fw_size == ((1 + csg_count) * prfcnt_block_size)) + fw_block_count = 1 + csg_count; + else + WARN_ON_ONCE(true); + + fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ .prfcnt_hw_size = prfcnt_hw_size, .prfcnt_fw_size = prfcnt_fw_size, .dump_bytes = fw_ctx->buf_bytes, .prfcnt_block_size = prfcnt_block_size, - .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices, - .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask, + .l2_count = kbdev->gpu_props.num_l2_slices, + .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), + .csg_count = fw_block_count > 1 ? csg_count : 0, .clk_cnt = fw_ctx->clk_cnt, .clearing_samples = true, }; @@ -284,7 +312,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( struct page **page_list; void *cpu_addr; int ret; - int i; + size_t i; size_t num_pages; u64 flags; struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; @@ -330,7 +358,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Get physical page for the buffer */ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, false, NULL); - if (ret != num_pages) + if ((size_t)ret != num_pages) goto phys_mem_pool_alloc_error; /* Get the CPU virtual address */ @@ -342,12 +370,12 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( goto vmap_error; flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info, NULL, false); + mmu_sync_info, NULL); if (ret) goto mmu_insert_failed; @@ -480,10 +508,10 
@@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c if (fw_ring_buf->phys) { u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; - WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, - gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, fw_ring_buf->num_pages, - MCU_AS_NR, true)); + WARN_ON(kbase_mmu_teardown_firmware_pages( + fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, + fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages, + MCU_AS_NR)); vunmap(fw_ring_buf->cpu_dump_base); @@ -508,6 +536,7 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + u32 max_csg_slots; WARN_ON(!ctx); WARN_ON(!ring_buf); @@ -516,6 +545,7 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; + max_csg_slots = kbdev->csf.global_iface.group_num; /* Configure */ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count); @@ -536,6 +566,12 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm); kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm); kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_FW_EN, enable->fw_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSG_EN, enable->csg_bm); + + /* Enable all of the CSGs by default. 
*/ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSG_SELECT, max_csg_slots); + /* Configure the HWC set and buffer size */ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config); diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 669701c29152..5156706bdf2b 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -89,6 +89,10 @@ struct kbase_hwcnt_jm_physical_layout { * @to_user_buf: HWC sample buffer for client user, size * metadata.dump_buf_bytes. * @enabled: True if dumping has been enabled, else false. + * @accum_all_blk_stt: Block State to accumulate on next sample, for all types + * of block. + * @sampled_all_blk_stt: Block State to accumulate into the current sample, for + * all types of block. * @pm_core_mask: PM state sync-ed shaders core mask for the enabled * dumping. 
* @curr_config: Current allocated hardware resources to correctly map the @@ -113,6 +117,8 @@ struct kbase_hwcnt_backend_jm { struct kbase_vmap_struct *vmap; u64 *to_user_buf; bool enabled; + blk_stt_t accum_all_blk_stt; + blk_stt_t sampled_all_blk_stt; u64 pm_core_mask; struct kbase_hwcnt_curr_config curr_config; u64 clk_enable_map; @@ -136,26 +142,22 @@ struct kbase_hwcnt_backend_jm { static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, struct kbase_hwcnt_gpu_info *info) { - size_t clk; + size_t clk, l2_count, core_mask; if (!kbdev || !info) return -EINVAL; #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; - info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; -#else /* CONFIG_MALI_BIFROST_NO_MALI */ - { - const struct base_gpu_props *props = &kbdev->gpu_props.props; - const size_t l2_count = props->l2_props.num_l2_slices; - const size_t core_mask = props->coherency_info.group[0].core_mask; + l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; +#else + l2_count = kbdev->gpu_props.num_l2_slices; + core_mask = kbdev->gpu_props.coherency_info.group.core_mask; +#endif - info->l2_count = l2_count; - info->core_mask = core_mask; - info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; - } -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ + info->l2_count = l2_count; + info->core_mask = core_mask; + info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; /* Determine the number of available clock domains. 
*/ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { @@ -398,6 +400,17 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); backend_jm->enabled = true; + /* Enabling counters is an indication that the power may have previously been off for all + * blocks. + * + * In any case, the counters would not have been counting recently, so an 'off' block state + * is an approximation for this. + * + * This will be transferred to the dump only after a dump_wait(), or dump_disable() in + * cases where the caller requested such information. This is to handle when a + * dump_enable() happens in between dump_wait() and dump_get(). + */ + kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); @@ -430,12 +443,20 @@ static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backe } /* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend) +static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map) { int errcode; struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; - if (WARN_ON(!backend_jm) || !backend_jm->enabled) + if (WARN_ON(!backend_jm || + (dump_buffer && (backend_jm->info->metadata != dump_buffer->metadata)) || + (enable_map && (backend_jm->info->metadata != enable_map->metadata)) || + (dump_buffer && !enable_map))) + return; + /* No WARN needed here, but still return early if backend is already disabled */ + if (!backend_jm->enabled) return; kbasep_hwcnt_backend_jm_cc_disable(backend_jm); @@ -443,6 +464,42 @@ static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *bac errcode = 
kbase_instr_hwcnt_disable_internal(backend_jm->kctx); WARN_ON(errcode); + /* Disabling HWCNT is an indication that blocks have been powered off. This is important to + * know for L2 and Tiler blocks, as this is currently the only way a backend can know if + * they are being powered off. + * + * In any case, even if they weren't really powered off, we won't be counting whilst + * disabled. + * + * Update the block state information in the block state accumulator to show this, so that + * in the next dump blocks will have been seen as powered off for some of the time. + */ + kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); + + if (dump_buffer) { + /* In some use-cases, the caller will need the information whilst the counters are + * disabled, but will not be able to call into the backend to dump them. Instead, + * they have an opportunity here to request them to be accumulated into their + * buffer immediately. + * + * This consists of taking a sample of the accumulated block state (as though a + * real dump_get() had happened), then transfer ownership of that to the caller + * (i.e. erasing our copy of it). + */ + kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt, + &backend_jm->accum_all_blk_stt); + kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map, + backend_jm->sampled_all_blk_stt); + /* Now the block state has been passed out into the caller's own accumulation + * buffer, clear our own accumulated and sampled block state - ownership has been + * transferred. 
+ */ + kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + } + backend_jm->enabled = false; } @@ -514,12 +571,27 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back /* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend) { + int errcode; struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; if (!backend_jm || !backend_jm->enabled) return -EINVAL; - return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); + errcode = kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); + if (errcode) + return errcode; + + /* Now that we've completed a sample, also sample+clear the accumulated block state. + * + * This is to ensure that a dump_enable() that happens in between dump_wait() and + * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block + * state is reported at the actual time that counters are being sampled. 
+ */ + kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt, + &backend_jm->accum_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + + return errcode; } /* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ @@ -533,8 +605,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) struct kbase_device *kbdev; unsigned long flags; - int errcode; #endif /* CONFIG_MALI_BIFROST_NO_MALI */ + int errcode; if (!backend_jm || !dst || !dst_enable_map || (backend_jm->info->metadata != dst->metadata) || @@ -572,9 +644,17 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, if (errcode) return errcode; #endif /* CONFIG_MALI_BIFROST_NO_MALI */ - return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, - backend_jm->pm_core_mask, &backend_jm->curr_config, - accumulate); + errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, + backend_jm->pm_core_mask, &backend_jm->curr_config, + accumulate); + + if (errcode) + return errcode; + + kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map, + backend_jm->sampled_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + return errcode; } /** @@ -705,6 +785,8 @@ static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_in kbase_ccswe_init(&backend->ccswe_shader_cores); backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + kbase_hwcnt_block_state_set(&backend->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); *out_backend = backend; return 0; @@ -752,7 +834,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) if (!backend) return; - kbasep_hwcnt_backend_jm_dump_disable(backend); + 
kbasep_hwcnt_backend_jm_dump_disable(backend, NULL, NULL); kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend); } diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c index 564700b2d978..cf2a2e65bc25 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -268,9 +268,9 @@ kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interfac if (!info) return NULL; - *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface, - .dump_watchdog_iface = - watchdog_iface }; + *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ + .jm_backend_iface = backend_iface, .dump_watchdog_iface = watchdog_iface + }; return info; } @@ -443,7 +443,8 @@ static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); } else /*Reverting the job manager backend back to disabled*/ - wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); + wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend, + NULL, NULL); } return errcode; @@ -472,7 +473,10 @@ kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend * } /* Job manager watchdog backend, implementation of dump_disable */ -static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend) +static void 
+kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *buf_enable_map) { struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; unsigned long flags; @@ -497,7 +501,8 @@ static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_bac wd_backend->info->dump_watchdog_iface->disable( wd_backend->info->dump_watchdog_iface->timer); - wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); + wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend, dump_buffer, + buf_enable_map); } /* Job manager watchdog backend, implementation of dump_clear */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c index 34deb5d9e3fc..8b1de2e1cdaf 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c @@ -292,7 +292,8 @@ static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, b accum->accumulated = true; disable: - hctx->iface->dump_disable(accum->backend); + hctx->iface->dump_disable(accum->backend, (accum->accumulated) ? &accum->accum_buf : NULL, + &accum->enable_map); /* Regardless of any errors during the accumulate, put the accumulator * in the disabled state. @@ -453,8 +454,20 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 * */ if ((state == ACCUM_STATE_ENABLED) && new_map) { /* Backend is only enabled if there were any enabled counters */ - if (cur_map_any_enabled) - hctx->iface->dump_disable(accum->backend); + if (cur_map_any_enabled) { + /* In this case we do *not* want to have the buffer updated with extra + * block state, it should instead remain in the backend until the next dump + * happens, hence supplying NULL as the dump_buffer parameter here. 
+ * + * Attempting to take ownership of backend-accumulated block state at this + * point will instead give inaccurate information. For example the dump + * buffer for 'set_counters' operation might be dumping a period that + * should've been entirely in the 'ON' state, but would report it as + * partially in the 'OFF' state. Instead, that 'OFF' state should be + * reported in the _next_ dump. + */ + hctx->iface->dump_disable(accum->backend, NULL, NULL); + } /* (Re-)enable the backend if the new map has enabled counters. * No need to acquire the spinlock, as concurrent enable while @@ -481,9 +494,15 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 * /* If we've not written anything into the dump buffer so far, it * means there was nothing to write. Zero any enabled counters. + * + * In this state, the blocks are likely to be off (and at the very least, not + * counting), so write in the 'off' block state */ - if (!dump_written) + if (!dump_written) { kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); + kbase_hwcnt_dump_buffer_block_state_update(dump_buf, cur_map, + KBASE_HWCNT_STATE_OFF); + } } /* Write out timestamps */ @@ -498,8 +517,13 @@ error: /* An error was only physically possible if the backend was enabled */ WARN_ON(state != ACCUM_STATE_ENABLED); - /* Disable the backend, and transition to the error state */ - hctx->iface->dump_disable(accum->backend); + /* Disable the backend, and transition to the error state. In this case, we can try to save + * the block state into the accumulated buffer, but there's no guarantee we'll have one, so + * this is more of a 'best effort' for error cases. There would be a suitable block + * state recorded on the next dump_enable() anyway. + */ + hctx->iface->dump_disable(accum->backend, (accum->accumulated) ? 
&accum->accum_buf : NULL, + cur_map); spin_lock_irqsave(&hctx->state_lock, flags); accum->state = ACCUM_STATE_ERROR; diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c index 74916dab060d..875643654627 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,11 @@ * */ +#include #include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_types.h" #include +#include /** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements */ @@ -32,78 +33,107 @@ enum enable_map_idx { EM_COUNT, }; -static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) +static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_fe_block_type(enum kbase_hwcnt_set counter_set, + bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; case KBASE_HWCNT_SET_SECONDARY: if (is_csf) - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; default: - WARN_ON(true); + WARN(true, 
"Invalid counter set for FE block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; } } -static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) +static enum kbase_hwcnt_gpu_v5_block_type +kbasep_get_tiler_block_type(enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; case KBASE_HWCNT_SET_SECONDARY: case KBASE_HWCNT_SET_TERTIARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for tiler block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; } } -static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) +static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_sc_block_type(enum kbase_hwcnt_set counter_set, + bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; case KBASE_HWCNT_SET_SECONDARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for shader core block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; } } -static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) + +static enum kbase_hwcnt_gpu_v5_block_type +kbasep_get_memsys_block_type(enum kbase_hwcnt_set counter_set) { switch 
(counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; case KBASE_HWCNT_SET_SECONDARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; case KBASE_HWCNT_SET_TERTIARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for Memsys block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; + } +} + +static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_fw_block_type(enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW; + case KBASE_HWCNT_SET_SECONDARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2; + case KBASE_HWCNT_SET_TERTIARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3; + default: + WARN(true, "Invalid counter set for FW type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED; + } +} + +static enum kbase_hwcnt_gpu_v5_block_type +kbasep_get_csg_block_type(enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG; + case KBASE_HWCNT_SET_SECONDARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2; + case KBASE_HWCNT_SET_TERTIARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3; + default: + WARN(true, "Invalid counter set for CSG type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED; } } @@ -124,49 +154,89 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu const struct kbase_hwcnt_metadata **metadata) { struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; - 
size_t non_sc_block_count; + struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT] = {}; + size_t non_core_block_count; + size_t core_block_count; size_t sc_block_count; + size_t blk_idx = 0; - WARN_ON(!gpu_info); - WARN_ON(!metadata); - - /* Calculate number of block instances that aren't shader cores */ - non_sc_block_count = 2 + gpu_info->l2_count; - /* Calculate number of block instances that are shader cores */ - sc_block_count = fls64(gpu_info->core_mask); - - /* - * A system can have up to 64 shader cores, but the 64-bit - * availability mask can't physically represent that many cores as well - * as the other hardware blocks. - * Error out if there are more blocks than our implementation can - * support. - */ - if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + if (WARN_ON(!gpu_info)) return -EINVAL; - /* One Front End block */ - kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); - blks[0].inst_cnt = 1; - blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + if (WARN_ON(!metadata)) + return -EINVAL; - /* One Tiler block */ - kbasep_get_tiler_block_type(&blks[1].type, counter_set); - blks[1].inst_cnt = 1; - blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + /* Calculate number of block instances that aren't cores */ + non_core_block_count = 2 + gpu_info->l2_count; + /* Calculate number of block instances that are shader cores */ + sc_block_count = fls64(gpu_info->core_mask); + /* Determine the total number of cores */ + core_block_count = sc_block_count; - /* l2_count memsys blks */ - kbasep_get_memsys_block_type(&blks[2].type, counter_set); - blks[2].inst_cnt = gpu_info->l2_count; - blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + 
+ if (gpu_info->has_fw_counters) + non_core_block_count += 1 + gpu_info->csg_cnt; /* - * There are as many shader cores in the system as there are bits set in + * Check we have enough bits to represent the number of cores that + * exist in the system. Error-out if there are more blocks than our implementation can + * support. + */ + if ((core_block_count + non_core_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + + /* The dump starts with, on supporting systems, the FW blocks, and as such, + * they should be taken into account first. + */ + if (gpu_info->has_fw_counters) { + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_fw_block_type(counter_set), + .inst_cnt = 1, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + } + + /* Some systems may support FW counters but not CSG counters, so the + * two are handled differently. + */ + if (gpu_info->csg_cnt > 0) { + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_csg_block_type(counter_set), + .inst_cnt = gpu_info->csg_cnt, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + } + + /* One Front End block */ + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_fe_block_type(counter_set, is_csf), + .inst_cnt = 1, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + + /* One Tiler block */ + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_tiler_block_type(counter_set), + .inst_cnt = 1, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + + /* l2_count memsys blks */ + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = 
kbasep_get_memsys_block_type(counter_set), + .inst_cnt = gpu_info->l2_count, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + + /* + * There are as many cores in the system as there are bits set in * the core mask. However, the dump buffer memory requirements need to * take into account the fact that the core mask may be non-contiguous. * @@ -179,27 +249,35 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu * * We find the core mask's last set bit to determine the memory * requirements, and embed the core mask into the availability mask so - * we can determine later which shader cores physically exist. + * we can determine later which cores physically exist. */ - kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); - blks[3].inst_cnt = sc_block_count; - blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_sc_block_type(counter_set, is_csf), + .inst_cnt = sc_block_count, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; - WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); - group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; - group.blks = blks; + /* Currently, we're only handling a maximum of seven blocks, and this needs + * to be changed whenever the number of blocks increases + */ + BUILD_BUG_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 7); - desc.grp_cnt = 1; - desc.grps = &group; + /* After assembling the block list in the code above, we should not end up with more + * elements than KBASE_HWCNT_V5_BLOCK_TYPE_COUNT. 
+ */ + WARN_ON(blk_idx > KBASE_HWCNT_V5_BLOCK_TYPE_COUNT); + + desc.blk_cnt = blk_idx; + desc.blks = blks; desc.clk_cnt = gpu_info->clk_cnt; /* The JM, Tiler, and L2s are always available, and are before cores */ - desc.avail_mask = (1ull << non_sc_block_count) - 1; - /* Embed the core mask directly in the availability mask */ - desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); + kbase_hwcnt_set_avail_mask(&desc.avail_mask, (1ull << non_core_block_count) - 1, 0); + kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count, + gpu_info->core_mask); + return kbase_hwcnt_metadata_create(&desc, metadata); } @@ -248,7 +326,10 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, * metadata since physical HW uses 32-bit per value but metadata * specifies 64-bit per value. */ - WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes); + if (WARN(dump_bytes * 2 != metadata->dump_buf_bytes, + "Dump buffer size expected to be %zu, instead is %zu", dump_bytes * 2, + metadata->dump_buf_bytes)) + return -EINVAL; *out_metadata = metadata; *out_dump_bytes = dump_bytes; @@ -291,14 +372,10 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat kbase_hwcnt_metadata_destroy(metadata); } -static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk) +static bool is_block_type_shader(const u64 blk_type) { bool is_shader = false; - /* Warn on unknown group type */ - if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) - return false; - if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 || @@ -308,21 +385,14 @@ static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const s return is_shader; } -static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type) +static bool is_block_type_l2_cache(const u64 blk_type) { bool 
is_l2_cache = false; - switch (grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) - is_l2_cache = true; - break; - default: - /* Warn on unknown group type */ - WARN_ON(true); - } + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) + is_l2_cache = true; return is_l2_cache; } @@ -332,31 +402,29 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; const u64 *dump_src = src; size_t src_offset = 0; u64 core_mask = pm_core_mask; /* Variables to deal with the current configuration */ - int l2_count = 0; + size_t l2_count = 0; if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); - const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - const bool is_shader_core = is_block_type_shader( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); - const bool is_l2_cache = is_block_type_l2_cache( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); - const bool is_undefined = kbase_hwcnt_is_block_type_undefined( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + 
const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + const bool is_shader_core = is_block_type_shader(blk_type); + const bool is_l2_cache = is_block_type_l2_cache(blk_type); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined(blk_type); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); bool hw_res_available = true; /* @@ -383,45 +451,68 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* * Skip block if no values in the destination block are enabled. */ - if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { - u64 *dst_blk = - kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; - bool blk_powered; + bool blk_valid = (!is_undefined && hw_res_available); - if (!is_shader_core) { - /* Under the current PM system, counters will - * only be enabled after all non shader core - * blocks are powered up. - */ - blk_powered = true; - } else { - /* Check the PM core mask to see if the shader - * core is powered up. - */ - blk_powered = core_mask & 1; - } + if (blk_valid) { + bool blk_powered; + blk_stt_t current_block_state; - if (blk_powered && !is_undefined && hw_res_available) { - /* Only powered and defined blocks have valid data. */ - if (accumulate) { - kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, - hdr_cnt, ctr_cnt); + if (!is_shader_core) { + /* Under the current PM system, counters will only be + * enabled after all non shader core blocks are powered up. 
+ */ + blk_powered = true; } else { - kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, - (hdr_cnt + ctr_cnt)); + /* Check the PM core mask to see if the shader core is + * powered up. + */ + blk_powered = core_mask & 1; + } + current_block_state = (blk_powered) ? KBASE_HWCNT_STATE_ON : + KBASE_HWCNT_STATE_OFF; + + /* Note: KBASE_HWCNT_STATE_OFF for non-shader cores (L2, Tiler, JM) + * is handled on this backend's dump_disable function + */ + if (accumulate) { + /* Only update existing counter values if block was powered + * and valid + */ + if (blk_powered) + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + + kbase_hwcnt_block_state_append(dst_blk_stt, + current_block_state); + } else { + if (blk_powered) { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + } else { + /* src is garbage, so zero the dst */ + kbase_hwcnt_dump_buffer_block_zero( + dst_blk, (hdr_cnt + ctr_cnt)); + } + + kbase_hwcnt_block_state_set(dst_blk_stt, + current_block_state); } } else { - /* Even though the block might be undefined, the - * user has enabled counter collection for it. - * We should not propagate garbage data. + /* Even though the block might be undefined, the user has enabled + * counter collection for it. We should not propagate garbage data, + * or copy/accumulate the block states. 
*/ if (accumulate) { /* No-op to preserve existing values */ } else { - /* src is garbage, so zero the dst */ + /* src is garbage, so zero the dst and reset block state */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, (hdr_cnt + ctr_cnt)); + kbase_hwcnt_block_state_set(dst_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); } } } @@ -437,58 +528,67 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, } int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) + blk_stt_t *src_block_stt, + const struct kbase_hwcnt_enable_map *dst_enable_map, + size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u64 *dump_src = src; size_t src_offset = 0; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; + size_t blk_inst_count = 0; - if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) + if (!dst || !src || !src_block_stt || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); - const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - const bool is_undefined = kbase_hwcnt_is_block_type_undefined( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); + const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined(blk_type); + blk_stt_t *dst_blk_stt = + 
kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); /* * Skip block if no values in the destination block are enabled. */ - if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { - u64 *dst_blk = - kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; if (!is_undefined) { if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt); + kbase_hwcnt_block_state_append( + dst_blk_stt, src_block_stt[blk_inst_count]); } else { kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + kbase_hwcnt_block_state_set(dst_blk_stt, + src_block_stt[blk_inst_count]); } } else { - /* Even though the block might be undefined, the - * user has enabled counter collection for it. - * We should not propagate garbage data. + /* Even though the block might be undefined, the user has enabled + * counter collection for it. We should not propagate garbage + * data, or copy/accumulate the block states. 
*/ if (accumulate) { /* No-op to preserve existing values */ } else { - /* src is garbage, so zero the dst */ + /* src is garbage, so zero the dst and reset block state */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, (hdr_cnt + ctr_cnt)); + kbase_hwcnt_block_state_set(dst_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); } } } - + blk_inst_count++; src_offset += (hdr_cnt + ctr_cnt); } @@ -541,58 +641,80 @@ void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_m u64 shader_bm[EM_COUNT] = { 0 }; u64 tiler_bm[EM_COUNT] = { 0 }; u64 mmu_l2_bm[EM_COUNT] = { 0 }; - size_t grp, blk, blk_inst; + u64 fw_bm[EM_COUNT] = { 0 }; + u64 csg_bm[EM_COUNT] = { 0 }; + size_t blk, blk_inst; if (WARN_ON(!src) || WARN_ON(!dst)) return; metadata = src->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, blk, blk_inst); + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk); + size_t map_idx; - if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { - const size_t map_stride = - kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); - size_t map_idx; + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; - for (map_idx = 0; map_idx < map_stride; ++map_idx) { - if (WARN_ON(map_idx >= EM_COUNT)) - break; - - switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: - case 
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: - /* Nothing to do in this case. */ - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: - fe_bm[map_idx] |= blk_map[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - tiler_bm[map_idx] |= blk_map[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: - shader_bm[map_idx] |= blk_map[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - mmu_l2_bm[map_idx] |= blk_map[map_idx]; - break; - default: - WARN_ON(true); - } + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: + /* Nothing to do in this case. 
*/ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + fe_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + shader_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: + fw_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: + csg_bm[map_idx] |= blk_map[map_idx]; + break; + default: + WARN(true, "Unknown block type %llu", blk_type); } - } else { - WARN_ON(true); } } @@ -603,6 +725,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_m kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]); dst->mmu_l2_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]); + dst->fw_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fw_bm[EM_LO], fw_bm[EM_HI]); + dst->csg_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(csg_bm[EM_LO], csg_bm[EM_HI]); } void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) @@ -631,7 +755,9 @@ void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst u64 shader_bm[EM_COUNT] = { 0 }; u64 tiler_bm[EM_COUNT] = { 0 }; u64 
mmu_l2_bm[EM_COUNT] = { 0 }; - size_t grp, blk, blk_inst; + u64 fw_bm[EM_COUNT] = { 0 }; + u64 csg_bm[EM_COUNT] = { 0 }; + size_t blk, blk_inst; if (WARN_ON(!src) || WARN_ON(!dst)) return; @@ -645,52 +771,75 @@ void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst &tiler_bm[EM_HI]); kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO], &mmu_l2_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fw_bm, &fw_bm[EM_LO], &fw_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->csg_bm, &csg_bm[EM_LO], + &csg_bm[EM_HI]); - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst); + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk); + size_t map_idx; - if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { - const size_t map_stride = - kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); - size_t map_idx; + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; - for (map_idx = 0; map_idx < map_stride; ++map_idx) { - if (WARN_ON(map_idx >= EM_COUNT)) - break; - - switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: - /* Nothing to do in this case. 
*/ - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: - blk_map[map_idx] = fe_bm[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - blk_map[map_idx] = tiler_bm[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: - blk_map[map_idx] = shader_bm[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - blk_map[map_idx] = mmu_l2_bm[map_idx]; - break; - default: - WARN_ON(true); - } + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: + /* Nothing to do in this case. 
*/ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + blk_map[map_idx] = fe_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + blk_map[map_idx] = tiler_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + blk_map[map_idx] = shader_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + blk_map[map_idx] = mmu_l2_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: + blk_map[map_idx] = fw_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: + blk_map[map_idx] = csg_bm[map_idx]; + break; + default: + WARN(true, "Invalid block type %llu", blk_type); } - } else { - WARN_ON(true); } } } @@ -699,40 +848,35 @@ void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, const struct kbase_hwcnt_enable_map *enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) return; metadata = buf->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); - u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst); + u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, blk, blk_inst); const u64 
*blk_map = - kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); + kbase_hwcnt_enable_map_block_instance(enable_map, blk, blk_inst); - if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { - const size_t map_stride = - kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); - u64 prfcnt_bm[EM_COUNT] = { 0 }; - u32 prfcnt_en = 0; - size_t map_idx; + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk); + u64 prfcnt_bm[EM_COUNT] = { 0 }; + u32 prfcnt_en = 0; + size_t map_idx; - for (map_idx = 0; map_idx < map_stride; ++map_idx) { - if (WARN_ON(map_idx >= EM_COUNT)) - break; + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; - prfcnt_bm[map_idx] = blk_map[map_idx]; - } - - prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], - prfcnt_bm[EM_HI]); - - buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; - } else { - WARN_ON(true); + prfcnt_bm[map_idx] = blk_map[map_idx]; } + + prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], + prfcnt_bm[EM_HI]); + + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; } } diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h index a49c31e52f98..2f500fdb2237 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #ifndef _KBASE_HWCNT_GPU_H_ #define _KBASE_HWCNT_GPU_H_ +#include "hwcnt/mali_kbase_hwcnt_types.h" + #include #include @@ -31,10 +33,10 @@ struct kbase_hwcnt_enable_map; struct kbase_hwcnt_dump_buffer; /* Hardware counter version 5 definitions, V5 is the only supported version. */ -#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 7 #define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 #define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ +#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) /* FrontEnd block count in V5 GPU hardware counter. */ @@ -48,15 +50,6 @@ struct kbase_hwcnt_dump_buffer; /* Number of bytes for each counter value in hardware. */ #define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32)) -/** - * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to - * identify metadata groups. - * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. - */ -enum kbase_hwcnt_gpu_group_type { - KBASE_HWCNT_GPU_GROUP_TYPE_V5, -}; - /** * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, * used to identify metadata blocks. @@ -79,6 +72,14 @@ enum kbase_hwcnt_gpu_group_type { * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: Secondary FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: Tertiary FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: Undefined FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: CSG block. 
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: Secondary CSG block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: Tertiary CSG block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: Undefined CSG block. */ enum kbase_hwcnt_gpu_v5_block_type { KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, @@ -94,6 +95,14 @@ enum kbase_hwcnt_gpu_v5_block_type { KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED, }; /** @@ -117,12 +126,16 @@ enum kbase_hwcnt_set { * @shader_bm: Shader counters selection bitmask. * @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. + * @fw_bm: CSF firmware counters selection bitmask. + * @csg_bm: CSF CSG counters selection bitmask. */ struct kbase_hwcnt_physical_enable_map { u32 fe_bm; u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; + u32 fw_bm; + u32 csg_bm; }; /* @@ -140,14 +153,18 @@ enum kbase_hwcnt_physical_set { * @l2_count: L2 cache count. * @core_mask: Shader core mask. May be sparse. * @clk_cnt: Number of clock domains available. + * @csg_cnt: Number of CSGs available. * @prfcnt_values_per_block: Total entries (header + counters) of performance * counter per block. + * @has_fw_counters: Whether the GPU has FW counters available. */ struct kbase_hwcnt_gpu_info { size_t l2_count; u64 core_mask; u8 clk_cnt; + u8 csg_cnt; size_t prfcnt_values_per_block; + bool has_fw_counters; }; /** @@ -197,18 +214,12 @@ struct kbase_hwcnt_curr_config { /** * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined. * - * @grp_type: Hardware counter group type. 
* @blk_type: Hardware counter block type. * * Return: true if the block type is undefined, else false. */ -static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, - const uint64_t blk_type) +static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t blk_type) { - /* Warn on unknown group type */ - if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) - return false; - return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED || @@ -290,12 +301,15 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. - * @dst: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source raw dump buffer, of same length - * as dump_buf_bytes in the metadata of dst dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @accumulate: True if counters in src should be accumulated into - * destination, rather than copied. + * @dst: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of dst dump buffer. + * @src_block_stt: Non-NULL pointer to source block state buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @accumulate: True if counters in src should be accumulated into + * destination, rather than copied. 
* * The dst and dst_enable_map MUST have been created from the same metadata as * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get @@ -304,7 +318,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, * Return: 0 on success, else error code. */ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate); + blk_stt_t *src_block_stt, + const struct kbase_hwcnt_enable_map *dst_enable_map, + size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate); /** * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c deleted file mode 100644 index 0cf2f94cfb87..000000000000 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" - -#include -#include -#include - -int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, - const struct kbase_hwcnt_metadata *src_md) -{ - struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; - size_t prfcnt_values_per_block; - size_t blk; - int err; - struct kbase_hwcnt_metadata_narrow *metadata_narrow; - - if (!dst_md_narrow || !src_md || !src_md->grp_metadata || - !src_md->grp_metadata[0].blk_metadata) - return -EINVAL; - - /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block - * count in the metadata. - */ - if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || - (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) - return -EINVAL; - - /* Get the values count in the first block. */ - prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); - - /* check all blocks should have same values count. */ - for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { - size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); - if (val_cnt != prfcnt_values_per_block) - return -EINVAL; - } - - /* Only support 64 and 128 entries per block. */ - if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) - return -EINVAL; - - metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL); - if (!metadata_narrow) - return -ENOMEM; - - /* Narrow to 64 entries per block to keep API backward compatibility. 
*/ - prfcnt_values_per_block = 64; - - for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { - size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk); - blks[blk] = (struct kbase_hwcnt_block_description){ - .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk), - .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk), - .hdr_cnt = blk_hdr_cnt, - .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt, - }; - } - - group = (struct kbase_hwcnt_group_description){ - .type = kbase_hwcnt_metadata_group_type(src_md, 0), - .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT, - .blks = blks, - }; - - desc = (struct kbase_hwcnt_description){ - .grp_cnt = kbase_hwcnt_metadata_group_count(src_md), - .avail_mask = src_md->avail_mask, - .clk_cnt = src_md->clk_cnt, - .grps = &group, - }; - - err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata); - if (!err) { - /* Narrow down the buffer size to half as the narrowed metadata - * only supports 32-bit but the created metadata uses 64-bit for - * block entry. - */ - metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1; - *dst_md_narrow = metadata_narrow; - } else { - kfree(metadata_narrow); - } - - return err; -} - -void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow) -{ - if (!md_narrow) - return; - - kbase_hwcnt_metadata_destroy(md_narrow->metadata); - kfree(md_narrow); -} - -int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, - struct kbase_hwcnt_dump_buffer_narrow *dump_buf) -{ - size_t dump_buf_bytes; - size_t clk_cnt_buf_bytes; - u8 *buf; - - if (!md_narrow || !dump_buf) - return -EINVAL; - - dump_buf_bytes = md_narrow->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; - - /* Make a single allocation for both dump_buf and clk_cnt_buf. 
*/ - buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){ - .md_narrow = md_narrow, - .dump_buf = (u32 *)buf, - .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes), - }; - - return 0; -} - -void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) -{ - if (!dump_buf_narrow) - return; - - kfree(dump_buf_narrow->dump_buf); - *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL, - .dump_buf = NULL, - .clk_cnt_buf = NULL }; -} - -int kbase_hwcnt_dump_buffer_narrow_array_alloc( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) -{ - struct kbase_hwcnt_dump_buffer_narrow *buffers; - size_t buf_idx; - unsigned int order; - unsigned long addr; - size_t dump_buf_bytes; - size_t clk_cnt_buf_bytes; - size_t total_dump_buf_size; - - if (!md_narrow || !dump_bufs) - return -EINVAL; - - dump_buf_bytes = md_narrow->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt; - - /* Allocate memory for the dump buffer struct array */ - buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); - if (!buffers) - return -ENOMEM; - - /* Allocate pages for the actual dump buffers, as they tend to be fairly - * large. 
- */ - order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - - if (!addr) { - kfree(buffers); - return -ENOMEM; - } - - *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){ - .page_addr = addr, - .page_order = order, - .buf_cnt = n, - .bufs = buffers, - }; - - total_dump_buf_size = dump_buf_bytes * n; - /* Set the buffer of each dump buf */ - for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t dump_buf_offset = dump_buf_bytes * buf_idx; - const size_t clk_cnt_buf_offset = - total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx); - - buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){ - .md_narrow = md_narrow, - .dump_buf = (u32 *)(addr + dump_buf_offset), - .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset), - }; - } - - return 0; -} - -void kbase_hwcnt_dump_buffer_narrow_array_free( - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) -{ - if (!dump_bufs) - return; - - kfree(dump_bufs->bufs); - free_pages(dump_bufs->page_addr, dump_bufs->page_order); - memset(dump_bufs, 0, sizeof(*dump_bufs)); -} - -void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, - const u64 *blk_em, size_t val_cnt) -{ - size_t val; - - for (val = 0; val < val_cnt; val++) { - bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); - u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; - - dst_blk[val] = val_enabled ? 
src_val : 0; - } -} - -void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata_narrow *metadata_narrow; - size_t grp; - size_t clk; - - if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || - WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) || - WARN_ON(src->metadata->grp_cnt != 1) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != - src->metadata->grp_metadata[0].blk_cnt) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != - KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > - src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) - return; - - /* Don't use src metadata since src buffer is bigger than dst buffer. */ - metadata_narrow = dst_narrow->md_narrow; - - for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) { - size_t blk; - size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp); - - for (blk = 0; blk < blk_cnt; blk++) { - size_t blk_inst; - size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count( - metadata_narrow, grp, blk); - - for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) { - /* The narrowed down buffer is only 32-bit. 
*/ - u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance( - dst_narrow, grp, blk, blk_inst); - const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count( - metadata_narrow, grp, blk); - /* Align upwards to include padding bytes */ - val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( - val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); - - kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk, - blk_em, val_cnt); - } - } - } - - for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) { - bool clk_enabled = - kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); - - dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; - } -} diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h deleted file mode 100644 index afd236d71a7c..000000000000 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_HWCNT_GPU_NARROW_H_ -#define _KBASE_HWCNT_GPU_NARROW_H_ - -#include "hwcnt/mali_kbase_hwcnt_types.h" -#include - -struct kbase_device; -struct kbase_hwcnt_metadata; -struct kbase_hwcnt_enable_map; -struct kbase_hwcnt_dump_buffer; - -/** - * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical - * layout of narrow dump buffers. - * For backward compatibility, the narrow - * metadata only supports 64 counters per - * block and 32-bit per block entry. - * @metadata: Non-NULL pointer to the metadata before narrow down to - * 32-bit per block entry, it has 64 counters per block and - * 64-bit per value. - * @dump_buf_bytes: The size in bytes after narrow 64-bit to 32-bit per block - * entry. - */ -struct kbase_hwcnt_metadata_narrow { - const struct kbase_hwcnt_metadata *metadata; - size_t dump_buf_bytes; -}; - -/** - * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer. - * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to - * describe the layout of the narrow dump buffer. - * @dump_buf: Non-NULL pointer to an array of u32 values, the array size - * is md_narrow->dump_buf_bytes. - * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed - * for each clock domain. - */ -struct kbase_hwcnt_dump_buffer_narrow { - const struct kbase_hwcnt_metadata_narrow *md_narrow; - u32 *dump_buf; - u64 *clk_cnt_buf; -}; - -/** - * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump - * buffer array. - * @page_addr: Address of first allocated page. A single allocation is used for - * all narrow dump buffers in the array. - * @page_order: The allocation order of the pages, the order is on a logarithmic - * scale. - * @buf_cnt: The number of allocated dump buffers. 
- * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors. - */ -struct kbase_hwcnt_dump_buffer_narrow_array { - unsigned long page_addr; - unsigned int page_order; - size_t buf_cnt; - struct kbase_hwcnt_dump_buffer_narrow *bufs; -}; - -/** - * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from - * narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * - * Return: Number of hardware counter groups described by narrow metadata. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow) -{ - return kbase_hwcnt_metadata_group_count(md_narrow->metadata); -} - -/** - * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group - * from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * - * Return: Type of the group grp. - */ -static inline u64 -kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp) -{ - return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp); -} - -/** - * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a - * group from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * - * Return: Number of blocks in group grp. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp) -{ - return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp); -} - -/** - * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of - * instances of a block - * from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of instances of block blk in group grp. 
- */ -static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk); -} - -/** - * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter - * headers from narrow - * metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of counter headers in each instance of block blk in group grp. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk); -} - -/** - * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of - * counters from narrow - * metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of counters in each instance of block blk in group grp. - */ -static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk); -} - -/** - * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values - * from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of headers plus counters in each instance of block blk - * in group grp. 
- */ -static inline size_t -kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) + - kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk); -} - -/** - * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a - * narrowed block instance's - * dump buffer. - * @buf: Non-NULL pointer to narrow dump buffer. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - * - * Return: u32* to the dump buffer for the block instance. - */ -static inline u32 * -kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf, - size_t grp, size_t blk, size_t blk_inst) -{ - return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + - buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + - (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * - blk_inst); -} - -/** - * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC - * entries per block truncated to - * 64 entries and block entry size - * narrowed down to 32-bit. - * - * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored - * on success. - * @src_md: Non-NULL pointer to the HWC metadata used as the source to - * create dst_md_narrow. - * - * For backward compatibility of the interface to user clients, a new metadata - * with entries per block truncated to 64 and block entry size narrowed down - * to 32-bit will be created for dst_md_narrow. - * The total entries per block in src_md must be 64 or 128, if it's other - * values, function returns error since it's not supported. - * - * Return: 0 on success, else error code. 
- */ -int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, - const struct kbase_hwcnt_metadata *src_md); - -/** - * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow - * metadata object. - * @md_narrow: Pointer to hardware counter narrow metadata. - */ -void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow); - -/** - * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be - * initialised to undefined values, so must be used as a copy - * destination, or cleared before use. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, - struct kbase_hwcnt_dump_buffer_narrow *dump_buf); - -/** - * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer. - * @dump_buf: Dump buffer to be freed. - * - * Can be safely called on an all-zeroed narrow dump buffer structure, or on an - * already freed narrow dump buffer. - */ -void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf); - -/** - * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow - * dump buffers. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @n: Number of narrow dump buffers to allocate - * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array - * object to be initialised. - * - * A single zeroed contiguous page allocation will be used for all of the - * buffers inside the object, where: - * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes - * - * Return: 0 on success, else error code. 
- */ -int kbase_hwcnt_dump_buffer_narrow_array_alloc( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer - * array. - * @dump_bufs: Narrow Dump buffer array to be freed. - * - * Can be safely called on an all-zeroed narrow dump buffer array structure, or - * on an already freed narrow dump buffer array. - */ -void kbase_hwcnt_dump_buffer_narrow_array_free( - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block - * values from source to - * destination. - * @dst_blk: Non-NULL pointer to destination block obtained from a call to - * kbase_hwcnt_dump_buffer_narrow_block_instance. - * @src_blk: Non-NULL pointer to source block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_cnt: Number of values in the block. - * - * After the copy, any disabled values in destination will be zero, the enabled - * values in destination will be saturated at U32_MAX if the corresponding - * source value is bigger than U32_MAX, or copy the value from source if the - * corresponding source value is less than or equal to U32_MAX. - */ -void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, - const u64 *blk_em, size_t val_cnt); - -/** - * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a - * narrow dump buffer. - * @dst_narrow: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * After the operation, all non-enabled values (including padding bytes) will be - * zero. Slower than the non-strict variant. 
- * - * The enabled values in dst_narrow will be saturated at U32_MAX if the - * corresponding source value is bigger than U32_MAX, or copy the value from - * source if the corresponding source value is less than or equal to U32_MAX. - */ -void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c index 763eb315d9a2..e7f6743f1fb1 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,15 +27,15 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, const struct kbase_hwcnt_metadata **out_metadata) { char *buf; + size_t blk; struct kbase_hwcnt_metadata *metadata; - struct kbase_hwcnt_group_metadata *grp_mds; - size_t grp; - size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ - size_t dump_buf_count; /* Number of u64 values (inc padding) */ - size_t avail_mask_bits; /* Number of availability mask bits */ - - size_t size; + struct kbase_hwcnt_block_metadata *blk_mds; + size_t enable_map_count = 0; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count = 0; /* Number of u64 values (inc padding) */ + size_t avail_mask_bits = 0; + size_t state_count = 0; size_t offset; + size_t size; if (!desc || !out_metadata) return -EINVAL; @@ -50,13 +50,8 @@ int 
kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, size = 0; size += sizeof(struct kbase_hwcnt_metadata); - /* Group metadata */ - size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; - /* Block metadata */ - for (grp = 0; grp < desc->grp_cnt; grp++) { - size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt; - } + size += sizeof(struct kbase_hwcnt_block_metadata) * desc->blk_cnt; /* Single allocation for the entire metadata */ buf = kmalloc(size, GFP_KERNEL); @@ -70,79 +65,59 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, metadata = (struct kbase_hwcnt_metadata *)(buf + offset); offset += sizeof(struct kbase_hwcnt_metadata); - /* Bump allocate the group metadata */ - grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + /* Bump allocate the block metadata */ + blk_mds = (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * desc->blk_cnt; - enable_map_count = 0; - dump_buf_count = 0; - avail_mask_bits = 0; + /* Fill in each block */ + for (blk = 0; blk < desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; + const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; - for (grp = 0; grp < desc->grp_cnt; grp++) { - size_t blk; - - const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp; - struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; - - size_t group_enable_map_count = 0; - size_t group_dump_buffer_count = 0; - size_t group_avail_mask_bits = 0; - - /* Bump allocate this group's block metadata */ - struct kbase_hwcnt_block_metadata *blk_mds = - (struct kbase_hwcnt_block_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt; - - /* Fill in each block in the group's 
information */ - for (blk = 0; blk < grp_desc->blk_cnt; blk++) { - const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk; - struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; - const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; - - blk_md->type = blk_desc->type; - blk_md->inst_cnt = blk_desc->inst_cnt; - blk_md->hdr_cnt = blk_desc->hdr_cnt; - blk_md->ctr_cnt = blk_desc->ctr_cnt; - blk_md->enable_map_index = group_enable_map_count; - blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values); - blk_md->dump_buf_index = group_dump_buffer_count; - blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( + *blk_md = (struct kbase_hwcnt_block_metadata){ + .type = blk_desc->type, + .inst_cnt = blk_desc->inst_cnt, + .hdr_cnt = blk_desc->hdr_cnt, + .ctr_cnt = blk_desc->ctr_cnt, + .enable_map_index = enable_map_count, + .enable_map_stride = kbase_hwcnt_bitfield_count(n_values), + .dump_buf_index = dump_buf_count, + .dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( n_values, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); - blk_md->avail_mask_index = group_avail_mask_bits; + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)), + .avail_mask_index = avail_mask_bits, + .blk_stt_index = state_count, + .blk_stt_stride = KBASE_HWCNT_BLOCK_STATE_STRIDE, + }; - group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; - group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt; - group_avail_mask_bits += blk_md->inst_cnt; - } - - /* Fill in the group's information */ - grp_md->type = grp_desc->type; - grp_md->blk_cnt = grp_desc->blk_cnt; - grp_md->blk_metadata = blk_mds; - grp_md->enable_map_index = enable_map_count; - grp_md->dump_buf_index = dump_buf_count; - grp_md->avail_mask_index = avail_mask_bits; - - enable_map_count += group_enable_map_count; - dump_buf_count += group_dump_buffer_count; - avail_mask_bits += group_avail_mask_bits; + enable_map_count += blk_md->enable_map_stride * 
blk_md->inst_cnt; + dump_buf_count += blk_md->dump_buf_stride * blk_md->inst_cnt; + avail_mask_bits += blk_md->inst_cnt; + state_count += blk_md->inst_cnt * blk_md->blk_stt_stride; } /* Fill in the top level metadata's information */ - metadata->grp_cnt = desc->grp_cnt; - metadata->grp_metadata = grp_mds; - metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; - metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; - metadata->avail_mask = desc->avail_mask; - metadata->clk_cnt = desc->clk_cnt; + *metadata = (struct kbase_hwcnt_metadata){ + .blk_cnt = desc->blk_cnt, + .blk_metadata = blk_mds, + .enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES, + .dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES, + .blk_stt_bytes = state_count * KBASE_HWCNT_BLOCK_STATE_BYTES, + .clk_cnt = desc->clk_cnt, + }; + + kbase_hwcnt_cp_avail_mask(&metadata->avail_mask, &desc->avail_mask); + + if (WARN_ON(size != offset)) + return -EINVAL; - WARN_ON(size != offset); /* Due to the block alignment, there should be exactly one enable map * bit per 4 bytes in the dump buffer. */ - WARN_ON(metadata->dump_buf_bytes != - (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + if (WARN_ON(metadata->dump_buf_bytes != + (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES))) + return -EINVAL; *out_metadata = metadata; return 0; @@ -189,6 +164,7 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, { size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; + size_t block_state_bytes; u8 *buf; if (!metadata || !dump_buf) @@ -196,15 +172,17 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + block_state_bytes = metadata->blk_stt_bytes; - /* Make a single allocation for both dump_buf and clk_cnt_buf. 
*/ - buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); + /* Make a single allocation for dump_buf, clk_cnt_buf and block_state_buf. */ + buf = kzalloc(dump_buf_bytes + clk_cnt_buf_bytes + block_state_bytes, GFP_KERNEL); if (!buf) return -ENOMEM; dump_buf->metadata = metadata; dump_buf->dump_buf = (u64 *)buf; dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + dump_buf->blk_stt_buf = (blk_stt_t *)(buf + dump_buf_bytes + clk_cnt_buf_bytes); return 0; } @@ -218,72 +196,11 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) memset(dump_buf, 0, sizeof(*dump_buf)); } -int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs) -{ - struct kbase_hwcnt_dump_buffer *buffers; - size_t buf_idx; - unsigned int order; - unsigned long addr; - size_t dump_buf_bytes; - size_t clk_cnt_buf_bytes; - - if (!metadata || !dump_bufs) - return -EINVAL; - - dump_buf_bytes = metadata->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; - - /* Allocate memory for the dump buffer struct array */ - buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); - if (!buffers) - return -ENOMEM; - - /* Allocate pages for the actual dump buffers, as they tend to be fairly - * large. 
- */ - order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - - if (!addr) { - kfree(buffers); - return -ENOMEM; - } - - dump_bufs->page_addr = addr; - dump_bufs->page_order = order; - dump_bufs->buf_cnt = n; - dump_bufs->bufs = buffers; - - /* Set the buffer of each dump buf */ - for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t dump_buf_offset = dump_buf_bytes * buf_idx; - const size_t clk_cnt_buf_offset = - (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); - - buffers[buf_idx].metadata = metadata; - buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset); - buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset); - } - - return 0; -} - -void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs) -{ - if (!dump_bufs) - return; - - kfree(dump_bufs->bufs); - free_pages(dump_bufs->page_addr, dump_bufs->page_order); - memset(dump_bufs, 0, sizeof(*dump_bufs)); -} - void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || WARN_ON(dst->metadata != dst_enable_map->metadata)) @@ -291,21 +208,22 @@ void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { u64 *dst_blk; size_t val_cnt; - if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + 
val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); } memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); + memset(dst->blk_stt_buf, 0, metadata->blk_stt_bytes); } void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) @@ -314,15 +232,15 @@ void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) return; memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); - memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); + memset(dst->blk_stt_buf, 0, dst->metadata->blk_stt_bytes); } void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || WARN_ON(dst->metadata != dst_enable_map->metadata)) @@ -330,23 +248,30 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *ds metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); const u64 *blk_em = - kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); /* Align upwards to include padding bytes */ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); - if 
(kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) { + if (kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst)) { /* Block available, so only zero non-enabled values */ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt); + + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + } else { /* Block not available, so zero the entire thing */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } } } @@ -356,7 +281,7 @@ void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -366,20 +291,25 @@ void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { u64 *dst_blk; const u64 *src_blk; + blk_stt_t *dst_blk_stt; + const blk_stt_t *src_blk_stt; size_t val_cnt; - if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); + dst_blk_stt = 
kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + src_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + kbase_hwcnt_block_state_copy(dst_blk_stt, src_blk_stt); } kbase_hwcnt_metadata_for_each_clock(metadata, clk) @@ -394,7 +324,7 @@ void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -404,19 +334,28 @@ void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - const u64 *src_blk = - kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + const blk_stt_t *src_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); const u64 *blk_em = - kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); + /* Align upwards to include padding bytes */ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, 
src_blk, blk_em, val_cnt); + + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) + kbase_hwcnt_block_state_copy(dst_blk_stt, src_blk_stt); + else + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } kbase_hwcnt_metadata_for_each_clock(metadata, clk) @@ -433,7 +372,7 @@ void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -443,22 +382,27 @@ void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { u64 *dst_blk; const u64 *src_blk; + blk_stt_t *dst_blk_stt; + const blk_stt_t *src_blk_stt; size_t hdr_cnt; size_t ctr_cnt; - if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); - hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); + dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + src_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, 
src_blk, hdr_cnt, ctr_cnt); + kbase_hwcnt_block_state_accumulate(dst_blk_stt, src_blk_stt); } kbase_hwcnt_metadata_for_each_clock(metadata, clk) @@ -473,7 +417,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -483,15 +427,20 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - const u64 *src_blk = - kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); const u64 *blk_em = - kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); - size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + const blk_stt_t *src_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); + + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); + /* Align upwards to include padding bytes */ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS( hdr_cnt + ctr_cnt, @@ -499,6 +448,11 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d 
kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) + kbase_hwcnt_block_state_accumulate(dst_blk_stt, src_blk_stt); + else + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } kbase_hwcnt_metadata_for_each_clock(metadata, clk) @@ -509,3 +463,29 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d dst->clk_cnt_buf[clk] = 0; } } + +void kbase_hwcnt_dump_buffer_block_state_update(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + blk_stt_t blk_stt_val) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t blk, blk_inst; + + if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) + { + if (kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst) && + kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + + /* Block is available and enabled, so update the block state */ + *dst_blk_stt |= blk_stt_val; + } + } +} diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h index 5c5ada401768..16f68ead170e 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,8 @@ * Terminology: * * Hardware Counter System: - * A collection of hardware counter groups, making a full hardware counter + * A collection of hardware counter blocks, making a full hardware counter * system. - * Hardware Counter Group: - * A group of Hardware Counter Blocks (e.g. a t62x might have more than one - * core group, so has one counter group per core group, where each group - * may have a different number and layout of counter blocks). * Hardware Counter Block: * A block of hardware counters (e.g. shader block, tiler block). * Hardware Counter Block Instance: @@ -59,10 +55,16 @@ * * Enable Map: * An array of u64 bitfields, where each bit either enables exactly one - * block value, or is unused (padding). + * block value, or is unused (padding). Note that this is derived from + * the client configuration, and is not obtained from the hardware. * Dump Buffer: * An array of u64 values, where each u64 corresponds either to one block * value, or is unused (padding). + * Block State Buffer: + * An array of blk_stt_t values, where each blk_stt_t corresponds to one block + * instance and is used to track the on/off power state transitions, as well as + * hardware resource availability, and whether the block was operating + * in normal or protected mode. * Availability Mask: * A bitfield, where each bit corresponds to whether a block instance is * physically available (e.g. an MP3 GPU may have a sparse core mask of @@ -74,7 +76,6 @@ * Metadata: * Structure describing the physical layout of the enable map and dump buffers * for a specific hardware counter system.
- * */ #ifndef _KBASE_HWCNT_TYPES_H_ @@ -98,10 +99,14 @@ */ #define KBASE_HWCNT_VALUE_BYTES (sizeof(u64)) +/* Number of elements in the avail_mask array, in kbase_hwcnt_metadata */ +#define KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT 2 + /* Number of bits in an availability mask (i.e. max total number of block * instances supported in a Hardware Counter System) */ -#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) +#define KBASE_HWCNT_AVAIL_MASK_BITS \ + (sizeof(u64) * KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT * BITS_PER_BYTE) /* Minimum alignment of each block of hardware counters */ #define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) @@ -114,9 +119,60 @@ * Return: Input value if already aligned to the specified boundary, or next * (incrementing upwards) aligned value. */ -#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ (value + ((alignment - (value % alignment)) % alignment)) +typedef u8 blk_stt_t; + +/* Number of bytes storing the per-block state transition information. */ +#define KBASE_HWCNT_BLOCK_STATE_BYTES (sizeof(blk_stt_t)) + +/* Number of entries of blk_stt_t used to store the block state. */ +#define KBASE_HWCNT_BLOCK_STATE_STRIDE (1) + +/* Block state indicating that the hardware block state was indeterminable + * or not set during the sampling period. + */ +#define KBASE_HWCNT_STATE_UNKNOWN ((blk_stt_t)(0)) + +/* Block state indicating that the hardware block was on or transitioned to on + * during the sampling period. + */ +#define KBASE_HWCNT_STATE_ON ((blk_stt_t)(1u << 0)) + +/* Block state indicating that the hardware block was off or transitioned to off + * during the sampling period. + */ +#define KBASE_HWCNT_STATE_OFF ((blk_stt_t)(1u << 1)) + +/* Block state indicating that the hardware block was available to the current + * VM for some portion of the sampling period.
+ */ +#define KBASE_HWCNT_STATE_AVAILABLE ((blk_stt_t)(1u << 2)) + +/* Block state indicating that the hardware block was unavailable to the current + * VM for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_UNAVAILABLE ((blk_stt_t)(1u << 3)) + +/* Block state indicating that the hardware block was operating in normal mode + * for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_NORMAL ((blk_stt_t)(1u << 4)) + +/* Block state indicating that the hardware block was operating in protected mode + * for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_PROTECTED ((blk_stt_t)(1u << 5)) + +/* For a valid block state with the above masks, only a maximum of + * KBASE_HWCNT_STATE_BITS can be set. + */ +#define KBASE_HWCNT_STATE_BITS (6) + +/* Mask to detect malformed block state bitmaps. */ +#define KBASE_HWCNT_STATE_MASK ((blk_stt_t)((1u << KBASE_HWCNT_STATE_BITS) - 1)) + /** * struct kbase_hwcnt_block_description - Description of one or more identical, * contiguous, Hardware Counter Blocks. @@ -133,31 +189,25 @@ struct kbase_hwcnt_block_description { }; /** - * struct kbase_hwcnt_group_description - Description of one or more identical, - * contiguous Hardware Counter Groups. - * @type: The arbitrary identifier used to identify the type of the group. - * @blk_cnt: The number of types of Hardware Counter Block in the group. - * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, - * describing each type of Hardware Counter Block in the group. + * struct kbase_hwcnt_avail_mask - Mask type for HW Counter availability. + * @mask: Array of bitmask elements. */ -struct kbase_hwcnt_group_description { - u64 type; - size_t blk_cnt; - const struct kbase_hwcnt_block_description *blks; +struct kbase_hwcnt_avail_mask { + u64 mask[KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT]; }; -/** +/* * struct kbase_hwcnt_description - Description of a Hardware Counter System. - * @grp_cnt: The number of Hardware Counter Groups.
- * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, - * describing each Hardware Counter Group in the system. + * @blk_cnt: The number of Hardware Counter Blocks. + * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each Hardware Counter Block in the system. * @avail_mask: Flat Availability Mask for all block instances in the system. * @clk_cnt: The number of clock domains in the system. The maximum is 64. */ struct kbase_hwcnt_description { - size_t grp_cnt; - const struct kbase_hwcnt_group_description *grps; - u64 avail_mask; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; + struct kbase_hwcnt_avail_mask avail_mask; u8 clk_cnt; }; @@ -183,6 +233,12 @@ struct kbase_hwcnt_description { * @avail_mask_index: Index in bits into the parent's Availability Mask where * the Availability Masks of the Block Instances described * by this metadata start. + * @blk_stt_index: Index in bits into the parent's Block State Buffer + * where the Block State Masks of the Block Instances described + * by this metadata start. + * @blk_stt_stride: Stride in the underlying block state tracking type between + * the Block State bytes corresponding to each of the + * Block Instances. */ struct kbase_hwcnt_block_metadata { u64 type; @@ -194,58 +250,148 @@ struct kbase_hwcnt_block_metadata { size_t dump_buf_index; size_t dump_buf_stride; size_t avail_mask_index; + size_t blk_stt_index; + size_t blk_stt_stride; }; /** - * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout - * of a group of blocks in a Hardware - * Counter System's Dump Buffers and Enable - * Maps. - * @type: The arbitrary identifier used to identify the type of the - * group. - * @blk_cnt: The number of types of Hardware Counter Block in the - * group. - * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, - * describing the physical layout of each type of Hardware - * Counter Block in the group.
- * @enable_map_index: Index in u64s into the parent's Enable Map where the - * Enable Maps of the blocks within the group described by - * this metadata start. - * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the - * Dump Buffers of the blocks within the group described by - * metadata start. - * @avail_mask_index: Index in bits into the parent's Availability Mask where - * the Availability Masks of the blocks within the group - * described by this metadata start. + * kbase_hwcnt_set_avail_mask() - Set bitfield values into a large bitmask. Convenience function. + * + * @avail_mask: Pointer to destination HWC mask, which is comprised of an array of u64 elements + * @u0: Value of element 0. + * @u1: Value of element 1 */ -struct kbase_hwcnt_group_metadata { - u64 type; - size_t blk_cnt; - const struct kbase_hwcnt_block_metadata *blk_metadata; - size_t enable_map_index; - size_t dump_buf_index; - size_t avail_mask_index; -}; +static inline void kbase_hwcnt_set_avail_mask(struct kbase_hwcnt_avail_mask *avail_mask, u64 u0, + u64 u1) +{ + /* If KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT gets updated, we must modify the signature of + * kbase_hwcnt_set_avail_mask() so that all elements continue to be set. + */ + BUILD_BUG_ON(KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT != 2); + + avail_mask->mask[0] = u0; + avail_mask->mask[1] = u1; +} + +/** + * kbase_hwcnt_avail_masks_equal() - Compare two HWC availability masks + * @avail_mask0: First mask to compare + * @avail_mask1: Second mask to compare + * + * Return: 1 if masks are equal. Otherwise, 0. 
+ */ +static inline bool kbase_hwcnt_avail_masks_equal(const struct kbase_hwcnt_avail_mask *avail_mask0, + const struct kbase_hwcnt_avail_mask *avail_mask1) +{ + return (!memcmp(avail_mask0, avail_mask1, sizeof(*avail_mask0))); +} + +/** + * kbase_hwcnt_avail_masks_equal_values() - Compare two HWC availability masks + * @avail_mask: Mask to compare + * @u0: First element of mask to compare against + * @u1: Second element of mask to compare against + * + * Return: 1 if masks are equal. Otherwise, 0. + */ +static inline bool +kbase_hwcnt_avail_masks_equal_values(const struct kbase_hwcnt_avail_mask *avail_mask, u64 u0, + u64 u1) +{ + BUILD_BUG_ON(KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT != 2); + return ((avail_mask->mask[0] == u0) && (avail_mask->mask[1] == u1)); +} + +/** + * kbase_hwcnt_cp_avail_mask - Copy one avail mask into another + * @dst_avail_mask: Destination mask + * @src_avail_mask: Source Mask + */ +static inline void kbase_hwcnt_cp_avail_mask(struct kbase_hwcnt_avail_mask *dst_avail_mask, + const struct kbase_hwcnt_avail_mask *src_avail_mask) +{ + memcpy(dst_avail_mask, src_avail_mask, sizeof(*dst_avail_mask)); +} + +/** + * kbase_hwcnt_set_avail_mask_bits() - Set a bitfield value into a large bitmask + * + * @avail_mask: Pointer to destination HWC mask, which is comprised of an array of u64 elements + * @offset_in_bits: The offset into which to place the value in the bitmask. The value being + * placed is expected to be fully contained by the array of bitmask elements. + * @length_in_bits: The length of the value being placed in the bitmask. Assumed to be no more + * than 64 bits in length. + * @value: The source value to be written into the bitmask.
+ */ +static inline void kbase_hwcnt_set_avail_mask_bits(struct kbase_hwcnt_avail_mask *avail_mask, + size_t offset_in_bits, size_t length_in_bits, + u64 value) +{ + size_t arr_offset = offset_in_bits / 64; + size_t bits_set = 0; + + if (!length_in_bits) + return; + + WARN_ON(length_in_bits > 64); + if (WARN_ON((offset_in_bits + length_in_bits) > (KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT << 6))) + return; + + do { + size_t remaining_to_set = length_in_bits - bits_set; + size_t start_dest_bit_in_word = (offset_in_bits + bits_set) - (arr_offset * 64); + size_t bits_that_fit_into_this_word = + min(64 - start_dest_bit_in_word, remaining_to_set); + + uint64_t dest_mask, mask, source_mask; + uint64_t source_fragment; + + if (bits_that_fit_into_this_word == 64) { + mask = U64_MAX; + source_mask = U64_MAX; + dest_mask = U64_MAX; + } else { + mask = (1ULL << bits_that_fit_into_this_word) - 1; + source_mask = ((1ULL << (bits_that_fit_into_this_word)) - 1) << bits_set; + dest_mask = mask << start_dest_bit_in_word; + } + + source_fragment = (value & source_mask) >> bits_set; + + if (WARN_ON(arr_offset >= KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT)) + break; + + avail_mask->mask[arr_offset] &= ~dest_mask; + avail_mask->mask[arr_offset] |= + ((source_fragment & mask) << start_dest_bit_in_word); + + arr_offset++; + bits_set += bits_that_fit_into_this_word; + } while (bits_set < length_in_bits); +} /** * struct kbase_hwcnt_metadata - Metadata describing the memory layout * of Dump Buffers and Enable Maps within a * Hardware Counter System. - * @grp_cnt: The number of Hardware Counter Groups. - * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, + * @blk_cnt: The number of Hardware Counter Blocks + * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, * describing the physical layout of each Hardware Counter - * Group in the system. + * Block in the system. * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. 
* @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. + * @blk_stt_bytes: The size in bytes of a Block State Buffer needed for + * the system. * @avail_mask: The Availability Mask for the system. * @clk_cnt: The number of clock domains in the system. */ struct kbase_hwcnt_metadata { - size_t grp_cnt; - const struct kbase_hwcnt_group_metadata *grp_metadata; + size_t blk_cnt; + const struct kbase_hwcnt_block_metadata *blk_metadata; size_t enable_map_bytes; size_t dump_buf_bytes; - u64 avail_mask; + size_t blk_stt_bytes; + struct kbase_hwcnt_avail_mask avail_mask; u8 clk_cnt; }; @@ -257,7 +403,7 @@ struct kbase_hwcnt_metadata { * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an * array of u64 bitfields, each bit of which enables one hardware * counter. - * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * @clk_enable_map: A u64 bitfield, each bit of which enables cycle * counter for a given clock domain. */ struct kbase_hwcnt_enable_map { @@ -274,27 +420,14 @@ struct kbase_hwcnt_enable_map { * metadata->dump_buf_bytes. * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed * for each clock domain. + * @blk_stt_buf: A pointer to an array of blk_stt_t values holding block state + * information for each block. */ struct kbase_hwcnt_dump_buffer { const struct kbase_hwcnt_metadata *metadata; u64 *dump_buf; u64 *clk_cnt_buf; -}; - -/** - * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. - * @page_addr: Address of allocated pages. A single allocation is used for all - * Dump Buffers in the array. - * @page_order: The allocation order of the pages, the order is on a logarithmic - * scale. - * @buf_cnt: The number of allocated Dump Buffers. - * @bufs: Non-NULL pointer to the array of Dump Buffers. 
- */ -struct kbase_hwcnt_dump_buffer_array { - unsigned long page_addr; - unsigned int page_order; - size_t buf_cnt; - struct kbase_hwcnt_dump_buffer *bufs; + blk_stt_t *blk_stt_buf; }; /** @@ -316,232 +449,229 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); /** - * kbase_hwcnt_metadata_group_count() - Get the number of groups. + * kbase_hwcnt_block_state_set() - Set one or more block states + * for a block instance. + * @blk_stt: Pointer to destination block state instance + * @stt: Block state bitmask + */ +static inline void kbase_hwcnt_block_state_set(blk_stt_t *blk_stt, blk_stt_t stt) +{ + if (WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK)) + return; + + *blk_stt = stt; +} + +/** + * kbase_hwcnt_block_state_append() - Adds one or more block states + * onto a block instance. + * @blk_stt: Pointer to destination block state instance + * @stt: Block state bitmask + */ +static inline void kbase_hwcnt_block_state_append(blk_stt_t *blk_stt, blk_stt_t stt) +{ + if (WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK)) + return; + + *blk_stt |= stt; +} + +/** + * kbase_hwcnt_block_state_copy() - Copy block state between two block + * state instances. + * @dst_blk_stt: Pointer to destination block state instance + * @src_blk_stt: Pointer to source block state instance. + */ +static inline void kbase_hwcnt_block_state_copy(blk_stt_t *dst_blk_stt, + const blk_stt_t *src_blk_stt) +{ + kbase_hwcnt_block_state_set(dst_blk_stt, *src_blk_stt); +} + +/** + * kbase_hwcnt_block_state_accumulate() - Accumulate block state between two block + * state instances. + * @dst_blk_stt: Pointer to destination block state instance + * @src_blk_stt: Pointer to source block state instance. 
+ */ +static inline void kbase_hwcnt_block_state_accumulate(blk_stt_t *dst_blk_stt, + const blk_stt_t *src_blk_stt) +{ + kbase_hwcnt_block_state_append(dst_blk_stt, *src_blk_stt); +} + +/** + * kbase_hwcnt_metadata_block_count() - Get the number of blocks in the metadata. * @metadata: Non-NULL pointer to metadata. * - * Return: Number of hardware counter groups described by metadata. + * Return: Number of blocks in the metadata. */ -static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) +static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata) { if (WARN_ON(!metadata)) return 0; - return metadata->grp_cnt; -} - -/** - * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * - * Return: Type of the group grp. - */ -static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, - size_t grp) -{ - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) - return 0; - - return metadata->grp_metadata[grp].type; -} - -/** - * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * - * Return: Number of blocks in group grp. - */ -static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, - size_t grp) -{ - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) - return 0; - - return metadata->grp_metadata[grp].blk_cnt; + return metadata->blk_cnt; } /** * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block. * - * Return: Type of the block blk in group grp. + * Return: Type of the block blk. 
*/ static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, - size_t grp, size_t blk) + size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].type; + return metadata->blk_metadata[blk].type; } /** * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of * a block. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: Number of instances of block blk in group grp. + * Return: Number of instances of block blk. */ static inline size_t -kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt; + return metadata->blk_metadata[blk].inst_cnt; } /** * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter * headers. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: Number of counter headers in each instance of block blk in group grp. + * Return: Number of counter headers in each instance of block blk. 
*/ static inline size_t -kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt; + return metadata->blk_metadata[blk].hdr_cnt; } /** * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: Number of counters in each instance of block blk in group grp. + * Return: Number of counters in each instance of block blk. */ static inline size_t -kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt; + return metadata->blk_metadata[blk].ctr_cnt; } /** * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: enable map stride in each instance of block blk in group grp. + * Return: enable map stride in each instance of block blk. 
*/ static inline size_t kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata, - size_t grp, size_t blk) + size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride; + return metadata->blk_metadata[blk].enable_map_stride; } /** * kbase_hwcnt_metadata_block_values_count() - Get the number of values. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * * Return: Number of headers plus counters in each instance of block blk - * in group grp. + * in the metadata. */ static inline size_t -kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) + - kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); + return kbase_hwcnt_metadata_block_counters_count(metadata, blk) + + kbase_hwcnt_metadata_block_headers_count(metadata, blk); } /** * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in * the metadata. * @md: Non-NULL pointer to metadata. - * @grp: size_t variable used as group iterator. * @blk: size_t variable used as block iterator. * @blk_inst: size_t variable used as block instance iterator. * - * Iteration order is group, then block, then block instance (i.e. linearly - * through memory). + * Iteration order is block, then block instance (i.e. 
linearly through memory). */ -#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ - for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ - for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ - for ((blk_inst) = 0; \ - (blk_inst) < \ - kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \ - (blk_inst)++) +#define kbase_hwcnt_metadata_for_each_block(md, blk, blk_inst) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md)); (blk)++) \ + for ((blk_inst) = 0; \ + (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (blk)); \ + (blk_inst)++) /** * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail * mask corresponding to the block. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * * Return: The bit index into the avail mask for the block. */ static inline size_t -kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].avail_mask_index + - metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; + return metadata->blk_metadata[blk].avail_mask_index; } /** * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is * available. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: true if the block instance is available, else false. 
*/ static inline bool -kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk, size_t blk_inst) +kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t blk, + size_t blk_inst) { size_t bit; + size_t mask_index; u64 mask; if (WARN_ON(!metadata)) return false; - bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst; - mask = 1ull << bit; + bit = kbase_hwcnt_metadata_block_avail_bit(metadata, blk) + blk_inst; + mask_index = bit >> 6; + mask = 1ull << (bit & 0x3f); - return (metadata->avail_mask & mask) != 0; + return (metadata->avail_mask.mask[mask_index] & mask) != 0; } /** @@ -568,31 +698,28 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block * instance's enable map. * @map: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: u64* to the bitfield(s) used as the enable map for the * block instance. 
*/ static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map)) return NULL; - if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) || - WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) || - WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) + if (WARN_ON(!map->metadata) || WARN_ON(blk >= map->metadata->blk_cnt) || + WARN_ON(blk_inst >= map->metadata->blk_metadata[blk].inst_cnt)) return map->hwcnt_enable_map; - return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index + - map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index + - (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst); + return map->hwcnt_enable_map + map->metadata->blk_metadata[blk].enable_map_index + + (map->metadata->blk_metadata[blk].enable_map_stride * blk_inst); } /** - * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * kbase_hwcnt_bitfield_count - Calculate the number of u64 bitfields required * to have at minimum one bit per value. * @val_cnt: Number of values. * @@ -604,24 +731,22 @@ static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) } /** - * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * kbase_hwcnt_enable_map_block_disable_all - Disable all values in a block. * @dst: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. 
*/ static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { size_t val_cnt; size_t bitfld_cnt; - u64 *const block_enable_map = - kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + u64 *const block_enable_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst); if (WARN_ON(!dst)) return; - val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); + val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, blk); bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); @@ -645,23 +770,21 @@ static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_ /** * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. * @dst: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. 
*/ static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { size_t val_cnt; size_t bitfld_cnt; - u64 *const block_enable_map = - kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + u64 *const block_enable_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst); size_t bitfld_idx; if (WARN_ON(!dst)) return; - val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); + val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, blk); bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { @@ -682,13 +805,13 @@ static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_en */ static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst) { - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) return; - kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) - kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(dst->metadata, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all(dst, blk, blk_inst); dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; } @@ -751,27 +874,26 @@ static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *d * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block * instance are enabled. * @enable_map: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: true if any values in the block are enabled, else false. 
*/ static inline bool -kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp, - size_t blk, size_t blk_inst) +kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t blk, + size_t blk_inst) { bool any_enabled = false; size_t val_cnt; size_t bitfld_cnt; const u64 *const block_enable_map = - kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); + kbase_hwcnt_enable_map_block_instance(enable_map, blk, blk_inst); size_t bitfld_idx; if (WARN_ON(!enable_map)) return false; - val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk); + val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, blk); bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { @@ -796,7 +918,7 @@ kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable static inline bool kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map) { - size_t grp, blk, blk_inst; + size_t blk, blk_inst; u64 clk_enable_map_mask; if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata)) @@ -807,9 +929,9 @@ kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_m if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask)) return true; - kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst) + kbase_hwcnt_metadata_for_each_block(enable_map->metadata, blk, blk_inst) { - if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) + if (kbase_hwcnt_enable_map_block_enabled(enable_map, blk, blk_inst)) return true; } @@ -869,9 +991,8 @@ static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_ /** * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. * @metadata: Non-NULL pointer to metadata describing the system. 
- * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be - * initialised to undefined values, so must be used as a copy dest, - * or cleared before use. + * @dump_buf: Non-NULL pointer to a zero-initialized dump buffer. + * The memory will be zero allocated * * Return: 0 on success, else error code. */ @@ -887,54 +1008,52 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, */ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); -/** - * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. - * @metadata: Non-NULL pointer to metadata describing the system. - * @n: Number of dump buffers to allocate - * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. - * - * A single zeroed contiguous page allocation will be used for all of the - * buffers inside the array, where: - * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. - * @dump_bufs: Dump buffer array to be freed. - * - * Can be safely called on an all-zeroed dump buffer array structure, or on an - * already freed dump buffer array. - */ -void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs); - /** * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block * instance's dump buffer. * @buf: Non-NULL pointer to dump buffer. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: u64* to the dump buffer for the block instance. 
*/ static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) return NULL; - if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) || - WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) || - WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) + if (WARN_ON(!buf->metadata) || WARN_ON(blk >= buf->metadata->blk_cnt) || + WARN_ON(blk_inst >= buf->metadata->blk_metadata[blk].inst_cnt)) return buf->dump_buf; - return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index + - buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + - (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); + return buf->dump_buf + buf->metadata->blk_metadata[blk].dump_buf_index + + (buf->metadata->blk_metadata[blk].dump_buf_stride * blk_inst); +} + +/** + * kbase_hwcnt_dump_buffer_block_state_instance() - Get the pointer to a block + * instance's block state mask. + * @buf: Non-NULL pointer to dump buffer. + * @blk: Index of the block in the metadata. + * @blk_inst: Index of the block instance in the block. + * + * Return: blk_stt_t* to the block state mask of the block instance in the dump + * buffer. 
+ */ +static inline blk_stt_t * +kbase_hwcnt_dump_buffer_block_state_instance(const struct kbase_hwcnt_dump_buffer *buf, size_t blk, + size_t blk_inst) +{ + if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) + return NULL; + + if (WARN_ON(!buf->metadata) || WARN_ON(blk >= buf->metadata->blk_cnt) || + WARN_ON(blk_inst >= buf->metadata->blk_metadata[blk].inst_cnt)) + return buf->blk_stt_buf; + + return buf->blk_stt_buf + buf->metadata->blk_metadata[blk].blk_stt_index + + (buf->metadata->blk_metadata[blk].blk_stt_stride * blk_inst); } /** @@ -1228,4 +1347,19 @@ static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, return false; } +/** + * kbase_hwcnt_dump_buffer_block_state_update() - Update the enabled block instances' block states + * in dst. After the operation, all non-enabled or + * unavailable block instances will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @blk_stt_val: Mask of block states to update. Block states not set in this mask will still be + * preserved in dst. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_block_state_update(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + blk_stt_t blk_stt_val); + #endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c index d618764d3b32..89cca450ecf9 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -255,7 +255,7 @@ static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *h /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ @@ -264,7 +264,7 @@ static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *h &hvirt->scratch_buf); /* Accumulate into only existing clients' accumulation bufs */ if (!errcode) - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); } @@ -315,7 +315,7 @@ static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualize struct kbase_hwcnt_virtualizer_client *pos; /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, @@ -323,7 +323,7 @@ static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualize &hvirt->scratch_buf); /* Accumulate into remaining clients' accumulation bufs */ if (!errcode) { - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); @@ -373,7 +373,7 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( /* Make 
the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) /* Ignore the enable map of the selected client */ if (pos != hvcli) kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); @@ -385,7 +385,7 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( return errcode; /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) if (pos != hvcli) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); @@ -503,7 +503,7 @@ static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer * return errcode; /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) if (pos != hvcli) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); @@ -724,7 +724,7 @@ void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt) if (WARN_ON(hvirt->client_count != 0)) { struct kbase_hwcnt_virtualizer_client *pos, *n; - list_for_each_entry_safe (pos, n, &hvirt->clients, node) + list_for_each_entry_safe(pos, n, &hvirt->clients, node) kbase_hwcnt_virtualizer_client_destroy(pos); } diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c index 60b061ef623d..22ba78dee7d4 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -76,13 +76,12 @@ static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value) * @model_data: Pointer to counter model data * * Register IPA counter model as a client of kbase_ipa_control, which - * provides an interface to retreive the accumulated value of hardware + * provides an interface to retrieve the accumulated value of hardware * counters to calculate energy consumption. * * Return: 0 on success, or an error code. */ -static int -kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) +static int kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) { struct kbase_device *kbdev = model_data->kbdev; struct kbase_ipa_control_perf_counter *perf_counters; @@ -93,15 +92,13 @@ kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) /* Value for GPU_ACTIVE counter also needs to be queried. It is required * for the normalization of top-level and shader core counters. 
*/ - model_data->num_counters = 1 + model_data->num_top_level_cntrs + - model_data->num_shader_cores_cntrs; + model_data->num_counters = + 1 + model_data->num_top_level_cntrs + model_data->num_shader_cores_cntrs; - perf_counters = kcalloc(model_data->num_counters, - sizeof(*perf_counters), GFP_KERNEL); + perf_counters = kcalloc(model_data->num_counters, sizeof(*perf_counters), GFP_KERNEL); if (!perf_counters) { - dev_err(kbdev->dev, - "Failed to allocate memory for perf_counters array"); + dev_err(kbdev->dev, "Failed to allocate memory for perf_counters array"); return -ENOMEM; } @@ -116,8 +113,7 @@ kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) cnt_idx++; for (i = 0; i < model_data->num_top_level_cntrs; ++i) { - const struct kbase_ipa_counter *counter = - &model_data->top_level_cntrs_def[i]; + const struct kbase_ipa_counter *counter = &model_data->top_level_cntrs_def[i]; perf_counters[cnt_idx].type = counter->counter_block_type; perf_counters[cnt_idx].idx = counter->counter_block_offset; @@ -127,8 +123,7 @@ kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) } for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { - const struct kbase_ipa_counter *counter = - &model_data->shader_cores_cntrs_def[i]; + const struct kbase_ipa_counter *counter = &model_data->shader_cores_cntrs_def[i]; perf_counters[cnt_idx].type = counter->counter_block_type; perf_counters[cnt_idx].idx = counter->counter_block_offset; @@ -137,12 +132,10 @@ kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) cnt_idx++; } - err = kbase_ipa_control_register(kbdev, perf_counters, - model_data->num_counters, + err = kbase_ipa_control_register(kbdev, perf_counters, model_data->num_counters, &model_data->ipa_control_client); if (err) - dev_err(kbdev->dev, - "Failed to register IPA with kbase_ipa_control"); + dev_err(kbdev->dev, "Failed to register IPA with kbase_ipa_control"); kfree(perf_counters); return err; @@ -152,20 
+145,17 @@ kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control. * @model_data: Pointer to counter model data */ -static void -kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data) +static void kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data) { if (model_data->ipa_control_client) { - kbase_ipa_control_unregister(model_data->kbdev, - model_data->ipa_control_client); + kbase_ipa_control_unregister(model_data->kbdev, model_data->ipa_control_client); model_data->ipa_control_client = NULL; } } static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data, - const struct kbase_ipa_counter *const cnt_defs, - size_t num_counters, s32 *counter_coeffs, - u64 *counter_values, u32 active_cycles, u32 *coeffp) + const struct kbase_ipa_counter *const cnt_defs, size_t num_counters, + s32 *counter_coeffs, u64 *counter_values, u32 active_cycles, u32 *coeffp) { u64 coeff = 0, coeff_mul = 0; s64 total_energy = 0; @@ -184,21 +174,18 @@ static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data, if (counter_value > MAX_COUNTER_INCREMENT) { dev_warn(model_data->kbdev->dev, - "Increment in counter %s more than expected", - cnt_defs[i].name); + "Increment in counter %s more than expected", cnt_defs[i].name); return -ERANGE; } - total_energy = - kbase_ipa_add_saturate(total_energy, group_energy); + total_energy = kbase_ipa_add_saturate(total_energy, group_energy); } /* Range: 0 <= coeff < 2^63 */ if (total_energy >= 0) coeff = total_energy; else - dev_dbg(model_data->kbdev->dev, - "Energy value came negative as %lld", total_energy); + dev_dbg(model_data->kbdev->dev, "Energy value came negative as %lld", total_energy); /* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). 
However, this * can be constrained further: the value of counters that are being @@ -275,9 +262,8 @@ int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) * not be used when GPU enters protected mode, as IPA is supposed to * switch to the simple power model. */ - ret = kbase_ipa_control_query(kbdev, - model_data->ipa_control_client, - cnt_values_p, num_counters, NULL); + ret = kbase_ipa_control_query(kbdev, model_data->ipa_control_client, cnt_values_p, + num_counters, NULL); if (WARN_ON(ret)) return ret; @@ -293,9 +279,7 @@ int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) * that regular calls every 25-100 ms interval are expected. */ if (diff_ms > MAX_SAMPLE_INTERVAL_MS) { - dev_dbg(kbdev->dev, - "Last sample was taken %lld milli seconds ago", - diff_ms); + dev_dbg(kbdev->dev, "Last sample was taken %lld milli seconds ago", diff_ms); return -EOVERFLOW; } @@ -305,8 +289,7 @@ int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) * 0 <= active_cycles < 2^31 */ if (*cnt_values_p > U32_MAX) { - dev_warn(kbdev->dev, - "Increment in GPU_ACTIVE counter more than expected"); + dev_warn(kbdev->dev, "Increment in GPU_ACTIVE counter more than expected"); return -ERANGE; } @@ -325,18 +308,16 @@ int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) cnt_values_p++; ret = calculate_coeff(model_data, model_data->top_level_cntrs_def, - model_data->num_top_level_cntrs, - counter_coeffs_p, cnt_values_p, active_cycles, - &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + model_data->num_top_level_cntrs, counter_coeffs_p, cnt_values_p, + active_cycles, &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); if (ret) return ret; cnt_values_p += model_data->num_top_level_cntrs; counter_coeffs_p += model_data->num_top_level_cntrs; ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def, - model_data->num_shader_cores_cntrs, - counter_coeffs_p, cnt_values_p, active_cycles, - 
&coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); + model_data->num_shader_cores_cntrs, counter_coeffs_p, cnt_values_p, + active_cycles, &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); return ret; } @@ -351,26 +332,24 @@ void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model) lockdep_assert_held(&model->kbdev->ipa.lock); - ret = kbase_ipa_control_query(model->kbdev, - model_data->ipa_control_client, - cnt_values_p, num_counters, NULL); + ret = kbase_ipa_control_query(model->kbdev, model_data->ipa_control_client, cnt_values_p, + num_counters, NULL); WARN_ON(ret); } int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, - const struct kbase_ipa_counter *top_level_cntrs_def, - size_t num_top_level_cntrs, - const struct kbase_ipa_counter *shader_cores_cntrs_def, - size_t num_shader_cores_cntrs, - s32 reference_voltage) + const struct kbase_ipa_counter *top_level_cntrs_def, + size_t num_top_level_cntrs, + const struct kbase_ipa_counter *shader_cores_cntrs_def, + size_t num_shader_cores_cntrs, s32 reference_voltage) { struct kbase_ipa_counter_model_data *model_data; s32 *counter_coeffs_p; int err = 0; size_t i; - if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def || - !num_top_level_cntrs || !num_shader_cores_cntrs) + if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def || !num_top_level_cntrs || + !num_shader_cores_cntrs) return -EINVAL; model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); @@ -390,13 +369,12 @@ int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, counter_coeffs_p = model_data->counter_coeffs; for (i = 0; i < model_data->num_top_level_cntrs; ++i) { - const struct kbase_ipa_counter *counter = - &model_data->top_level_cntrs_def[i]; + const struct kbase_ipa_counter *counter = &model_data->top_level_cntrs_def[i]; *counter_coeffs_p = counter->coeff_default_value; - err = kbase_ipa_model_add_param_s32( - model, counter->name, counter_coeffs_p, 1, false); + err = kbase_ipa_model_add_param_s32(model, 
counter->name, counter_coeffs_p, 1, + false); if (err) goto exit; @@ -404,13 +382,12 @@ int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, } for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { - const struct kbase_ipa_counter *counter = - &model_data->shader_cores_cntrs_def[i]; + const struct kbase_ipa_counter *counter = &model_data->shader_cores_cntrs_def[i]; *counter_coeffs_p = counter->coeff_default_value; - err = kbase_ipa_model_add_param_s32( - model, counter->name, counter_coeffs_p, 1, false); + err = kbase_ipa_model_add_param_s32(model, counter->name, counter_coeffs_p, 1, + false); if (err) goto exit; @@ -418,22 +395,19 @@ int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, } model_data->scaling_factor = DEFAULT_SCALING_FACTOR; - err = kbase_ipa_model_add_param_s32( - model, "scale", &model_data->scaling_factor, 1, false); + err = kbase_ipa_model_add_param_s32(model, "scale", &model_data->scaling_factor, 1, false); if (err) goto exit; model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", - &model_data->min_sample_cycles, 1, - false); + &model_data->min_sample_cycles, 1, false); if (err) goto exit; model_data->reference_voltage = reference_voltage; err = kbase_ipa_model_add_param_s32(model, "reference_voltage", - &model_data->reference_voltage, 1, - false); + &model_data->reference_voltage, 1, false); if (err) goto exit; diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h index 37d2efc59fcc..3cf81cb1d81a 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,7 +43,7 @@ struct kbase_ipa_counter_model_data; * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def array. * @counter_coeffs: Buffer to store coefficient value used for HW counters * @counter_values: Buffer to store the accumulated value of HW counters - * retreived from kbase_ipa_control. + * retrieved from kbase_ipa_control. * @num_counters: Number of counters queried from kbase_ipa_control. * @reference_voltage: voltage, in mV, of the operating point used when * deriving the power model coefficients. Range approx @@ -142,11 +142,10 @@ void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model); * Return: 0 on success, error code otherwise */ int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, - const struct kbase_ipa_counter *top_level_cntrs_def, - size_t num_top_level_cntrs, - const struct kbase_ipa_counter *shader_cores_cntrs_def, - size_t num_shader_cores_cntrs, - s32 reference_voltage); + const struct kbase_ipa_counter *top_level_cntrs_def, + size_t num_top_level_cntrs, + const struct kbase_ipa_counter *shader_cores_cntrs_def, + size_t num_shader_cores_cntrs, s32 reference_voltage); /** * kbase_ipa_counter_common_model_term() - terminate ipa power model * @model: ipa power model to terminate diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c index 34515a934b33..6e2976d9bbf9 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,9 +39,7 @@ * incrementing every cycle over a ~100ms sample period at a high frequency, * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27. */ -static inline u32 kbase_ipa_read_hwcnt( - struct kbase_ipa_model_vinstr_data *model_data, - u32 offset) +static inline u32 kbase_ipa_read_hwcnt(struct kbase_ipa_model_vinstr_data *model_data, u32 offset) { u8 *p = (u8 *)model_data->dump_buf.dump_buf; u64 val = *(u64 *)&p[offset]; @@ -63,21 +61,19 @@ static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) return rtn; } -s64 kbase_ipa_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter) +s64 kbase_ipa_sum_all_shader_cores(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter) { struct kbase_device *kbdev = model_data->kbdev; u64 core_mask; u32 base = 0; s64 ret = 0; - core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + core_mask = kbdev->gpu_props.coherency_info.group.core_mask; while (core_mask != 0ull) { if ((core_mask & 1ull) != 0ull) { /* 0 < counter_value < 2^27 */ - u32 counter_value = kbase_ipa_read_hwcnt(model_data, - base + counter); + u32 counter_value = kbase_ipa_read_hwcnt(model_data, base + counter); /* 0 < ret < 2^27 * max_num_cores = 2^32 */ ret = kbase_ipa_add_saturate(ret, counter_value); @@ -90,20 +86,18 @@ s64 kbase_ipa_sum_all_shader_cores( return ret * coeff; } -s64 kbase_ipa_sum_all_memsys_blocks( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter) +s64 kbase_ipa_sum_all_memsys_blocks(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter) { struct kbase_device *kbdev = model_data->kbdev; - const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices; + const u32 num_blocks = kbdev->gpu_props.num_l2_slices; u32 base = 
0; s64 ret = 0; u32 i; for (i = 0; i < num_blocks; i++) { /* 0 < counter_value < 2^27 */ - u32 counter_value = kbase_ipa_read_hwcnt(model_data, - base + counter); + u32 counter_value = kbase_ipa_read_hwcnt(model_data, base + counter); /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ ret = kbase_ipa_add_saturate(ret, counter_value); @@ -114,15 +108,13 @@ s64 kbase_ipa_sum_all_memsys_blocks( return ret * coeff; } -s64 kbase_ipa_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter) +s64 kbase_ipa_single_counter(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, u32 counter) { /* Range: 0 < counter_value < 2^27 */ const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); /* Range: -2^49 < ret < 2^49 */ - return counter_value * (s64) coeff; + return counter_value * (s64)coeff; } int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) @@ -131,8 +123,7 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) struct kbase_device *kbdev = model_data->kbdev; struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; struct kbase_hwcnt_enable_map enable_map; - const struct kbase_hwcnt_metadata *metadata = - kbase_hwcnt_virtualizer_metadata(hvirt); + const struct kbase_hwcnt_metadata *metadata = kbase_hwcnt_virtualizer_metadata(hvirt); if (!metadata) return -1; @@ -148,8 +139,7 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) /* Disable cycle counter only. 
*/ enable_map.clk_enable_map = 0; - errcode = kbase_hwcnt_virtualizer_client_create( - hvirt, &enable_map, &model_data->hvirt_cli); + errcode = kbase_hwcnt_virtualizer_client_create(hvirt, &enable_map, &model_data->hvirt_cli); kbase_hwcnt_enable_map_free(&enable_map); if (errcode) { dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); @@ -157,8 +147,7 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) return errcode; } - errcode = kbase_hwcnt_dump_buffer_alloc( - metadata, &model_data->dump_buf); + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &model_data->dump_buf); if (errcode) { dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); @@ -181,7 +170,7 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) { struct kbase_ipa_model_vinstr_data *model_data = - (struct kbase_ipa_model_vinstr_data *)model->model_data; + (struct kbase_ipa_model_vinstr_data *)model->model_data; s64 energy = 0; size_t i; u64 coeff = 0, coeff_mul = 0; @@ -189,8 +178,8 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) u32 active_cycles; int err = 0; - err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, - &start_ts_ns, &end_ts_ns, &model_data->dump_buf); + err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, &start_ts_ns, &end_ts_ns, + &model_data->dump_buf); if (err) goto err0; @@ -201,7 +190,7 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) */ active_cycles = model_data->get_active_cycles(model_data); - if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) { + if (active_cycles < (u32)max(model_data->min_sample_cycles, 0)) { err = -ENODATA; goto err0; } @@ -215,8 +204,7 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) for (i = 0; i < 
model_data->groups_def_num; i++) { const struct kbase_ipa_group *group = &model_data->groups_def[i]; s32 coeff = model_data->group_values[i]; - s64 group_energy = group->op(model_data, coeff, - group->counter_block_offset); + s64 group_energy = group->op(model_data, coeff, group->counter_block_offset); energy = kbase_ipa_add_saturate(energy, group_energy); } @@ -269,12 +257,13 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) err0: /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ - *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16); + *coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16); return err; } void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model) { + CSTD_UNUSED(model); /* Currently not implemented */ WARN_ON_ONCE(1); } @@ -301,37 +290,32 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, model_data->groups_def_num = ipa_group_size; model_data->get_active_cycles = get_active_cycles; - model->model_data = (void *) model_data; + model->model_data = (void *)model_data; for (i = 0; i < model_data->groups_def_num; ++i) { const struct kbase_ipa_group *group = &model_data->groups_def[i]; model_data->group_values[i] = group->default_value; err = kbase_ipa_model_add_param_s32(model, group->name, - &model_data->group_values[i], - 1, false); + &model_data->group_values[i], 1, false); if (err) goto exit; } model_data->scaling_factor = DEFAULT_SCALING_FACTOR; - err = kbase_ipa_model_add_param_s32(model, "scale", - &model_data->scaling_factor, - 1, false); + err = kbase_ipa_model_add_param_s32(model, "scale", &model_data->scaling_factor, 1, false); if (err) goto exit; model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", - &model_data->min_sample_cycles, - 1, false); + &model_data->min_sample_cycles, 1, false); if (err) goto exit; model_data->reference_voltage = reference_voltage; err = kbase_ipa_model_add_param_s32(model, 
"reference_voltage", - &model_data->reference_voltage, - 1, false); + &model_data->reference_voltage, 1, false); if (err) goto exit; @@ -348,7 +332,7 @@ exit: void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) { struct kbase_ipa_model_vinstr_data *model_data = - (struct kbase_ipa_model_vinstr_data *)model->model_data; + (struct kbase_ipa_model_vinstr_data *)model->model_data; kbase_ipa_detach_vinstr(model_data); kfree(model_data); diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h index 6089610847b4..d1d1f7d3b57f 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,22 +27,20 @@ #include "hwcnt/mali_kbase_hwcnt_types.h" /* Maximum number of IPA groups for an IPA model. */ -#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 +#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 /* Number of bytes per hardware counter in a vinstr_buffer. */ #define KBASE_IPA_NR_BYTES_PER_CNT (sizeof(u64)) /* Number of hardware counters per block in a vinstr_buffer. */ -#define KBASE_IPA_NR_CNT_PER_BLOCK 64 +#define KBASE_IPA_NR_CNT_PER_BLOCK 64 /* Number of bytes per block in a vinstr_buffer. 
*/ -#define KBASE_IPA_NR_BYTES_PER_BLOCK \ - (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) +#define KBASE_IPA_NR_BYTES_PER_BLOCK (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) struct kbase_ipa_model_vinstr_data; -typedef u32 -kbase_ipa_get_active_cycles_callback(struct kbase_ipa_model_vinstr_data *); +typedef u32 kbase_ipa_get_active_cycles_callback(struct kbase_ipa_model_vinstr_data *); /** * struct kbase_ipa_model_vinstr_data - IPA context per device @@ -94,10 +92,8 @@ struct kbase_ipa_model_vinstr_data { struct kbase_ipa_group { const char *name; s32 default_value; - s64 (*op)( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset); + s64 (*op)(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter_block_offset); u32 counter_block_offset; }; @@ -114,9 +110,8 @@ struct kbase_ipa_group { * * Return: Sum of counter values. Range: -2^54 < ret < 2^54 */ -s64 kbase_ipa_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter); +s64 kbase_ipa_sum_all_shader_cores(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter); /** * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks @@ -131,9 +126,8 @@ s64 kbase_ipa_sum_all_shader_cores( * * Return: Sum of counter values. Range: -2^51 < ret < 2^51 */ -s64 kbase_ipa_sum_all_memsys_blocks( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter); +s64 kbase_ipa_sum_all_memsys_blocks(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter); /** * kbase_ipa_single_counter() - sum a single counter @@ -147,9 +141,8 @@ s64 kbase_ipa_sum_all_memsys_blocks( * * Return: Counter value. 
Range: -2^49 < ret < 2^49 */ -s64 kbase_ipa_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter); +s64 kbase_ipa_single_counter(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter); /** * kbase_ipa_attach_vinstr() - attach a vinstr_buffer to an IPA model. diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c index 21b4e52884c5..6c03f1b9ac4b 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,58 +23,65 @@ #include "mali_kbase.h" /* MEMSYS counter block offsets */ -#define L2_RD_MSG_IN_CU (13) -#define L2_RD_MSG_IN (16) -#define L2_WR_MSG_IN (18) -#define L2_SNP_MSG_IN (20) -#define L2_RD_MSG_OUT (22) -#define L2_READ_LOOKUP (26) -#define L2_EXT_READ_NOSNP (30) +#define L2_RD_MSG_IN_CU (13) +#define L2_RD_MSG_IN (16) +#define L2_WR_MSG_IN (18) +#define L2_SNP_MSG_IN (20) +#define L2_RD_MSG_OUT (22) +#define L2_READ_LOOKUP (26) +#define L2_EXT_READ_NOSNP (30) #define L2_EXT_WRITE_NOSNP_FULL (43) +#define L2_RD_MSG_IN_STALL (17) +#define L2_EXT_WRITE (42) /* SC counter block offsets */ -#define FRAG_STARVING (8) +#define FRAG_STARVING (8) #define FRAG_PARTIAL_QUADS_RAST (10) -#define FRAG_QUADS_EZS_UPDATE (13) -#define FULL_QUAD_WARPS (21) -#define EXEC_INSTR_FMA (27) -#define EXEC_INSTR_CVT (28) -#define EXEC_INSTR_SFU (29) -#define EXEC_INSTR_MSG (30) -#define TEX_FILT_NUM_OPS (39) -#define LS_MEM_READ_SHORT (45) -#define LS_MEM_WRITE_SHORT (47) -#define 
VARY_SLOT_16 (51) -#define BEATS_RD_LSC_EXT (57) -#define BEATS_RD_TEX (58) -#define BEATS_RD_TEX_EXT (59) -#define FRAG_QUADS_COARSE (68) +#define FRAG_QUADS_EZS_UPDATE (13) +#define FULL_QUAD_WARPS (21) +#define EXEC_INSTR_FMA (27) +#define EXEC_INSTR_CVT (28) +#define EXEC_INSTR_SFU (29) +#define EXEC_INSTR_MSG (30) +#define TEX_FILT_NUM_OPS (39) +#define LS_MEM_READ_SHORT (45) +#define LS_MEM_WRITE_SHORT (47) +#define VARY_SLOT_16 (51) +#define BEATS_RD_LSC_EXT (57) +#define BEATS_RD_TEX (58) +#define BEATS_RD_TEX_EXT (59) +#define FRAG_QUADS_COARSE (68) +#define EXEC_STARVE_ARITH (33) +#define TEX_TFCH_CLK_STALLED (37) +#define RT_RAYS_STARTED (84) +#define TEX_CFCH_NUM_L1_CT_OPERATIONS (90) +#define EXEC_INSTR_SLOT1 (118) +#define EXEC_ISSUE_SLOT_ANY (119) /* Tiler counter block offsets */ -#define IDVS_POS_SHAD_STALL (23) -#define PREFETCH_STALL (25) -#define VFETCH_POS_READ_WAIT (29) -#define VFETCH_VERTEX_WAIT (30) -#define PRIMASSY_STALL (32) -#define IDVS_VAR_SHAD_STALL (38) -#define ITER_STALL (40) -#define PMGR_PTR_RD_STALL (48) +#define IDVS_POS_SHAD_STALL (23) +#define PREFETCH_STALL (25) +#define VFETCH_POS_READ_WAIT (29) +#define VFETCH_VERTEX_WAIT (30) +#define PRIMASSY_STALL (32) +#define IDVS_VAR_SHAD_STALL (38) +#define ITER_STALL (40) +#define PMGR_PTR_RD_STALL (48) +#define PRIMASSY_POS_SHADER_WAIT (64) -#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \ - { \ - .name = cnt_name, \ - .coeff_default_value = coeff, \ - .counter_block_offset = cnt_idx, \ - .counter_block_type = block_type, \ +#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \ + { \ + .name = cnt_name, .coeff_default_value = coeff, .counter_block_offset = cnt_idx, \ + .counter_block_type = block_type, \ } -#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ +#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS) -#define SC_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ +#define 
SC_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_SHADER) -#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ +#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_TILER) /* Tables of description of HW counters used by IPA counter model. @@ -129,6 +136,16 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), }; +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tkrx[] = { + TILER_COUNTER_DEF("primassy_pos_shader_wait", 93883, PRIMASSY_POS_SHADER_WAIT), + TILER_COUNTER_DEF("idvs_pos_shad_stall", -69197, IDVS_POS_SHAD_STALL), + + MEMSYS_COUNTER_DEF("l2_rd_msg_out", 176502, L2_RD_MSG_OUT), + MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_full", 510351, L2_EXT_WRITE_NOSNP_FULL), + MEMSYS_COUNTER_DEF("l2_ext_write", -402377, L2_EXT_WRITE), + MEMSYS_COUNTER_DEF("l2_rd_msg_in_stall", -66545, L2_RD_MSG_IN_STALL), +}; + /* These tables provide a description of each performance counter * used by the shader cores counter model for energy estimation. 
*/ @@ -179,36 +196,41 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), }; -#define IPA_POWER_MODEL_OPS(gpu, init_token) \ - const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ - .name = "mali-" #gpu "-power-model", \ - .init = kbase_ ## init_token ## _power_model_init, \ - .term = kbase_ipa_counter_common_model_term, \ - .get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \ - .reset_counter_data = kbase_ipa_counter_reset_data, \ - }; \ - KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tkrx[] = { + SC_COUNTER_DEF("exec_issue_slot_any", 299674, EXEC_ISSUE_SLOT_ANY), + SC_COUNTER_DEF("exec_starve_arith", 26817, EXEC_STARVE_ARITH), + SC_COUNTER_DEF("tex_cfch_num_l1_ct_operations", 226797, TEX_CFCH_NUM_L1_CT_OPERATIONS), + SC_COUNTER_DEF("exec_instr_slot1", -1185776, EXEC_INSTR_SLOT1), + SC_COUNTER_DEF("tex_tfch_clk_stalled", -147729, TEX_TFCH_CLK_STALLED), + SC_COUNTER_DEF("exec_instr_fma", 61968, EXEC_INSTR_FMA), + SC_COUNTER_DEF("rt_rays_started", -149038, RT_RAYS_STARTED), +}; -#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ - static int kbase_ ## gpu ## _power_model_init(\ - struct kbase_ipa_model *model) \ - { \ - BUILD_BUG_ON((1 + \ - ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu) +\ - ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu)) > \ - KBASE_IPA_MAX_COUNTER_DEF_NUM); \ - return kbase_ipa_counter_common_model_init(model, \ - ipa_top_level_cntrs_def_ ## gpu, \ - ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu), \ - ipa_shader_core_cntrs_def_ ## gpu, \ - ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu), \ - (reference_voltage)); \ - } \ +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_##init_token##_power_model_init, \ + .term = kbase_ipa_counter_common_model_term, \ + 
.get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \ + .reset_counter_data = kbase_ipa_counter_reset_data, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_##gpu##_ipa_model_ops) + +#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ + static int kbase_##gpu##_power_model_init(struct kbase_ipa_model *model) \ + { \ + BUILD_BUG_ON((1 + ARRAY_SIZE(ipa_top_level_cntrs_def_##gpu) + \ + ARRAY_SIZE(ipa_shader_core_cntrs_def_##gpu)) > \ + KBASE_IPA_MAX_COUNTER_DEF_NUM); \ + return kbase_ipa_counter_common_model_init( \ + model, ipa_top_level_cntrs_def_##gpu, \ + ARRAY_SIZE(ipa_top_level_cntrs_def_##gpu), \ + ipa_shader_core_cntrs_def_##gpu, \ + ARRAY_SIZE(ipa_shader_core_cntrs_def_##gpu), (reference_voltage)); \ + } \ IPA_POWER_MODEL_OPS(gpu, gpu) - -#define ALIAS_POWER_MODEL(gpu, as_gpu) \ - IPA_POWER_MODEL_OPS(gpu, as_gpu) +#define ALIAS_POWER_MODEL(gpu, as_gpu) IPA_POWER_MODEL_OPS(gpu, as_gpu) /* Reference voltage value is 750 mV. */ STANDARD_POWER_MODEL(todx, 750); @@ -217,6 +239,9 @@ STANDARD_POWER_MODEL(tvax, 750); STANDARD_POWER_MODEL(ttux, 750); /* Reference voltage value is 550 mV. 
*/ STANDARD_POWER_MODEL(ttix, 550); +STANDARD_POWER_MODEL(tkrx, 550); +/* Assuming LKRX is an alias of TKRX for IPA */ +ALIAS_POWER_MODEL(lkrx, tkrx); /* Assuming LODX is an alias of TODX for IPA */ ALIAS_POWER_MODEL(lodx, todx); @@ -228,20 +253,19 @@ ALIAS_POWER_MODEL(ltux, ttux); ALIAS_POWER_MODEL(ltix, ttix); static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { - &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, - &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, - &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, - &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, + &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, &kbase_tgrx_ipa_model_ops, + &kbase_tvax_ipa_model_ops, &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, + &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, &kbase_tkrx_ipa_model_ops, + &kbase_lkrx_ipa_model_ops, }; -const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( - struct kbase_device *kbdev, const char *name) +const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(struct kbase_device *kbdev, + const char *name) { - int i; + size_t i; for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { - const struct kbase_ipa_model_ops *ops = - ipa_counter_model_ops[i]; + const struct kbase_ipa_model_ops *ops = ipa_counter_model_ops[i]; if (!strcmp(ops->name, name)) return ops; @@ -252,28 +276,29 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( return NULL; } -const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) +const char *kbase_ipa_counter_model_name_from_id(struct kbase_gpu_id_props *gpu_id) { - const u32 prod_id = - (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { - case GPU_ID2_PRODUCT_TODX: + switch (gpu_id->product_model) { + case GPU_ID_PRODUCT_TODX: return "mali-todx-power-model"; - case GPU_ID2_PRODUCT_LODX: + case GPU_ID_PRODUCT_LODX: return 
"mali-lodx-power-model"; - case GPU_ID2_PRODUCT_TGRX: + case GPU_ID_PRODUCT_TGRX: return "mali-tgrx-power-model"; - case GPU_ID2_PRODUCT_TVAX: + case GPU_ID_PRODUCT_TVAX: return "mali-tvax-power-model"; - case GPU_ID2_PRODUCT_TTUX: + case GPU_ID_PRODUCT_TTUX: return "mali-ttux-power-model"; - case GPU_ID2_PRODUCT_LTUX: + case GPU_ID_PRODUCT_LTUX: return "mali-ltux-power-model"; - case GPU_ID2_PRODUCT_TTIX: + case GPU_ID_PRODUCT_TTIX: return "mali-ttix-power-model"; - case GPU_ID2_PRODUCT_LTIX: + case GPU_ID_PRODUCT_LTIX: return "mali-ltix-power-model"; + case GPU_ID_PRODUCT_TKRX: + return "mali-tkrx-power-model"; + case GPU_ID_PRODUCT_LKRX: + return "mali-lkrx-power-model"; default: return NULL; } diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c index 2092db042dec..15d5ba4581a8 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -26,24 +26,24 @@ #include /* Performance counter blocks base offsets */ -#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) /* JM counter block offsets */ -#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) +#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) /* MEMSYS counter block offsets */ #define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) /* SC counter block offsets */ -#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) -#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) -#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) +#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) +#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) +#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) #define SC_TEX_FILT_NUM_OPERATIONS 
(KBASE_IPA_NR_BYTES_PER_CNT * 39) -#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) +#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) #define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) -#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) -#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) +#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) +#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) /** * kbase_g7x_power_model_get_jm_counter() - get performance counter offset @@ -57,6 +57,7 @@ static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, u32 counter_block_offset) { + CSTD_UNUSED(model_data); return JM_BASE + counter_block_offset; } @@ -72,6 +73,7 @@ static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_da static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, u32 counter_block_offset) { + CSTD_UNUSED(model_data); /* The base address of Memory System performance counters is always the same, although their number * may vary based on the number of cores. For the moment it's ok to return a constant. 
*/ @@ -91,13 +93,11 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da u32 counter_block_offset) { #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - const u32 sc_base = MEMSYS_BASE + - (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * - KBASE_IPA_NR_BYTES_PER_BLOCK); + const u32 sc_base = + MEMSYS_BASE + (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * KBASE_IPA_NR_BYTES_PER_BLOCK); #else - const u32 sc_base = MEMSYS_BASE + - (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * - KBASE_IPA_NR_BYTES_PER_BLOCK); + const u32 sc_base = MEMSYS_BASE + (model_data->kbdev->gpu_props.num_l2_slices * + KBASE_IPA_NR_BYTES_PER_BLOCK); #endif return sc_base + counter_block_offset; } @@ -112,15 +112,12 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da * * Return: Energy estimation for a single Memory System performance counter. */ -static s64 kbase_g7x_sum_all_memsys_blocks( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) +static s64 kbase_g7x_sum_all_memsys_blocks(struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter_block_offset) { u32 counter; - counter = kbase_g7x_power_model_get_memsys_counter(model_data, - counter_block_offset); + counter = kbase_g7x_power_model_get_memsys_counter(model_data, counter_block_offset); return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); } @@ -135,15 +132,12 @@ static s64 kbase_g7x_sum_all_memsys_blocks( * Return: Energy estimation for a Shader Cores performance counter for all * cores. 
*/ -static s64 kbase_g7x_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) +static s64 kbase_g7x_sum_all_shader_cores(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter_block_offset) { u32 counter; - counter = kbase_g7x_power_model_get_sc_counter(model_data, - counter_block_offset); + counter = kbase_g7x_power_model_get_sc_counter(model_data, counter_block_offset); return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); } @@ -155,15 +149,12 @@ static s64 kbase_g7x_sum_all_shader_cores( * * Return: Energy estimation for a single Job Manager performance counter. */ -static s64 kbase_g7x_jm_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) +static s64 kbase_g7x_jm_single_counter(struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, + u32 counter_block_offset) { u32 counter; - counter = kbase_g7x_power_model_get_jm_counter(model_data, - counter_block_offset); + counter = kbase_g7x_power_model_get_jm_counter(model_data, counter_block_offset); return kbase_ipa_single_counter(model_data, coeff, counter); } @@ -174,8 +165,7 @@ static s64 kbase_g7x_jm_single_counter( * Return: the number of cycles the GPU was active during the counter sampling * period. 
*/ -static u32 kbase_g7x_get_active_cycles( - struct kbase_ipa_model_vinstr_data *model_data) +static u32 kbase_g7x_get_active_cycles(struct kbase_ipa_model_vinstr_data *model_data) { u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE); @@ -455,31 +445,27 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = { }, }; -#define IPA_POWER_MODEL_OPS(gpu, init_token) \ - static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \ - .name = "mali-" #gpu "-power-model", \ - .init = kbase_##init_token##_power_model_init, \ - .term = kbase_ipa_vinstr_common_model_term, \ - .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ - .reset_counter_data = kbase_ipa_vinstr_reset_data, \ +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_##init_token##_power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + .reset_counter_data = kbase_ipa_vinstr_reset_data, \ } -#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ - static int kbase_ ## gpu ## _power_model_init(\ - struct kbase_ipa_model *model) \ - { \ - BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ - KBASE_IPA_MAX_GROUP_DEF_NUM); \ - return kbase_ipa_vinstr_common_model_init(model, \ - ipa_groups_def_ ## gpu, \ - ARRAY_SIZE(ipa_groups_def_ ## gpu), \ - kbase_g7x_get_active_cycles, \ - (reference_voltage)); \ - } \ +#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ + static int kbase_##gpu##_power_model_init(struct kbase_ipa_model *model) \ + { \ + BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_##gpu) > KBASE_IPA_MAX_GROUP_DEF_NUM); \ + return kbase_ipa_vinstr_common_model_init(model, ipa_groups_def_##gpu, \ + ARRAY_SIZE(ipa_groups_def_##gpu), \ + kbase_g7x_get_active_cycles, \ + (reference_voltage)); \ + } \ IPA_POWER_MODEL_OPS(gpu, gpu) -#define ALIAS_POWER_MODEL(gpu, as_gpu) \ 
- IPA_POWER_MODEL_OPS(gpu, as_gpu) +#define ALIAS_POWER_MODEL(gpu, as_gpu) IPA_POWER_MODEL_OPS(gpu, as_gpu) STANDARD_POWER_MODEL(g71, 800); STANDARD_POWER_MODEL(g72, 800); @@ -496,26 +482,19 @@ ALIAS_POWER_MODEL(g52, g76); ALIAS_POWER_MODEL(tnax, g77); static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { - &kbase_g71_ipa_model_ops, - &kbase_g72_ipa_model_ops, - &kbase_g76_ipa_model_ops, - &kbase_g52_ipa_model_ops, - &kbase_g52_r1_ipa_model_ops, - &kbase_g51_ipa_model_ops, - &kbase_g77_ipa_model_ops, - &kbase_tnax_ipa_model_ops, - &kbase_tbex_ipa_model_ops, + &kbase_g71_ipa_model_ops, &kbase_g72_ipa_model_ops, &kbase_g76_ipa_model_ops, + &kbase_g52_ipa_model_ops, &kbase_g52_r1_ipa_model_ops, &kbase_g51_ipa_model_ops, + &kbase_g77_ipa_model_ops, &kbase_tnax_ipa_model_ops, &kbase_tbex_ipa_model_ops, &kbase_tbax_ipa_model_ops }; -const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( - struct kbase_device *kbdev, const char *name) +const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(struct kbase_device *kbdev, + const char *name) { - int i; + size_t i; for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { - const struct kbase_ipa_model_ops *ops = - ipa_counter_model_ops[i]; + const struct kbase_ipa_model_ops *ops = ipa_counter_model_ops[i]; if (!strcmp(ops->name, name)) return ops; @@ -526,34 +505,30 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( return NULL; } -const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) +const char *kbase_ipa_counter_model_name_from_id(struct kbase_gpu_id_props *gpu_id) { - const u32 prod_id = - (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { - case GPU_ID2_PRODUCT_TMIX: + switch (gpu_id->product_model) { + case GPU_ID_PRODUCT_TMIX: return "mali-g71-power-model"; - case GPU_ID2_PRODUCT_THEX: + case GPU_ID_PRODUCT_THEX: return "mali-g72-power-model"; - case GPU_ID2_PRODUCT_TNOX: + 
case GPU_ID_PRODUCT_TNOX: return "mali-g76-power-model"; - case GPU_ID2_PRODUCT_TSIX: + case GPU_ID_PRODUCT_TSIX: return "mali-g51-power-model"; - case GPU_ID2_PRODUCT_TGOX: - if ((gpu_id & GPU_ID2_VERSION_MAJOR) == - (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) + case GPU_ID_PRODUCT_TGOX: + if (gpu_id->version_major == 0) /* g52 aliased to g76 power-model's ops */ return "mali-g52-power-model"; else return "mali-g52_r1-power-model"; - case GPU_ID2_PRODUCT_TNAX: + case GPU_ID_PRODUCT_TNAX: return "mali-tnax-power-model"; - case GPU_ID2_PRODUCT_TTRX: + case GPU_ID_PRODUCT_TTRX: return "mali-g77-power-model"; - case GPU_ID2_PRODUCT_TBEX: + case GPU_ID_PRODUCT_TBEX: return "mali-tbex-power-model"; - case GPU_ID2_PRODUCT_TBAX: + case GPU_ID_PRODUCT_TBAX: return "mali-tbax-power-model"; default: return NULL; diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c index b2e6bc459f22..e3b61d5afade 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,10 +69,9 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device * } KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); -const char *kbase_ipa_model_name_from_id(u32 gpu_id) +const char *kbase_ipa_model_name_from_id(struct kbase_gpu_id_props *gpu_id) { - const char *model_name = - kbase_ipa_counter_model_name_from_id(gpu_id); + const char *model_name = kbase_ipa_counter_model_name_from_id(gpu_id); if (!model_name) return KBASE_IPA_FALLBACK_MODEL_NAME; @@ -81,8 +80,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id) } KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); -static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, - bool dt_required) +static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, bool dt_required) { struct device_node *model_dt_node = NULL; char compat_string[64]; @@ -94,26 +92,24 @@ static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, * so take a reference on it first. 
*/ of_node_get(model->kbdev->dev->of_node); - model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, - NULL, compat_string); + model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, NULL, compat_string); if (!model_dt_node && !model->missing_dt_node_warning) { if (dt_required) dev_warn(model->kbdev->dev, - "Couldn't find power_model DT node matching \'%s\'\n", - compat_string); + "Couldn't find power_model DT node matching \'%s\'\n", + compat_string); model->missing_dt_node_warning = true; } return model_dt_node; } -int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, - const char *name, s32 *addr, +int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, const char *name, s32 *addr, size_t num_elems, bool dt_required) { - int err = -EINVAL, i; - struct device_node *model_dt_node = get_model_dt_node(model, - dt_required); + int err = -EINVAL; + size_t i; + struct device_node *model_dt_node = get_model_dt_node(model, dt_required); char *origin; err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems); @@ -124,9 +120,8 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, if (err && dt_required) { memset(addr, 0, sizeof(s32) * num_elems); - dev_warn(model->kbdev->dev, - "Error %d, no DT entry: %s.%s = %zu*[0]\n", - err, model->ops->name, name, num_elems); + dev_warn(model->kbdev->dev, "Error %d, no DT entry: %s.%s = %zu*[0]\n", err, + model->ops->name, name, num_elems); origin = "zero"; } else if (err && !dt_required) { origin = "default"; @@ -144,17 +139,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, goto exit; } } else { - if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, + (uint32_t)i))) { err = -ENOMEM; goto exit; } } - dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", - model->ops->name, elem_name, addr[i], origin); + dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", 
model->ops->name, elem_name, + addr[i], origin); - err = kbase_ipa_model_param_add(model, elem_name, - &addr[i], sizeof(s32), + err = kbase_ipa_model_param_add(model, elem_name, &addr[i], sizeof(s32), PARAM_TYPE_S32); if (err) goto exit; @@ -163,18 +158,15 @@ exit: return err; } -int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, - const char *name, char *addr, +int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, const char *name, char *addr, size_t size, bool dt_required) { int err; - struct device_node *model_dt_node = get_model_dt_node(model, - dt_required); + struct device_node *model_dt_node = get_model_dt_node(model, dt_required); const char *string_prop_value = ""; char *origin; - err = of_property_read_string(model_dt_node, name, - &string_prop_value); + err = of_property_read_string(model_dt_node, name, &string_prop_value); /* We're done with model_dt_node now, so drop the reference taken in * get_model_dt_node()/of_find_compatible_node(). @@ -183,9 +175,8 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, if (err && dt_required) { strncpy(addr, "", size - 1); - dev_warn(model->kbdev->dev, - "Error %d, no DT entry: %s.%s = \'%s\'\n", - err, model->ops->name, name, addr); + dev_warn(model->kbdev->dev, "Error %d, no DT entry: %s.%s = \'%s\'\n", err, + model->ops->name, name, addr); err = 0; origin = "zero"; } else if (err && !dt_required) { @@ -197,11 +188,10 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, addr[size - 1] = '\0'; - dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", - model->ops->name, name, string_prop_value, origin); + dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", model->ops->name, name, + string_prop_value, origin); - err = kbase_ipa_model_param_add(model, name, addr, size, - PARAM_TYPE_STRING); + err = kbase_ipa_model_param_add(model, name, addr, size, PARAM_TYPE_STRING); return err; } @@ -242,9 +232,8 @@ struct kbase_ipa_model *kbase_ipa_init_model(struct 
kbase_device *kbdev, err = model->ops->init(model); if (err) { - dev_err(kbdev->dev, - "init of power model \'%s\' returned error %d\n", - ops->name, err); + dev_err(kbdev->dev, "init of power model \'%s\' returned error %d\n", ops->name, + err); kfree(model); return NULL; } @@ -274,7 +263,6 @@ static void kbase_ipa_term_locked(struct kbase_device *kbdev) int kbase_ipa_init(struct kbase_device *kbdev) { - const char *model_name; const struct kbase_ipa_model_ops *ops; struct kbase_ipa_model *default_model = NULL; @@ -297,23 +285,15 @@ int kbase_ipa_init(struct kbase_device *kbdev) } kbdev->ipa.fallback_model = default_model; - err = of_property_read_string(kbdev->dev->of_node, - "ipa-model", - &model_name); + err = of_property_read_string(kbdev->dev->of_node, "ipa-model", &model_name); if (err) { /* Attempt to load a match from GPU-ID */ - u32 gpu_id; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - model_name = kbase_ipa_model_name_from_id(gpu_id); - dev_dbg(kbdev->dev, - "Inferring model from GPU ID 0x%x: \'%s\'\n", - gpu_id, model_name); + model_name = kbase_ipa_model_name_from_id(&kbdev->gpu_props.gpu_id); + dev_dbg(kbdev->dev, "Inferring model from GPU Product ID 0x%x: \'%s\'\n", + kbdev->gpu_props.gpu_id.product_id, model_name); err = 0; } else { - dev_dbg(kbdev->dev, - "Using ipa-model parameter from DT: \'%s\'\n", - model_name); + dev_dbg(kbdev->dev, "Using ipa-model parameter from DT: \'%s\'\n", model_name); } if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { @@ -321,9 +301,9 @@ int kbase_ipa_init(struct kbase_device *kbdev) kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); if (!kbdev->ipa.configured_model) { dev_warn(kbdev->dev, - "Failed to initialize ipa-model: \'%s\'\n" - "Falling back on default model\n", - model_name); + "Failed to initialize ipa-model: \'%s\'\n" + "Falling back on default model\n", + model_name); kbdev->ipa.configured_model = default_model; } } else { @@ -336,8 +316,7 @@ end: if (err) 
kbase_ipa_term_locked(kbdev); else - dev_info(kbdev->dev, - "Using configured power model %s, and fallback %s\n", + dev_info(kbdev->dev, "Using configured power model %s, and fallback %s\n", kbdev->ipa.configured_model->ops->name, kbdev->ipa.fallback_model->ops->name); @@ -370,8 +349,7 @@ KBASE_EXPORT_TEST_API(kbase_ipa_term); * * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) */ -static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, - const u32 voltage) +static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, const u32 voltage) { /* Range: 2^8 < v2 < 2^16 m(V^2) */ const u32 v2 = (voltage * voltage) / 1000; @@ -388,7 +366,7 @@ static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. * Must be < 2^42 to avoid overflowing the return value. */ - const u64 v2fc = (u64) c * (u64) v2f; + const u64 v2fc = (u64)c * (u64)v2f; /* Range: 0 < v2fc / 1000 < 2^13 mW */ return div_u64(v2fc, 1000); @@ -417,7 +395,7 @@ static u32 kbase_scale_static_power(const u32 c, const u32 voltage) * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. * The result should be < 2^52 to avoid overflowing the return value. 
*/ - const u64 v3c_big = (u64) c * (u64) v3; + const u64 v3c_big = (u64)c * (u64)v3; /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ return div_u64(v3c_big, 1000000); @@ -440,8 +418,7 @@ static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->ipa_protection_mode_switched || - kbdev->ipa.force_fallback_model) + if (kbdev->ipa_protection_mode_switched || kbdev->ipa.force_fallback_model) model = kbdev->ipa.fallback_model; else model = kbdev->ipa.configured_model; @@ -460,8 +437,7 @@ static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) return model; } -static u32 get_static_power_locked(struct kbase_device *kbdev, - struct kbase_ipa_model *model, +static u32 get_static_power_locked(struct kbase_device *kbdev, struct kbase_ipa_model *model, unsigned long voltage) { u32 power = 0; @@ -476,26 +452,22 @@ static u32 get_static_power_locked(struct kbase_device *kbdev, if (model->ops->get_static_coeff) { err = model->ops->get_static_coeff(model, &power_coeff); if (!err) - power = kbase_scale_static_power(power_coeff, - (u32) voltage); + power = kbase_scale_static_power(power_coeff, (u32)voltage); } return power; } #if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -static unsigned long kbase_get_static_power(struct devfreq *df, - unsigned long voltage) +#if defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE +static unsigned long kbase_get_static_power(struct devfreq *df, unsigned long voltage) #else static unsigned long kbase_get_static_power(unsigned long voltage) #endif { struct kbase_ipa_model *model; u32 power = 0; -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE +#if defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE struct kbase_device *kbdev = dev_get_drvdata(&df->dev); 
#else struct kbase_device *kbdev = kbase_find_device(-1); @@ -511,8 +483,7 @@ static unsigned long kbase_get_static_power(unsigned long voltage) mutex_unlock(&kbdev->ipa.lock); -#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if !(defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) kbase_release_device(kbdev); #endif @@ -537,16 +508,16 @@ static unsigned long kbase_get_static_power(unsigned long voltage) * needed. * Nominal voltage shall always be same as the real voltage for top-level. */ -static void opp_translate_freq_voltage(struct kbase_device *kbdev, - unsigned long nominal_freq, - unsigned long nominal_voltage, - unsigned long *freqs, +static void opp_translate_freq_voltage(struct kbase_device *kbdev, unsigned long nominal_freq, + unsigned long nominal_voltage, unsigned long *freqs, unsigned long *volts) { #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) /* An arbitrary voltage and frequency value can be chosen for testing * in no mali configuration which may not match with any OPP level. 
*/ + CSTD_UNUSED(kbdev); + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq; volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage; @@ -556,8 +527,9 @@ static void opp_translate_freq_voltage(struct kbase_device *kbdev, u64 core_mask; unsigned int i; - kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, - freqs, volts); + CSTD_UNUSED(nominal_voltage); + + kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); CSTD_UNUSED(core_mask); /* Convert micro volts to milli volts */ @@ -565,33 +537,27 @@ static void opp_translate_freq_voltage(struct kbase_device *kbdev, volts[i] /= 1000; if (kbdev->nr_clocks == 1) { - freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = - freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; - volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = - volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; + freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; + volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; } #endif } #if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -static unsigned long kbase_get_dynamic_power(struct devfreq *df, - unsigned long freq, +#if defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE +static unsigned long kbase_get_dynamic_power(struct devfreq *df, unsigned long freq, unsigned long voltage) #else -static unsigned long kbase_get_dynamic_power(unsigned long freq, - unsigned long voltage) +static unsigned long kbase_get_dynamic_power(unsigned long freq, unsigned long voltage) #endif { struct kbase_ipa_model *model; - unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; - unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; - u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = { 0 }; + unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = { 0 }; + u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = { 0 }; u32 power = 0; 
int err = 0; -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE +#if defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE struct kbase_device *kbdev = dev_get_drvdata(&df->dev); #else struct kbase_device *kbdev = kbase_find_device(-1); @@ -609,29 +575,25 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, if (!err) { opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); - power = kbase_scale_dynamic_power( - power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], - freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], - volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + power = kbase_scale_dynamic_power(power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); /* Here unlike kbase_get_real_power(), shader core frequency is * used for the scaling as simple power model is used to obtain * the value of dynamic coefficient (which is a fixed value * retrieved from the device tree). 
*/ - power += kbase_scale_dynamic_power( - power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], - freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], - volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); + power += kbase_scale_dynamic_power(power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], + freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], + volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); } else - dev_err_ratelimited(kbdev->dev, - "Model %s returned error code %d\n", + dev_err_ratelimited(kbdev->dev, "Model %s returned error code %d\n", model->ops->name, err); mutex_unlock(&kbdev->ipa.lock); -#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if !(defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) kbase_release_device(kbdev); #endif @@ -639,14 +601,13 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, } #endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ -int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, - unsigned long freq, +int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, unsigned long freq, unsigned long voltage) { struct kbase_ipa_model *model; - unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; - unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; - u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = { 0 }; + unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = { 0 }; + u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = { 0 }; struct kbasep_pm_metrics diff; u64 total_time; bool skip_utilization_scaling = false; @@ -682,10 +643,9 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); - *power = kbase_scale_dynamic_power( - power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], - freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], - volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + *power = 
kbase_scale_dynamic_power(power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) { unsigned long freq = freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]; @@ -698,30 +658,25 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, if (model != kbdev->ipa.fallback_model) freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; - *power += kbase_scale_dynamic_power( - power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], - freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); + *power += kbase_scale_dynamic_power(power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], + freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); } if (!skip_utilization_scaling) { /* time_busy / total_time cannot be >1, so assigning the 64-bit * result of div_u64 to *power cannot overflow. */ - total_time = diff.time_busy + (u64) diff.time_idle; - *power = div_u64(*power * (u64) diff.time_busy, - max(total_time, 1ull)); + total_time = diff.time_busy + (u64)diff.time_idle; + *power = div_u64(*power * (u64)diff.time_busy, max(total_time, 1ull)); } - *power += get_static_power_locked(kbdev, model, - volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + *power += get_static_power_locked(kbdev, model, volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); return err; } KBASE_EXPORT_TEST_API(kbase_get_real_power_locked); -int kbase_get_real_power(struct devfreq *df, u32 *power, - unsigned long freq, - unsigned long voltage) +int kbase_get_real_power(struct devfreq *df, u32 *power, unsigned long freq, unsigned long voltage) { int ret; struct kbase_device *kbdev = dev_get_drvdata(&df->dev); @@ -742,8 +697,7 @@ struct devfreq_cooling_power kbase_ipa_power_model_ops = { .get_static_power = &kbase_get_static_power, .get_dynamic_power = &kbase_get_dynamic_power, #endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE +#if 
defined(CONFIG_MALI_PWRSOFT_765) || KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE .get_real_power = &kbase_get_real_power, #endif }; @@ -764,8 +718,7 @@ void kbase_ipa_reset_data(struct kbase_device *kbdev) struct kbasep_pm_metrics diff; struct kbase_ipa_model *model; - kbase_pm_get_dvfs_metrics( - kbdev, &kbdev->ipa.last_metrics, &diff); + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff); model = get_current_model(kbdev); if (model != kbdev->ipa.fallback_model) diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h index c875ffb4990e..954cd69d0c7b 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -75,8 +75,7 @@ struct kbase_ipa_model { * * Return: 0 on success, or an error code */ -int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, - const char *name, s32 *addr, +int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, const char *name, s32 *addr, size_t num_elems, bool dt_required); /** @@ -92,8 +91,7 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, * * Return: 0 on success, or an error code */ -int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, - const char *name, char *addr, +int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, const char *name, char *addr, size_t size, bool dt_required); struct kbase_ipa_model_ops { @@ -204,8 +202,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device * * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup * failed. 
*/ -const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( - struct kbase_device *kbdev, const char *name); +const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(struct kbase_device *kbdev, + const char *name); /** * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID @@ -214,7 +212,7 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( * Return: The name of the appropriate counter-based model, or the name of the * fallback model if no counter model exists. */ -const char *kbase_ipa_model_name_from_id(u32 gpu_id); +const char *kbase_ipa_model_name_from_id(struct kbase_gpu_id_props *gpu_id); /** * kbase_ipa_counter_model_name_from_id - Find the best counter model for a @@ -224,7 +222,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id); * Return: The name of the appropriate counter-based model, or NULL if the * no counter model exists. */ -const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id); +const char *kbase_ipa_counter_model_name_from_id(struct kbase_gpu_id_props *gpu_id); /** * kbase_ipa_init_model - Initilaize the particular IPA model @@ -237,7 +235,7 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id); * Return: pointer to kbase_ipa_model on success, NULL on error */ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - const struct kbase_ipa_model_ops *ops); + const struct kbase_ipa_model_ops *ops); /** * kbase_ipa_term_model - Terminate the particular IPA model * @model: pointer to the IPA model object, already initialized @@ -267,16 +265,13 @@ void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); * * Return: 0 on success, or an error code. */ -int kbase_get_real_power(struct devfreq *df, u32 *power, - unsigned long freq, - unsigned long voltage); +int kbase_get_real_power(struct devfreq *df, u32 *power, unsigned long freq, unsigned long voltage); /* Called by kbase_get_real_power() to invoke the power models. 
* Must be called with kbdev->ipa.lock held. * This function is only exposed for use by unit tests. */ -int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, - unsigned long freq, +int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, unsigned long freq, unsigned long voltage); extern struct devfreq_cooling_power kbase_ipa_power_model_ops; @@ -297,7 +292,8 @@ void kbase_ipa_reset_data(struct kbase_device *kbdev); #else /* !(defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) -{ } +{ +} #endif /* (defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c index a0963bbb2068..97ab8b8e39c1 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,7 @@ static int param_int_get(void *data, u64 *val) struct kbase_ipa_model_param *param = data; mutex_lock(¶m->model->kbdev->ipa.lock); - *(s64 *) val = *param->addr.s32p; + *(s64 *)val = *param->addr.s32p; mutex_unlock(¶m->model->kbdev->ipa.lock); return 0; @@ -55,7 +55,7 @@ static int param_int_set(void *data, u64 val) { struct kbase_ipa_model_param *param = data; struct kbase_ipa_model *model = param->model; - s64 sval = (s64) val; + s64 sval = (s64)val; s32 old_val; int err = 0; @@ -75,8 +75,8 @@ static int param_int_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); -static ssize_t param_string_get(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t param_string_get(struct file *file, char __user *user_buf, size_t count, + loff_t *ppos) { struct kbase_ipa_model_param *param = file->private_data; ssize_t ret; @@ -84,15 +84,14 @@ static ssize_t param_string_get(struct file *file, char __user *user_buf, mutex_lock(¶m->model->kbdev->ipa.lock); len = strnlen(param->addr.str, param->size - 1) + 1; - ret = simple_read_from_buffer(user_buf, count, ppos, - param->addr.str, len); + ret = simple_read_from_buffer(user_buf, count, ppos, param->addr.str, len); mutex_unlock(¶m->model->kbdev->ipa.lock); return ret; } -static ssize_t param_string_set(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t param_string_set(struct file *file, const char __user *user_buf, size_t count, + loff_t *ppos) { struct kbase_ipa_model_param *param = file->private_data; struct kbase_ipa_model *model = param->model; @@ -101,6 +100,8 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf, size_t buf_size; int err; + CSTD_UNUSED(ppos); + mutex_lock(&model->kbdev->ipa.lock); if 
(count > param->size) { @@ -149,9 +150,8 @@ static const struct file_operations fops_string = { .llseek = default_llseek, }; -int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, - void *addr, size_t size, - enum kbase_ipa_model_param_type type) +int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, void *addr, + size_t size, enum kbase_ipa_model_param_type type) { struct kbase_ipa_model_param *param; @@ -213,10 +213,8 @@ static int force_fallback_model_set(void *data, u64 val) return 0; } -DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, - force_fallback_model_get, - force_fallback_model_set, - "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, force_fallback_model_get, force_fallback_model_set, + "%llu\n"); static int current_power_get(void *data, u64 *val) { @@ -228,9 +226,8 @@ static int current_power_get(void *data, u64 *val) /* The current model assumes that there's no more than one voltage * regulator currently available in the system. 
*/ - kbase_get_real_power(df, &power, - kbdev->current_nominal_freq, - (kbdev->current_voltages[0] / 1000)); + kbase_get_real_power(df, &power, kbdev->current_nominal_freq, + (kbdev->current_voltages[0] / 1000)); kbase_pm_context_idle(kbdev); *val = power; @@ -246,21 +243,17 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) lockdep_assert_held(&model->kbdev->ipa.lock); - dir = debugfs_create_dir(model->ops->name, - model->kbdev->mali_debugfs_directory); + dir = debugfs_create_dir(model->ops->name, model->kbdev->mali_debugfs_directory); if (IS_ERR_OR_NULL(dir)) { - dev_err(model->kbdev->dev, - "Couldn't create mali debugfs %s directory", + dev_err(model->kbdev->dev, "Couldn't create mali debugfs %s directory", model->ops->name); return; } list_for_each(it, &model->params) { struct kbase_ipa_model_param *param = - list_entry(it, - struct kbase_ipa_model_param, - link); + list_entry(it, struct kbase_ipa_model_param, link); const struct file_operations *fops = NULL; switch (param->type) { @@ -273,18 +266,15 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) } if (unlikely(!fops)) { - dev_err(model->kbdev->dev, - "Type not set for %s parameter %s\n", + dev_err(model->kbdev->dev, "Type not set for %s parameter %s\n", model->ops->name, param->name); } else { - debugfs_create_file(param->name, 0644, - dir, param, fops); + debugfs_create_file(param->name, 0644, dir, param, fops); } } } -void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, - const char *name, s32 val) +void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, const char *name, s32 val) { struct kbase_ipa_model_param *param; @@ -295,8 +285,7 @@ void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, if (param->type == PARAM_TYPE_S32) { *param->addr.s32p = val; } else { - dev_err(model->kbdev->dev, - "Wrong type for %s parameter %s\n", + dev_err(model->kbdev->dev, "Wrong type for %s parameter %s\n", model->ops->name, param->name); 
} break; @@ -315,10 +304,10 @@ void kbase_ipa_debugfs_init(struct kbase_device *kbdev) kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); - debugfs_create_file("ipa_current_power", 0444, - kbdev->mali_debugfs_directory, kbdev, ¤t_power); - debugfs_create_file("ipa_force_fallback_model", 0644, - kbdev->mali_debugfs_directory, kbdev, &force_fallback_model); + debugfs_create_file("ipa_current_power", 0444, kbdev->mali_debugfs_directory, kbdev, + ¤t_power); + debugfs_create_file("ipa_force_fallback_model", 0644, kbdev->mali_debugfs_directory, kbdev, + &force_fallback_model); mutex_unlock(&kbdev->ipa.lock); } diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h index f690367757c9..3b8abfd0f328 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,8 @@ enum kbase_ipa_model_param_type { #if IS_ENABLED(CONFIG_DEBUG_FS) void kbase_ipa_debugfs_init(struct kbase_device *kbdev); -int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, - void *addr, size_t size, - enum kbase_ipa_model_param_type type); +int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, void *addr, + size_t size, enum kbase_ipa_model_param_type type); void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); /** @@ -46,25 +45,25 @@ void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); * kernel space. 
Normally it is expected that parameter values will * instead be set via debugfs. */ -void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, - const char *name, s32 val); +void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, const char *name, s32 val); #else /* CONFIG_DEBUG_FS */ -static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, - const char *name, void *addr, - size_t size, +static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, + void *addr, size_t size, enum kbase_ipa_model_param_type type) { return 0; } static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) -{ } +{ +} -static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, - const char *name, s32 val) -{ } +static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, const char *name, + s32 val) +{ +} #endif /* CONFIG_DEBUG_FS */ #endif /* _KBASE_IPA_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c index 8557fe8723cf..71dbc27fc025 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,10 +52,10 @@ static int dummy_temp; -static int kbase_simple_power_model_get_dummy_temp( - struct thermal_zone_device *tz, - int *temp) +static int kbase_simple_power_model_get_dummy_temp(struct thermal_zone_device *tz, int *temp) { + CSTD_UNUSED(tz); + *temp = READ_ONCE(dummy_temp); return 0; } @@ -64,8 +64,7 @@ static int kbase_simple_power_model_get_dummy_temp( #ifdef thermal_zone_get_temp #undef thermal_zone_get_temp #endif -#define thermal_zone_get_temp(tz, temp) \ - kbase_simple_power_model_get_dummy_temp(tz, temp) +#define thermal_zone_get_temp(tz, temp) kbase_simple_power_model_get_dummy_temp(tz, temp) void kbase_simple_power_model_set_dummy_temp(int temp) { @@ -130,16 +129,16 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) * Deg^-N, so we need to multiply the last coefficient by 1000. * Range: -2^63 < res_big < 2^63 */ - const s64 res_big = ts[3] * t3 /* +/- 2^62 */ - + ts[2] * t2 /* +/- 2^55 */ - + ts[1] * t /* +/- 2^48 */ - + ts[0] * (s64)1000; /* +/- 2^41 */ + const s64 res_big = ts[3] * t3 /* +/- 2^62 */ + + ts[2] * t2 /* +/- 2^55 */ + + ts[1] * t /* +/- 2^48 */ + + ts[0] * (s64)1000; /* +/- 2^41 */ /* Range: -2^60 < res_unclamped < 2^60 */ s64 res_unclamped = div_s64(res_big, 1000); /* Clamp to range of 0x to 10x the static power */ - return clamp(res_unclamped, (s64) 0, (s64) 10000000); + return clamp(res_unclamped, (s64)0, (s64)10000000); } /* We can't call thermal_zone_get_temp() directly in model_static_coeff(), @@ -148,8 +147,7 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) */ static int poll_temperature(void *data) { - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *) data; + struct kbase_ipa_model_simple_data *model_data = (struct kbase_ipa_model_simple_data *)data; int temp; set_freezable(); @@ -162,8 +160,9 @@ static 
int poll_temperature(void *data) ret = thermal_zone_get_temp(tz, &temp); if (ret) { - pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", - ret); + pr_warn_ratelimited( + "Error reading temperature for gpu thermal zone: %d\n", + ret); temp = FALLBACK_STATIC_TEMPERATURE; } } else { @@ -184,22 +183,21 @@ static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) { u32 temp_scaling_factor; struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *) model->model_data; + (struct kbase_ipa_model_simple_data *)model->model_data; u64 coeff_big; int temp; temp = READ_ONCE(model_data->current_temperature); /* Range: 0 <= temp_scaling_factor < 2^24 */ - temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, - temp); + temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, temp); /* * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This * means static_coefficient must be in range * 0 <= static_coefficient < 2^28. 
*/ - coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor; + coeff_big = (u64)model_data->static_coefficient * (u64)temp_scaling_factor; *coeffp = div_u64(coeff_big, 1000000); return 0; @@ -208,7 +206,7 @@ static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) { struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *) model->model_data; + (struct kbase_ipa_model_simple_data *)model->model_data; #if MALI_USE_CSF /* On CSF GPUs, the dynamic power for top-level and shader cores is @@ -220,8 +218,7 @@ static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) */ coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = model_data->dynamic_coefficient / TOP_LEVEL_DYN_COEFF_SCALER; - coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = - model_data->dynamic_coefficient; + coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = model_data->dynamic_coefficient; #else *coeffp = model_data->dynamic_coefficient; #endif @@ -233,7 +230,7 @@ static int add_params(struct kbase_ipa_model *model) { int err = 0; struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *)model->model_data; + (struct kbase_ipa_model_simple_data *)model->model_data; err = kbase_ipa_model_add_param_s32(model, "static-coefficient", (s32 *)&model_data->static_coefficient, 1, true); @@ -245,21 +242,18 @@ static int add_params(struct kbase_ipa_model *model) if (err) goto end; - err = kbase_ipa_model_add_param_s32(model, "ts", - model_data->ts, 4, true); + err = kbase_ipa_model_add_param_s32(model, "ts", model_data->ts, 4, true); if (err) goto end; - err = kbase_ipa_model_add_param_string(model, "thermal-zone", - model_data->tz_name, + err = kbase_ipa_model_add_param_string(model, "thermal-zone", model_data->tz_name, sizeof(model_data->tz_name), true); if (err) goto end; model_data->temperature_poll_interval_ms = 200; err = 
kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms", - &model_data->temperature_poll_interval_ms, - 1, false); + &model_data->temperature_poll_interval_ms, 1, false); end: return err; @@ -270,16 +264,14 @@ static int kbase_simple_power_model_init(struct kbase_ipa_model *model) int err; struct kbase_ipa_model_simple_data *model_data; - model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), - GFP_KERNEL); + model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), GFP_KERNEL); if (!model_data) return -ENOMEM; - model->model_data = (void *) model_data; + model->model_data = (void *)model_data; model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE; - model_data->poll_temperature_thread = kthread_run(poll_temperature, - (void *) model_data, + model_data->poll_temperature_thread = kthread_run(poll_temperature, (void *)model_data, "mali-simple-power-model-temp-poll"); if (IS_ERR(model_data->poll_temperature_thread)) { err = PTR_ERR(model_data->poll_temperature_thread); @@ -300,7 +292,7 @@ static int kbase_simple_power_model_init(struct kbase_ipa_model *model) static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) { struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *)model->model_data; + (struct kbase_ipa_model_simple_data *)model->model_data; struct thermal_zone_device *tz; lockdep_assert_held(&model->kbdev->ipa.lock); @@ -352,7 +344,7 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) static void kbase_simple_power_model_term(struct kbase_ipa_model *model) { struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *)model->model_data; + (struct kbase_ipa_model_simple_data *)model->model_data; kthread_stop(model_data->poll_temperature_thread); @@ -360,11 +352,11 @@ static void kbase_simple_power_model_term(struct kbase_ipa_model *model) } struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { - .name = 
"mali-simple-power-model", - .init = &kbase_simple_power_model_init, - .recalculate = &kbase_simple_power_model_recalculate, - .term = &kbase_simple_power_model_term, - .get_dynamic_coeff = &model_dynamic_coeff, - .get_static_coeff = &model_static_coeff, + .name = "mali-simple-power-model", + .init = &kbase_simple_power_model_init, + .recalculate = &kbase_simple_power_model_recalculate, + .term = &kbase_simple_power_model_term, + .get_dynamic_coeff = &model_dynamic_coeff, + .get_static_coeff = &model_static_coeff, }; KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops); diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h index debc3ad25fd7..ddd58eb64b82 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -39,7 +39,7 @@ * the GPU actually being reset to give other contexts time for their jobs * to be soft-stopped and removed from the hardware before resetting. */ -#define ZAP_TIMEOUT 1000 +#define ZAP_TIMEOUT 1000 /* * Prevent soft-stops from occurring in scheduling situations @@ -70,29 +70,29 @@ #define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 /* Atom has been previously soft-stopped */ -#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) +#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1 << 1) /* Atom has been previously retried to execute */ -#define KBASE_KATOM_FLAGS_RERUN (1<<2) +#define KBASE_KATOM_FLAGS_RERUN (1 << 2) /* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps * to disambiguate short-running job chains during soft/hard stopping of jobs */ -#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) +#define KBASE_KATOM_FLAGS_JOBCHAIN (1 << 3) /* Atom has been previously hard-stopped. 
*/ -#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) +#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1 << 4) /* Atom has caused us to enter disjoint state */ -#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) +#define KBASE_KATOM_FLAG_IN_DISJOINT (1 << 5) /* Atom blocked on cross-slot dependency */ -#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) +#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1 << 7) /* Atom has fail dependency on cross-slot dependency */ -#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) +#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1 << 8) /* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1 << 9) /* Atom requires GPU to be in protected mode */ -#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +#define KBASE_KATOM_FLAG_PROTECTED (1 << 11) /* Atom has been stored in runnable_tree */ -#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1 << 12) /* Atom is waiting for L2 caches to power up in order to enter protected mode */ -#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) +#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1 << 13) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK @@ -102,7 +102,7 @@ #define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 /* Bitmask of all SW related flags */ -#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) +#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) #if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) #error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ @@ -112,8 +112,7 @@ /* Soft-stop command that causes a Disjoint event. 
This of course isn't * entirely masked off by JS_COMMAND_MASK */ -#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ - (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) +#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) #define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT @@ -132,15 +131,17 @@ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT * to be updated on HW side so a Job Slot is * considered free. - * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in - * the enum. + * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. + * @KBASE_DEFAULT_TIMEOUT: Fallthrough in case an invalid timeout is + * passed. */ enum kbase_timeout_selector { MMU_AS_INACTIVE_WAIT_TIMEOUT, JM_DEFAULT_JS_FREE_TIMEOUT, /* Must be the last in the enum */ - KBASE_TIMEOUT_SELECTOR_COUNT + KBASE_TIMEOUT_SELECTOR_COUNT, + KBASE_DEFAULT_TIMEOUT = JM_DEFAULT_JS_FREE_TIMEOUT }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -163,7 +164,6 @@ enum kbase_timeout_selector { * for the faulty atom. */ struct base_job_fault_event { - u32 event_code; struct kbase_jd_atom *katom; struct work_struct job_fault_work; @@ -203,8 +203,7 @@ kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) * * Return: the type of dependency there is on the dependee atom. */ -static inline u8 kbase_jd_katom_dep_type( - const struct kbase_jd_atom_dependency *dep) +static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) { return dep->dep_type; } @@ -216,9 +215,8 @@ static inline u8 kbase_jd_katom_dep_type( * @a: pointer to the dependee atom. * @type: type of dependency there is on the dependee atom. 
*/ -static inline void kbase_jd_katom_dep_set( - const struct kbase_jd_atom_dependency *const_dep, - struct kbase_jd_atom *a, u8 type) +static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, + struct kbase_jd_atom *a, u8 type) { struct kbase_jd_atom_dependency *dep; @@ -233,8 +231,7 @@ static inline void kbase_jd_katom_dep_set( * * @const_dep: pointer to the dependency info structure to be setup. */ -static inline void kbase_jd_katom_dep_clear( - const struct kbase_jd_atom_dependency *const_dep) +static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) { struct kbase_jd_atom_dependency *dep; @@ -625,8 +622,7 @@ struct kbase_jd_atom { u32 age; }; -static inline bool kbase_jd_katom_is_protected( - const struct kbase_jd_atom *katom) +static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) { return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); } @@ -859,10 +855,6 @@ struct jsctx_queue { * @pf_data: Data relating to Page fault. * @bf_data: Data relating to Bus fault. * @current_setup: Stores the MMU configuration for this address space. - * @is_unresponsive: Flag to indicate MMU is not responding. - * Set if a MMU command isn't completed within - * &kbase_device:mmu_as_inactive_wait_time_ms. - * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -872,7 +864,6 @@ struct kbase_as { struct kbase_fault pf_data; struct kbase_fault bf_data; struct kbase_mmu_setup current_setup; - bool is_unresponsive; }; #endif /* _KBASE_JM_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h index 53819caaf616..8955b0473155 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,7 +41,7 @@ * * Return: 0 on success, error code otherwise. */ -int kbasep_js_devdata_init(struct kbase_device * const kbdev); +int kbasep_js_devdata_init(struct kbase_device *const kbdev); /** * kbasep_js_devdata_halt - Halt the Job Scheduler. @@ -135,8 +135,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js, int sched_prio) { - struct kbase_jsctx_slot_tracking *slot_tracking = - &kctx->slot_tracking[js]; + struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(!slot_tracking->atoms_pulled_pri[sched_prio], @@ -144,8 +143,8 @@ static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, js, sched_prio); slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio; - KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_BLOCKED, kctx, - NULL, 0, js, (unsigned int)sched_prio); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_BLOCKED, kctx, NULL, 0, js, + (unsigned int)sched_prio); } /* kbase_jsctx_atoms_pulled - Return number of atoms pulled on a context @@ -239,8 +238,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
* */ -void kbasep_js_remove_job(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_jd_atom *atom); +void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *atom); /** * kbasep_js_remove_cancelled_job - Completely remove a job chain from the @@ -271,9 +270,8 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, * should call kbase_js_sched_all() to try to run more jobs and * false otherwise. */ -bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_jd_atom *katom); +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *katom); /** * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a @@ -303,8 +301,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be * obtained internally) */ -void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, bool has_pm_ref); +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + bool has_pm_ref); /** * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy, @@ -358,8 +356,7 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, * obtained internally) * */ -void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of @@ -383,9 +380,8 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, * kbasep_js_runpool_release_ctx() */ void kbasep_js_runpool_release_ctx_and_katom_retained_state( - struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state); + 
struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); /** * kbasep_js_runpool_release_ctx_nolock - Variant of kbase_js_runpool_release_ctx() @@ -398,8 +394,7 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not * attempt to schedule new contexts. */ -void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context @@ -425,8 +420,7 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, * be used internally. * */ -void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbasep_js_release_privileged_ctx - Release a privileged context, @@ -445,8 +439,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, * obtained internally) * */ -void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbase_js_try_run_jobs - Try to submit the next job on each slot @@ -506,8 +499,7 @@ void kbasep_js_resume(struct kbase_device *kbdev); * * Return: true if the context requires to be enqueued, otherwise false. 
*/ -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom); +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom); /** * kbase_js_pull - Pull an atom from a context in the job scheduler for @@ -557,8 +549,7 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); * * Return: true if the context is now idle (no jobs pulled) false otherwise. */ -bool kbase_js_complete_atom_wq(struct kbase_context *kctx, - struct kbase_jd_atom *katom); +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom); /** * kbase_js_complete_atom - Complete an atom. @@ -574,8 +565,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * Return: a atom that has now been unblocked and can now be run, or NULL * if none */ -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp); +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp); /** * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot @@ -631,8 +621,7 @@ void kbase_js_zap_context(struct kbase_context *kctx); * * Return: true if atom is valid false otherwise. */ -bool kbase_js_is_atom_valid(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, struct kbase_jd_atom *katom); /** * kbase_js_set_timeouts - update all JS timeouts with user specified data @@ -686,9 +675,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx); * * Return: true if the context is allowed to submit jobs, false otherwise. 
*/ -static inline bool kbasep_js_is_submit_allowed( - struct kbasep_js_device_data *js_devdata, - struct kbase_context *kctx) +static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) { u16 test_bit; bool is_allowed; @@ -699,11 +687,11 @@ static inline bool kbasep_js_is_submit_allowed( kctx->as_nr, atomic_read(&kctx->flags))) return false; - test_bit = (u16) (1u << kctx->as_nr); + test_bit = (u16)(1u << kctx->as_nr); - is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); + is_allowed = (bool)(js_devdata->runpool_irq.submit_allowed & test_bit); dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", - is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); + is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); return is_allowed; } @@ -717,9 +705,8 @@ static inline bool kbasep_js_is_submit_allowed( * * The caller must hold hwaccess_lock. */ -static inline void kbasep_js_set_submit_allowed( - struct kbasep_js_device_data *js_devdata, - struct kbase_context *kctx) +static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) { u16 set_bit; @@ -729,10 +716,9 @@ static inline void kbasep_js_set_submit_allowed( kctx->as_nr, atomic_read(&kctx->flags))) return; - set_bit = (u16) (1u << kctx->as_nr); + set_bit = (u16)(1u << kctx->as_nr); - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", - kctx, kctx->as_nr); + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed |= set_bit; } @@ -748,9 +734,8 @@ static inline void kbasep_js_set_submit_allowed( * * The caller must hold hwaccess_lock. 
*/ -static inline void kbasep_js_clear_submit_allowed( - struct kbasep_js_device_data *js_devdata, - struct kbase_context *kctx) +static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) { u16 clear_bit; u16 clear_mask; @@ -761,11 +746,10 @@ static inline void kbasep_js_clear_submit_allowed( kctx->as_nr, atomic_read(&kctx->flags))) return; - clear_bit = (u16) (1u << kctx->as_nr); + clear_bit = (u16)(1u << kctx->as_nr); clear_mask = ~clear_bit; - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", - kctx, kctx->as_nr); + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed &= clear_mask; } @@ -780,12 +764,11 @@ static inline void kbasep_js_clear_submit_allowed( * atom-related work to be done on releasing with * kbasep_js_runpool_release_ctx_and_katom_retained_state() */ -static inline void kbasep_js_atom_retained_state_init_invalid( - struct kbasep_js_atom_retained_state *retained_state) +static inline void +kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) { retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; - retained_state->core_req = - KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; + retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; } /** @@ -796,9 +779,9 @@ static inline void kbasep_js_atom_retained_state_init_invalid( * Copy atom state that can be made available after kbase_jd_done_nolock() is called * on that atom. 
*/ -static inline void kbasep_js_atom_retained_state_copy( - struct kbasep_js_atom_retained_state *retained_state, - const struct kbase_jd_atom *katom) +static inline void +kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, + const struct kbase_jd_atom *katom) { retained_state->event_code = katom->event_code; retained_state->core_req = katom->core_req; @@ -822,15 +805,12 @@ static inline void kbasep_js_atom_retained_state_copy( * * Return: false if the atom has not finished, true otherwise. */ -static inline bool kbasep_js_has_atom_finished( - const struct kbasep_js_atom_retained_state *katom_retained_state) +static inline bool +kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) { - return (bool) (katom_retained_state->event_code != - BASE_JD_EVENT_STOPPED && - katom_retained_state->event_code != - BASE_JD_EVENT_REMOVED_FROM_NEXT && - katom_retained_state->event_code != - BASE_JD_EVENT_END_RP_DONE); + return (bool)(katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && + katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT && + katom_retained_state->event_code != BASE_JD_EVENT_END_RP_DONE); } /** @@ -847,8 +827,8 @@ static inline bool kbasep_js_has_atom_finished( static inline bool kbasep_js_atom_retained_state_is_valid( const struct kbasep_js_atom_retained_state *katom_retained_state) { - return (bool) (katom_retained_state->core_req != - KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); + return (bool)(katom_retained_state->core_req != + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); } /** @@ -860,9 +840,8 @@ static inline bool kbasep_js_atom_retained_state_is_valid( * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
* * The caller must hold the kbasep_js_device_data::runpool_mutex */ -static inline void kbase_js_runpool_inc_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) +static inline void kbase_js_runpool_inc_context_count(struct kbase_device *kbdev, + struct kbase_context *kctx) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -893,9 +872,8 @@ static inline void kbase_js_runpool_inc_context_count( * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. * * The caller must hold the kbasep_js_device_data::runpool_mutex */ -static inline void kbase_js_runpool_dec_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) +static inline void kbase_js_runpool_dec_context_count(struct kbase_device *kbdev, + struct kbase_context *kctx) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -931,11 +909,9 @@ static inline void kbase_js_sched_all(struct kbase_device *kbdev) kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); } -extern const int -kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; +extern const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; -extern const base_jd_prio -kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; +extern const base_jd_prio kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; /** * kbasep_js_atom_prio_to_sched_prio - Convert atom priority (base_jd_prio) @@ -1021,7 +997,7 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr * This is for making consistent decisions about the ordering of atoms when we * need to do pre-emption on a slot, which includes stopping existing atoms * when a new atom is ready to run, and also which other atoms to remove from - * the slot when the atom in JSn_HEAD is being pre-empted. + * the slot when the atom in JS_HEAD is being pre-empted. 
* * This only handles @katom_a and @katom_b being for the same job slot, as * pre-emption only operates within a slot. @@ -1036,9 +1012,8 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr * A false return value does not distinguish between "no ordering relation" and * "@katom_a should run after @katom_b". */ -bool kbase_js_atom_runs_before(struct kbase_device *kbdev, - const struct kbase_jd_atom *katom_a, +bool kbase_js_atom_runs_before(struct kbase_device *kbdev, const struct kbase_jd_atom *katom_a, const struct kbase_jd_atom *katom_b, const kbase_atom_ordering_flag_t order_flags); -#endif /* _KBASE_JM_JS_H_ */ +#endif /* _KBASE_JM_JS_H_ */ diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h index 465cf7ec05bd..c76f60d66d4c 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h @@ -30,15 +30,13 @@ struct kbase_device; struct kbase_jd_atom; - typedef u32 kbase_context_flags; /* * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's * jobs registered with the Job Scheduler */ -typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, struct kbase_jd_atom *katom); /* * @brief Maximum number of jobs that can be submitted to a job slot whilst @@ -302,10 +300,8 @@ struct kbasep_js_device_data { s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; } runpool_irq; struct semaphore schedule_sem; - struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS] - [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS] - [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; s8 nr_user_contexts_running; s8 
nr_all_contexts_running; base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; @@ -325,7 +321,7 @@ struct kbasep_js_device_data { #ifdef CONFIG_MALI_BIFROST_DEBUG bool softstop_always; -#endif /* CONFIG_MALI_BIFROST_DEBUG */ +#endif /* CONFIG_MALI_BIFROST_DEBUG */ int init_status; u32 nr_contexts_pullable; atomic_t nr_contexts_runnable; @@ -342,6 +338,30 @@ struct kbasep_js_device_data { * * the kbasep_js_kctx_info::runpool substructure */ struct mutex runpool_mutex; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_timer: High-resolution timer used to periodically emit the GPU metrics + * tracepoints for applications that are using the GPU. The timer is + * needed for the long duration handling so that the length of work + * period is within the allowed limit. + */ + struct hrtimer gpu_metrics_timer; + + /** + * @gpu_metrics_timer_needed: Flag to indicate if the @gpu_metrics_timer is needed. + * The timer won't be started after the expiry if the flag + * isn't set. + */ + bool gpu_metrics_timer_needed; + + /** + * @gpu_metrics_timer_running: Flag to indicate if the @gpu_metrics_timer is running. + * The flag is set to false when the timer is cancelled or + * is not restarted after the expiry. + */ + bool gpu_metrics_timer_running; +#endif }; /** @@ -403,7 +423,6 @@ struct kbasep_js_atom_retained_state { int sched_priority; /* Core group atom was executed on */ u32 device_nr; - }; /* diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index 11aedef80109..1f32fc9dd553 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README + * For more information see base/tools/docs/hwconfig_generator.md */ #ifndef _BASE_HWCONFIG_FEATURES_H_ @@ -35,12 +35,13 @@ enum base_hw_feature { BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_ASN_HASH, + BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_PBHA_HWU, BASE_HW_FEATURE_LARGE_PAGE_ALLOC, + BASE_HW_FEATURE_THREAD_TLS_ALLOC, BASE_HW_FEATURE_END }; @@ -49,67 +50,47 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_gener }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; 
__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = { - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CORE_FEATURES, + BASE_HW_FEATURE_THREAD_TLS_ALLOC, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature 
base_hw_features_tNAx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = { @@ -133,40 +114,27 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[ }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; 
__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_ASN_HASH, - BASE_HW_FEATURE_GPU_SLEEP, - BASE_HW_FEATURE_CORE_FEATURES, - BASE_HW_FEATURE_END + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = { @@ -174,12 +142,20 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_ASN_HASH, + BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_PBHA_HWU, BASE_HW_FEATURE_END }; +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tKRx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_L2_SLICE_HASH, BASE_HW_FEATURE_GPU_SLEEP, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_PBHA_HWU, + BASE_HW_FEATURE_LARGE_PAGE_ALLOC, BASE_HW_FEATURE_END +}; + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index 0fbdec0bb0b6..a61861fcb677 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -21,7 +21,7 @@ /* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README + * For more information see base/tools/docs/hwconfig_generator.md */ #ifndef _BASE_HWCONFIG_ISSUES_H_ @@ -66,768 +66,430 @@ enum base_hw_issue { BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2952, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_generic[] = { - BASE_HW_ISSUE_END -}; +__attribute__(( + unused)) static const enum base_hw_issue base_hw_issues_generic[] = { BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - 
BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] 
= { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, 
BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, 
BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TSIX_1792, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TSIX_1792, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, + 
BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - 
BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TNOX_1194, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TNOX_1194, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - 
BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TGOX_R1_1234, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, - 
BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + 
BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_3076, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - 
BASE_HW_ISSUE_GPU2017_1336, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; 
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + 
BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TTRX_3485, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - 
BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - 
BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3212, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, + BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3212, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, + BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, 
BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TURSEHW_1997, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, + 
BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TURSEHW_1997, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, 
BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_GPU2019_3878, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2019_3901, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + 
BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_END + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, + BASE_HW_ISSUE_TITANHW_2952, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TURSEHW_2716, - BASE_HW_ISSUE_GPU2021PRO_290, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_TITANHW_2679, - BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, + BASE_HW_ISSUE_TITANHW_2952, BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = { + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tKRx[] = { + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 542e8f63fb5b..29c032adf15f 
100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,6 +47,7 @@ #include #include +#include /* * Include mali_kbase_defs.h first as this provides types needed by other local @@ -56,9 +57,7 @@ #include "debug/mali_kbase_debug_ktrace.h" #include "context/mali_kbase_context.h" -#include "mali_kbase_strings.h" #include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_utility.h" #include "mali_kbase_mem.h" #include "mmu/mali_kbase_mmu.h" #include "mali_kbase_gpu_memory_debugfs.h" @@ -82,6 +81,9 @@ #include "mali_linux_trace.h" +#define KBASE_DRV_NAME "mali" +#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + #if MALI_USE_CSF #include "csf/mali_kbase_csf.h" @@ -108,6 +110,14 @@ struct kbase_device *kbase_device_alloc(void); * been setup before calling kbase_device_init */ +/** + * kbase_device_misc_init() - Miscellaneous initialization for kbase device + * @kbdev: Pointer to the kbase device + * + * This function must be called only when a kbase device is initialized. 
+ * + * Return: 0 on success + */ int kbase_device_misc_init(struct kbase_device *kbdev); void kbase_device_misc_term(struct kbase_device *kbdev); @@ -162,17 +172,23 @@ void kbase_release_device(struct kbase_device *kbdev); * Return: if successful, address of the unmapped area aligned as required; * error code (negative) in case of failure; */ -unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, - const unsigned long addr, const unsigned long len, - const unsigned long pgoff, const unsigned long flags); - +unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, const unsigned long addr, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags); int assign_irqs(struct kbase_device *kbdev); int kbase_sysfs_init(struct kbase_device *kbdev); void kbase_sysfs_term(struct kbase_device *kbdev); - +/** + * kbase_protected_mode_init() - Initialize kbase device for protected mode. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function must be called only when a kbase device is initialized. + * + * Return: 0 on success. 
+ */ int kbase_protected_mode_init(struct kbase_device *kbdev); void kbase_protected_mode_term(struct kbase_device *kbdev); @@ -194,7 +210,6 @@ int kbase_device_pm_init(struct kbase_device *kbdev); */ void kbase_device_pm_term(struct kbase_device *kbdev); - int power_control_init(struct kbase_device *kbdev); void power_control_term(struct kbase_device *kbdev); @@ -207,7 +222,9 @@ static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) return 0; } -static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } +static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) +{ +} #endif /* CONFIG_DEBUG_FS */ int registers_map(struct kbase_device *kbdev); @@ -217,6 +234,14 @@ int kbase_device_coherency_init(struct kbase_device *kbdev); #if !MALI_USE_CSF +/** + * kbase_jd_init() - Initialize kbase context for job dispatcher. + * @kctx: Pointer to the kbase context to be initialized. + * + * This function must be called only when a kbase context is instantiated. + * + * Return: 0 on success. 
+ */ int kbase_jd_init(struct kbase_context *kctx); void kbase_jd_exit(struct kbase_context *kctx); @@ -231,9 +256,8 @@ void kbase_jd_exit(struct kbase_context *kctx); * * Return: 0 on success or error code */ -int kbase_jd_submit(struct kbase_context *kctx, - void __user *user_addr, u32 nr_atoms, u32 stride, - bool uk6_atom); +int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_atoms, u32 stride, + bool uk6_atom); /** * kbase_jd_done_worker - Handle a job completion @@ -255,7 +279,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void kbase_jd_done_worker(struct work_struct *data); void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, - kbasep_js_atom_done_code done_code); + kbasep_js_atom_done_code done_code); void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); @@ -306,7 +330,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done); * The hwaccess_lock must be held when calling this function. */ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, - struct kbase_jd_atom *katom); + struct kbase_jd_atom *katom); /** * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start @@ -321,8 +345,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, * * Return: 0 if successful, otherwise a negative error code. */ -int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, - struct kbase_va_region *reg); +int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, struct kbase_va_region *reg); /** * kbase_job_slot_softstop - Soft-stop the specified job slot @@ -337,7 +360,7 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, * Where possible any job in the next register is evicted before the soft-stop. 
*/ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom); + struct kbase_jd_atom *target_katom); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, struct kbase_jd_atom *target_katom, u32 sw_flags); @@ -363,7 +386,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js * state when the soft/hard-stop action is complete */ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); /** * kbase_job_check_leave_disjoint - potentially leave disjoint state @@ -373,16 +396,15 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, * Work out whether to leave disjoint state when finishing an atom that was * originated by kbase_job_check_enter_disjoint(). */ -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom); +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); #endif /* !MALI_USE_CSF */ -void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); +void kbase_event_post(struct kbase_context *kctx, struct kbase_jd_atom *event); #if !MALI_USE_CSF -int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); +int kbase_event_dequeue(struct kbase_context *kctx, struct base_jd_event_v2 *uevent); #endif /* !MALI_USE_CSF */ -int kbase_event_pending(struct kbase_context *ctx); +int kbase_event_pending(struct kbase_context *kctx); int kbase_event_init(struct kbase_context *kctx); void kbase_event_close(struct kbase_context *kctx); void kbase_event_cleanup(struct kbase_context *kctx); @@ -397,8 +419,7 @@ void kbase_event_wakeup(struct kbase_context *kctx); * which is to be validated. 
* Return: 0 if jit allocation is valid; negative error code otherwise */ -int kbasep_jit_alloc_validate(struct kbase_context *kctx, - struct base_jit_alloc_info *info); +int kbasep_jit_alloc_validate(struct kbase_context *kctx, struct base_jit_alloc_info *info); /** * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory @@ -416,14 +437,13 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); * @buffer: Pointer to the memory location allocated for the object * of the type struct @kbase_debug_copy_buffer. */ -static inline void kbase_free_user_buffer( - struct kbase_debug_copy_buffer *buffer) +static inline void kbase_free_user_buffer(struct kbase_debug_copy_buffer *buffer) { struct page **pages = buffer->extres_pages; - int nr_pages = buffer->nr_extres_pages; + uint nr_pages = buffer->nr_extres_pages; if (pages) { - int i; + uint i; for (i = 0; i < nr_pages; i++) { struct page *pg = pages[i]; @@ -445,9 +465,7 @@ void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); #if IS_ENABLED(CONFIG_SYNC_FILE) void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); #endif -int kbase_soft_event_update(struct kbase_context *kctx, - u64 event, - unsigned char new_status); +int kbase_soft_event_update(struct kbase_context *kctx, u64 event, unsigned char new_status); void kbasep_soft_job_timeout_worker(struct timer_list *timer); void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); @@ -461,9 +479,9 @@ void kbasep_as_do_poke(struct work_struct *work); * * @kbdev: The kbase device structure for the device * - * The caller should ensure that either kbdev->pm.active_count_lock is held, or - * a dmb was executed recently (to ensure the value is most - * up-to-date). However, without a lock the value could change afterwards. + * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held, + * or a dmb was executed recently (to ensure the value is most up-to-date). 
+ * However, without a lock the value could change afterwards. * * Return: * * false if a suspend is not in progress @@ -474,6 +492,22 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) return kbdev->pm.suspending; } +/** + * kbase_pm_is_resuming - Check whether System resume of GPU device is in progress. + * + * @kbdev: The kbase device structure for the device + * + * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held, + * or a dmb was executed recently (to ensure the value is most up-to-date). + * However, without a lock the value could change afterwards. + * + * Return: true if System resume is in progress, otherwise false. + */ +static inline bool kbase_pm_is_resuming(struct kbase_device *kbdev) +{ + return kbdev->pm.resuming; +} + #ifdef CONFIG_MALI_ARBITER_SUPPORT /* * Check whether a gpu lost is in progress @@ -501,8 +535,7 @@ static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) * state. Once in gpu lost state new GPU jobs will no longer be * scheduled. */ -static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, - bool gpu_lost) +static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, bool gpu_lost) { const int new_val = (gpu_lost ? 1 : 0); const int cur_val = atomic_xchg(&kbdev->pm.gpu_lost, new_val); @@ -527,9 +560,11 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) } /** - * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can - * run as using the device tree, and save this - * within kbdev. + * kbase_pm_gpu_freq_init() - Find the lowest frequency that the GPU can + * run as using the device tree, then query the + * GPU properties to find out the highest GPU + * frequency and store both of them within the + * @kbase_device. * @kbdev: Pointer to kbase device. 
* * This function could be called from kbase_clk_rate_trace_manager_init, @@ -537,9 +572,9 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) * dev_pm_opp_of_add_table() has been called to initialize the OPP table, * which occurs in power_control_init(). * - * Return: 0 in any case. + * Return: 0 on success, negative error code on failure. */ -int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev); +int kbase_pm_gpu_freq_init(struct kbase_device *kbdev); /** * kbase_pm_metrics_start - Start the utilization metrics timer @@ -608,8 +643,7 @@ int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev); * * Return: the atom's ID. */ -static inline int kbase_jd_atom_id(struct kbase_context *kctx, - const struct kbase_jd_atom *katom) +static inline int kbase_jd_atom_id(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { int result; @@ -629,8 +663,7 @@ static inline int kbase_jd_atom_id(struct kbase_context *kctx, * * Return: Pointer to struct kbase_jd_atom associated with the supplied ID */ -static inline struct kbase_jd_atom *kbase_jd_atom_from_id( - struct kbase_context *kctx, int id) +static inline struct kbase_jd_atom *kbase_jd_atom_from_id(struct kbase_context *kctx, int id) { return &kctx->jctx.atoms[id]; } @@ -661,6 +694,8 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id( * The disjoint event counter is also incremented immediately whenever a job is soft stopped * and during context creation. * + * This function must be called only when a kbase device is initialized. + * * Return: 0 on success and non-zero value on failure. 
*/ void kbase_disjoint_init(struct kbase_device *kbdev); @@ -746,7 +781,111 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev); #define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 #if !defined(UINT64_MAX) - #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) #endif +/** + * kbase_file_fops_count() - Get the kfile::fops_count value + * + * @kfile: Pointer to the object representing the mali device file. + * + * The value is read with kfile::lock held. + * + * Return: sampled value of kfile::fops_count. + */ +static inline u32 kbase_file_fops_count(struct kbase_file *kfile) +{ + u32 fops_count; + + spin_lock(&kfile->lock); + fops_count = kfile->fops_count; + spin_unlock(&kfile->lock); + + return fops_count; +} + +/** + * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the + * kfile::owner is still set. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * Return: true if the increment was done otherwise false. + */ +static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile) +{ + bool count_incremented = false; + + spin_lock(&kfile->lock); + if (kfile->owner) { + kfile->fops_count++; + count_incremented = true; + } + spin_unlock(&kfile->lock); + + return count_incremented; +} + +/** + * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function shall only be called to decrement kfile::fops_count if a successful call + * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread. + * + * The function would enqueue the kfile::destroy_kctx_work if the process that originally + * created the file instance has closed its copy and no Kbase handled file operations are + * in progress and no memory mappings are present for the file instance. 
+ */ +static inline void kbase_file_dec_fops_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + WARN_ON_ONCE(kfile->fops_count <= 0); + kfile->fops_count--; + if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) { + queue_work(system_wq, &kfile->destroy_kctx_work); +#if IS_ENABLED(CONFIG_DEBUG_FS) + wake_up(&kfile->zero_fops_count_wait); +#endif + } + spin_unlock(&kfile->lock); +} + +/** + * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function shall be called when the memory mapping on /dev/malixx device file + * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE. + */ +static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + kfile->map_count++; + spin_unlock(&kfile->lock); +} + +/** + * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function is called to decrement kfile::map_count value when the memory mapping + * on /dev/malixx device file is closed. + * The function would enqueue the kfile::destroy_kctx_work if the process that originally + * created the file instance has closed its copy and there are no mappings present and no + * Kbase handled file operations are in progress for the file instance. 
+ */ +static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + WARN_ON_ONCE(kfile->map_count <= 0); + kfile->map_count--; + if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count)) + queue_work(system_wq, &kfile->destroy_kctx_work); + spin_unlock(&kfile->lock); +} + #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c index 10dbeee02e40..959277ffb629 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,27 +30,26 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data) { - uintptr_t as_no = (uintptr_t) sfile->private; + uintptr_t as_no = (uintptr_t)sfile->private; struct list_head *entry; const struct list_head *kbdev_list; struct kbase_device *kbdev = NULL; + CSTD_UNUSED(data); + kbdev_list = kbase_device_get_list(); list_for_each(entry, kbdev_list) { kbdev = list_entry(entry, struct kbase_device, entry); if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { - /* don't show this one again until another fault occors */ kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); /* output the last page fault addr */ - seq_printf(sfile, "%llu\n", - (u64) kbdev->as[as_no].pf_data.addr); + seq_printf(sfile, "%llu\n", (u64)kbdev->as[as_no].pf_data.addr); } - } kbase_device_put_list(kbdev_list); @@ -90,20 +89,22 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); - 
debugfs_directory = debugfs_create_dir("address_spaces", - kbdev->mali_debugfs_directory); + debugfs_directory = debugfs_create_dir("address_spaces", kbdev->mali_debugfs_directory); if (IS_ERR_OR_NULL(debugfs_directory)) { - dev_warn(kbdev->dev, - "unable to create address_spaces debugfs directory"); + dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory"); } else { - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + for (i = 0; i < (uint)kbdev->nr_hw_address_spaces; i++) { if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i))) debugfs_create_file(as_name, 0444, debugfs_directory, (void *)(uintptr_t)i, &as_fault_fops); } } +#else + CSTD_UNUSED(kbdev); #endif /* CONFIG_MALI_BIFROST_DEBUG */ +#else + CSTD_UNUSED(kbdev); #endif /* CONFIG_DEBUG_FS */ } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h index 59bbcf6f4ff7..b07207ec524f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,14 +35,19 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); * @kbdev: Pointer to kbase_device * @as_no: The address space the fault occurred on */ -static inline void -kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) +static inline void kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) { #if IS_ENABLED(CONFIG_DEBUG_FS) #ifdef CONFIG_MALI_BIFROST_DEBUG kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(as_no); #endif /* CONFIG_DEBUG_FS */ +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(as_no); #endif /* CONFIG_MALI_BIFROST_DEBUG */ } -#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ +#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c index af51ed8c2dec..4675025baaf8 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,16 +49,15 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) return cache_flags; } - -void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, + enum dma_data_direction dir) { dma_sync_single_for_device(kbdev->dev, handle, size, dir); } KBASE_EXPORT_TEST_API(kbase_sync_single_for_device); -void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, + enum dma_data_direction dir) { dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h index 598d245383fd..1d9f00c560e7 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,4 +44,4 @@ */ u32 kbase_cache_enabled(u32 flags, u32 nr_pages); -#endif /* _KBASE_CACHE_POLICY_H_ */ +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h index 6aa31f3d3a79..a92569d31f06 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_caps.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,4 +67,4 @@ static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); } -#endif /* __KBASE_CAPS_H_ */ +#endif /* __KBASE_CAPS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c index 6a1e7e4c5ff5..c4acbf6881f3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,7 @@ #include #include -static u64 kbasep_ccswe_cycle_at_no_lock( - struct kbase_ccswe *self, u64 timestamp_ns) +static u64 kbasep_ccswe_cycle_at_no_lock(struct kbase_ccswe *self, u64 timestamp_ns) { s64 diff_s, diff_ns; u32 gpu_freq; @@ -39,8 +38,7 @@ static u64 kbasep_ccswe_cycle_at_no_lock( diff_s = div_s64(diff_ns, NSEC_PER_SEC); diff_ns -= diff_s * NSEC_PER_SEC; - return self->cycles_elapsed + diff_s * gpu_freq - + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); + return self->cycles_elapsed + diff_s * gpu_freq + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); } void kbase_ccswe_init(struct kbase_ccswe *self) @@ -62,8 +60,7 @@ u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns) return result; } -void kbase_ccswe_freq_change( - struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) +void kbase_ccswe_freq_change(struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) { unsigned long flags; @@ -75,8 +72,7 @@ void kbase_ccswe_freq_change( /* If this is the first frequency change, cycles_elapsed is zero. */ if (self->timestamp_ns) - self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock( - self, timestamp_ns); + self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns); self->timestamp_ns = timestamp_ns; self->prev_gpu_freq = self->gpu_freq; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h index f7fcf7780b35..ce148ed537c5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -82,8 +82,7 @@ u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns); * frequency change. The function is to be called at the frequency * change moment (not later). */ -void kbase_ccswe_freq_change( - struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); +void kbase_ccswe_freq_change(struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); /** * kbase_ccswe_reset() - reset estimator state diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h index ecfdb2876618..d5dd49055b00 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,7 +29,6 @@ #include #include #include -#include /* Forward declaration of struct kbase_device */ struct kbase_device; @@ -153,8 +152,7 @@ struct kbase_platform_funcs_conf { * Context: The caller must hold the hwaccess_lock. Function must be * runnable in an interrupt context. */ - void (*platform_handler_atom_complete_func)( - struct kbase_jd_atom *katom); + void (*platform_handler_atom_complete_func)(struct kbase_jd_atom *katom); #endif }; @@ -232,7 +230,7 @@ struct kbase_pm_callback_conf { * * @return 0 on success, else int error code. */ - int (*power_runtime_init_callback)(struct kbase_device *kbdev); + int (*power_runtime_init_callback)(struct kbase_device *kbdev); /** Callback for handling runtime power management termination. 
* @@ -376,8 +374,7 @@ struct kbase_clk_rate_trace_op_conf { * Kbase will use this function pointer to enumerate the existence of a * GPU clock on the given index. */ - void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, - unsigned int index); + void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, unsigned int index); /** * @get_gpu_clk_rate: Get the current rate for an enumerated clock. @@ -386,8 +383,7 @@ struct kbase_clk_rate_trace_op_conf { * * Returns current rate of the GPU clock in unit of Hz. */ - unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, - void *gpu_clk_handle); + unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, void *gpu_clk_handle); /** * @gpu_clk_notifier_register: Register a clock rate change notifier. @@ -405,8 +401,8 @@ struct kbase_clk_rate_trace_op_conf { * The callback function expects the pointer of type * 'struct kbase_gpu_clk_notifier_data' as the third argument. */ - int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, - void *gpu_clk_handle, struct notifier_block *nb); + int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, void *gpu_clk_handle, + struct notifier_block *nb); /** * @gpu_clk_notifier_unregister: Unregister clock rate change notifier @@ -419,8 +415,8 @@ struct kbase_clk_rate_trace_op_conf { * was previously registered to get notified of the change in rate * of clock corresponding to @gpu_clk_handle. */ - void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, - void *gpu_clk_handle, struct notifier_block *nb); + void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, void *gpu_clk_handle, + struct notifier_block *nb); }; #if IS_ENABLED(CONFIG_OF) @@ -440,9 +436,9 @@ struct kbase_io_memory_region { * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. 
*/ struct kbase_io_resources { - u32 job_irq_number; - u32 mmu_irq_number; - u32 gpu_irq_number; + u32 job_irq_number; + u32 mmu_irq_number; + u32 gpu_irq_number; struct kbase_io_memory_region io_memory_region; }; @@ -580,4 +576,4 @@ int kbase_platform_register(void); void kbase_platform_unregister(void); #endif -#endif /* _KBASE_CONFIG_H_ */ +#endif /* _KBASE_CONFIG_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h index 14493a77e1ea..9dc134373dc3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h @@ -115,7 +115,7 @@ enum { /* Default scheduling tick granuality (can be overridden by platform header) */ #ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS -#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ +#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ #endif /* Default minimum number of scheduling ticks before jobs are soft-stopped. @@ -123,21 +123,21 @@ enum { * This defines the time-slice for a job (which may be different from that of a * context) */ -#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ +#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ /* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */ -#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ +#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ /* Default minimum number of scheduling ticks before jobs are hard-stopped */ -#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ /* Default minimum number of scheduling ticks before CL jobs are hard-stopped. 
*/ -#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ +#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ /* Default minimum number of scheduling ticks before jobs are hard-stopped * during dumping */ -#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ +#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ /* Default timeout for some software jobs, after which the software event wait * jobs will be cancelled. @@ -147,17 +147,17 @@ enum { /* Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job */ -#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ +#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ /* Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" CL job. */ -#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ +#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ /* Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job during dumping. */ -#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ +#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ /* Nominal reference frequency that was used to obtain all following * <...>_TIMEOUT_CYCLES macros, in kHz. @@ -172,6 +172,7 @@ enum { * * This is also the default timeout to be used when an invalid timeout * selector is used to retrieve the timeout on CSF GPUs. + * This shouldn't be used as a timeout for the CSG suspend request. * * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based * on scaling from a 50MHz GPU system. @@ -185,17 +186,16 @@ enum { */ #define CSF_PM_TIMEOUT_CYCLES (250000000) -/* Waiting timeout in clock cycles for GPU reset to complete. +/* Waiting timeout in clock cycles for a CSG to be suspended. * - * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system + * Based on 30s timeout at 100MHz, scaled from 5s at 600Mhz GPU frequency. 
+ * More cycles (1s @ 100Mhz = 100000000) are added up to ensure that + * host timeout is always bigger than FW timeout. */ -#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) +#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull) -/* Waiting timeout in clock cycles for all active CSGs to be suspended. - * - * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system. - */ -#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000) +/* Waiting timeout in clock cycles for GPU reset to complete. */ +#define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2) /* Waiting timeout in clock cycles for GPU firmware to boot. * @@ -209,6 +209,23 @@ enum { */ #define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull) +/* Waiting timeout for a KCPU queue's fence signal blocked to long, in clock cycles. + * + * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system. + */ +#if IS_ENABLED(CONFIG_MALI_IS_FPGA) +#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (2500000000ull) +#else +#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull) +#endif + +/* Waiting timeout for task execution on an endpoint. Based on the + * DEFAULT_PROGRESS_TIMEOUT. + * + * Based on 25s timeout at 100Mhz, scaled from a 500MHz GPU system. + */ +#define DEFAULT_PROGRESS_TIMEOUT_CYCLES (2500000000ull) + #else /* MALI_USE_CSF */ /* A default timeout in clock cycles to be used when an invalid timeout @@ -231,7 +248,7 @@ enum { */ #define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000) -#endif /* MALI_USE_CSF */ +#endif /* !MALI_USE_CSF */ /* Default timeslice that a context is scheduled in for, in nanoseconds. * @@ -248,7 +265,11 @@ enum { * is enabled the value will be read from there, otherwise this should be * overridden by defining GPU_FREQ_KHZ_MAX in the platform file. 
*/ +#ifdef GPU_FREQ_KHZ_MAX +#define DEFAULT_GPU_FREQ_KHZ_MAX GPU_FREQ_KHZ_MAX +#else #define DEFAULT_GPU_FREQ_KHZ_MAX (5000) +#endif /* GPU_FREQ_KHZ_MAX */ /* Default timeout for task execution on an endpoint * @@ -275,4 +296,10 @@ enum { * It corresponds to 0.5s in GPU @ 100Mhz. */ #define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024) + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/* Default value of the time interval at which GPU metrics tracepoints are emitted. */ +#define DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS (500000000u) /* 500 ms */ +#endif + #endif /* _KBASE_CONFIG_DEFAULTS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index 12d6cc8963a3..be5ca4563a7c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -21,8 +21,9 @@ #include #include -#include +#include #include +#include #include #ifdef CONFIG_MALI_BIFROST_DEVFREQ #include @@ -54,12 +55,11 @@ #include "hwcnt/mali_kbase_hwcnt_context.h" #include "hwcnt/mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_kinstr_prfcnt.h" -#include "mali_kbase_vinstr.h" #if MALI_USE_CSF #include "csf/mali_kbase_csf_firmware.h" #include "csf/mali_kbase_csf_tiler_heap.h" #include "csf/mali_kbase_csf_csg_debugfs.h" -#include "csf/mali_kbase_csf_cpu_queue_debugfs.h" +#include "csf/mali_kbase_csf_cpu_queue.h" #include "csf/mali_kbase_csf_event.h" #endif #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -94,7 +94,7 @@ #include #include #include -#include /* is_compat_task/in_compat_syscall */ +#include /* is_compat_task/in_compat_syscall */ #include #include #include @@ -128,9 +128,8 @@ * @major: Kernel major version * @minor: Kernel minor version */ -#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ - (((minor) & 0xFFF) << 8) | \ - ((0 & 0xFF) << 0)) +#define KBASE_API_VERSION(major, minor) \ + ((((major)&0xFFF) << 20) | (((minor)&0xFFF) << 8) | ((0 & 0xFF) 
<< 0)) /** * struct mali_kbase_capability_def - kbase capabilities table @@ -151,15 +150,15 @@ struct mali_kbase_capability_def { */ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { #if MALI_USE_CSF - { 1, 0 }, /* SYSTEM_MONITOR */ - { 1, 0 }, /* JIT_PRESSURE_LIMIT */ - { 1, 0 }, /* MEM_GROW_ON_GPF */ - { 1, 0 } /* MEM_PROTECTED */ + { 1, 0 }, /* SYSTEM_MONITOR */ + { 1, 0 }, /* JIT_PRESSURE_LIMIT */ + { 1, 0 }, /* MEM_GROW_ON_GPF */ + { 1, 0 } /* MEM_PROTECTED */ #else - { 11, 15 }, /* SYSTEM_MONITOR */ - { 11, 25 }, /* JIT_PRESSURE_LIMIT */ - { 11, 2 }, /* MEM_GROW_ON_GPF */ - { 11, 2 } /* MEM_PROTECTED */ + { 11, 15 }, /* SYSTEM_MONITOR */ + { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 2 }, /* MEM_GROW_ON_GPF */ + { 11, 2 } /* MEM_PROTECTED */ #endif }; @@ -175,6 +174,8 @@ static inline int kbase_devfreq_opp_helper(struct dev_pm_set_opp_data *data) } #endif +static void kbase_file_destroy_kctx_worker(struct work_struct *work); + /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * @@ -217,9 +218,10 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) * * Return: Address of an object representing a simulated device file, or NULL * on failure. + * + * Note: This function always gets called in Userspace context. */ -static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, - struct file *const filp) +static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struct file *const filp) { struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL); @@ -229,6 +231,17 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, kfile->kctx = NULL; kfile->api_version = 0; atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); + /* Store the pointer to the file table structure of current process. 
*/ + kfile->owner = current->files; + INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker); + spin_lock_init(&kfile->lock); + kfile->fops_count = 0; + kfile->map_count = 0; + typecheck(typeof(kfile->map_count), typeof(current->mm->map_count)); +#if IS_ENABLED(CONFIG_DEBUG_FS) + init_waitqueue_head(&kfile->zero_fops_count_wait); +#endif + init_waitqueue_head(&kfile->event_queue); } return kfile; } @@ -248,8 +261,8 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, * * Return: 0 if successful, otherwise a negative error code. */ -static int kbase_file_set_api_version(struct kbase_file *const kfile, - u16 const major, u16 const minor) +static int kbase_file_set_api_version(struct kbase_file *const kfile, u16 const major, + u16 const minor) { if (WARN_ON(!kfile)) return -EINVAL; @@ -257,8 +270,8 @@ static int kbase_file_set_api_version(struct kbase_file *const kfile, /* setup pending, try to signal that we'll do the setup, * if setup was already in progress, err this call */ - if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN, - KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN) + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN, KBASE_FILE_VSN_IN_PROGRESS) != + KBASE_FILE_NEED_VSN) return -EPERM; /* save the proposed version number for later use */ @@ -305,8 +318,34 @@ static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) * * Return: 0 if a new context was created, otherwise a negative error code. */ -static int kbase_file_create_kctx(struct kbase_file *kfile, - base_context_create_flags flags); +static int kbase_file_create_kctx(struct kbase_file *kfile, base_context_create_flags flags); + +/** + * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file + * operation is allowed for the current process. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. 
+ * + * The function shall be called at the beginning of certain file operation methods + * implemented for @kbase_fops, like ioctl, poll, read and mmap. + * + * kbase_file_dec_fops_count() shall be called if the increment was done. + * + * Return: true if the increment was done otherwise false. + * + * Note: This function shall always be called in Userspace context. + */ +static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) +{ + /* Disallow file operations from the other process that shares the instance + * of /dev/malixx file i.e. 'kfile' or disallow file operations if parent + * process has closed the file instance. + */ + if (unlikely(kfile->owner != current->files)) + return false; + + return kbase_file_inc_fops_count_unless_closed(kfile); +} /** * kbase_file_get_kctx_if_setup_complete - Get a kernel base context @@ -320,63 +359,126 @@ static int kbase_file_create_kctx(struct kbase_file *kfile, * * Return: Address of the kernel base context associated with the @kfile, or * NULL if no context exists. + * + * Note: This function shall always be called in Userspace context. */ -static struct kbase_context *kbase_file_get_kctx_if_setup_complete( - struct kbase_file *const kfile) +static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_file *const kfile) { - if (WARN_ON(!kfile) || - atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE || - WARN_ON(!kfile->kctx)) + if (WARN_ON(!kfile) || atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE || + WARN_ON(!kfile->kctx)) return NULL; return kfile->kctx; } +/** + * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile. 
+ * + * @kfile: A device file created by kbase_file_new() + */ +static void kbase_file_destroy_kctx(struct kbase_file *const kfile) +{ + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE, KBASE_FILE_DESTROY_CTX) != + KBASE_FILE_COMPLETE) + return; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + kbasep_mem_profile_debugfs_remove(kfile->kctx); + kbase_context_debugfs_term(kfile->kctx); +#endif + + kbase_destroy_context(kfile->kctx); + dev_dbg(kfile->kbdev->dev, "Deleted kbase context"); +} + +/** + * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context. + * + * @work: Pointer to the kfile::destroy_kctx_work. + * + * The work item shall only be enqueued if the context termination could not + * be done from @kbase_flush(). + */ +static void kbase_file_destroy_kctx_worker(struct work_struct *work) +{ + struct kbase_file *kfile = container_of(work, struct kbase_file, destroy_kctx_work); + + WARN_ON_ONCE(kfile->owner); + WARN_ON_ONCE(kfile->map_count); + WARN_ON_ONCE(kfile->fops_count); + + kbase_file_destroy_kctx(kfile); +} + +/** + * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush() + * method of @kbase_fops. + * + * @kfile: A device file created by kbase_file_new() + */ +static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile) +{ + bool can_destroy_context = false; + + spin_lock(&kfile->lock); + kfile->owner = NULL; + /* To destroy the context from flush() method, unlike the release() + * method, need to synchronize manually against the other threads in + * the current process that could be operating on the /dev/malixx file. + * + * Only destroy the context if all the memory mappings on the + * /dev/malixx file instance have been closed. If there are mappings + * present then the context would be destroyed later when the last + * mapping is closed. + * Also, only destroy the context if no file operations are in progress. 
+ */ + can_destroy_context = !kfile->map_count && !kfile->fops_count; + spin_unlock(&kfile->lock); + + if (likely(can_destroy_context)) { + WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work)); + kbase_file_destroy_kctx(kfile); + } +} + /** * kbase_file_delete - Destroy an object representing a device file * * @kfile: A device file created by kbase_file_new() * - * If any context was created for the @kfile then it is destroyed. + * If any context was created for the @kfile and is still alive, then it is destroyed. */ static void kbase_file_delete(struct kbase_file *const kfile) { - struct kbase_device *kbdev = NULL; - if (WARN_ON(!kfile)) return; - kfile->filp->private_data = NULL; - kbdev = kfile->kbdev; - - if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { - struct kbase_context *kctx = kfile->kctx; - + /* All the CPU mappings on the device file should have been closed */ + WARN_ON_ONCE(kfile->map_count); #if IS_ENABLED(CONFIG_DEBUG_FS) - kbasep_mem_profile_debugfs_remove(kctx); + /* There could still be file operations due to the debugfs file (mem_view) */ + wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile)); +#else + /* There shall not be any file operations in progress on the device file */ + WARN_ON_ONCE(kfile->fops_count); #endif - kbase_context_debugfs_term(kctx); - - kbase_destroy_context(kctx); - - dev_dbg(kbdev->dev, "deleted base context\n"); - } - - kbase_release_device(kbdev); + kfile->filp->private_data = NULL; + cancel_work_sync(&kfile->destroy_kctx_work); + /* Destroy the context if it wasn't done earlier from the flush() method. 
*/ + kbase_file_destroy_kctx(kfile); + kbase_release_device(kfile->kbdev); kfree(kfile); } -static int kbase_api_handshake(struct kbase_file *kfile, - struct kbase_ioctl_version_check *version) +static int kbase_api_handshake(struct kbase_file *kfile, struct kbase_ioctl_version_check *version) { int err = 0; switch (version->major) { case BASE_UK_VERSION_MAJOR: /* set minor to be the lowest common */ - version->minor = min_t(int, BASE_UK_VERSION_MINOR, - (int)version->minor); + version->minor = min_t(int, BASE_UK_VERSION_MINOR, (int)version->minor); break; default: /* We return our actual version regardless if it @@ -399,32 +501,31 @@ static int kbase_api_handshake(struct kbase_file *kfile, * (with job submission disabled) but we don't support that usage. */ if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) - err = kbase_file_create_kctx(kfile, - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); + err = kbase_file_create_kctx(kfile, BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); return err; } static int kbase_api_handshake_dummy(struct kbase_file *kfile, - struct kbase_ioctl_version_check *version) + struct kbase_ioctl_version_check *version) { + CSTD_UNUSED(kfile); + CSTD_UNUSED(version); + return -EPERM; } -static int kbase_api_kinstr_prfcnt_enum_info( - struct kbase_file *kfile, - struct kbase_ioctl_kinstr_prfcnt_enum_info *prfcnt_enum_info) +static int +kbase_api_kinstr_prfcnt_enum_info(struct kbase_file *kfile, + struct kbase_ioctl_kinstr_prfcnt_enum_info *prfcnt_enum_info) { - return kbase_kinstr_prfcnt_enum_info(kfile->kbdev->kinstr_prfcnt_ctx, - prfcnt_enum_info); + return kbase_kinstr_prfcnt_enum_info(kfile->kbdev->kinstr_prfcnt_ctx, prfcnt_enum_info); } -static int kbase_api_kinstr_prfcnt_setup( - struct kbase_file *kfile, - union kbase_ioctl_kinstr_prfcnt_setup *prfcnt_setup) +static int kbase_api_kinstr_prfcnt_setup(struct kbase_file *kfile, + union kbase_ioctl_kinstr_prfcnt_setup *prfcnt_setup) { - return 
kbase_kinstr_prfcnt_setup(kfile->kbdev->kinstr_prfcnt_ctx, - prfcnt_setup); + return kbase_kinstr_prfcnt_setup(kfile->kbdev->kinstr_prfcnt_ctx, prfcnt_setup); } static struct kbase_device *to_kbase_device(struct device *dev) @@ -441,7 +542,7 @@ int assign_irqs(struct kbase_device *kbdev) #endif struct platform_device *pdev; - int i; + size_t i; if (!kbdev) return -ENODEV; @@ -507,12 +608,15 @@ void kbase_release_device(struct kbase_device *kbdev) EXPORT_SYMBOL(kbase_release_device); #if IS_ENABLED(CONFIG_DEBUG_FS) -static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) +static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, + loff_t *off) { struct kbase_context *kctx = f->private_data; int err; bool value; + CSTD_UNUSED(off); + err = kstrtobool_from_user(ubuf, size, &value); if (err) return err; @@ -546,13 +650,15 @@ static const struct file_operations kbase_infinite_cache_fops = { .read = read_ctx_infinite_cache, }; -static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, - size_t size, loff_t *off) +static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, size_t size, + loff_t *off) { struct kbase_context *kctx = f->private_data; int err; bool value; + CSTD_UNUSED(off); + err = kstrtobool_from_user(ubuf, size, &value); if (err) return err; @@ -574,8 +680,7 @@ static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, return size; } -static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, - size_t size, loff_t *off) +static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, size_t size, loff_t *off) { struct kbase_context *kctx = f->private_data; char buf[32]; @@ -598,7 +703,7 @@ static const struct file_operations kbase_force_same_va_fops = { #endif /* CONFIG_DEBUG_FS */ static int kbase_file_create_kctx(struct kbase_file *const kfile, - base_context_create_flags const flags) 
+ base_context_create_flags const flags) { struct kbase_device *kbdev = NULL; struct kbase_context *kctx = NULL; @@ -612,14 +717,13 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, /* setup pending, try to signal that we'll do the setup, * if setup was already in progress, err this call */ - if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX, - KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX) + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX, KBASE_FILE_CTX_IN_PROGRESS) != + KBASE_FILE_NEED_CTX) return -EPERM; kbdev = kfile->kbdev; - kctx = kbase_create_context(kbdev, in_compat_syscall(), - flags, kfile->api_version, kfile->filp); + kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, kfile); /* if bad flags, will stay stuck in setup mode */ if (!kctx) @@ -634,17 +738,16 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, mutex_init(&kctx->mem_profile_lock); - kctx->kctx_dentry = debugfs_create_dir(kctx_name, - kbdev->debugfs_ctx_directory); + kctx->kctx_dentry = debugfs_create_dir(kctx_name, kbdev->debugfs_ctx_directory); if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { /* we don't treat this as a fail - just warn about it */ dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); } else { - debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, - kctx, &kbase_infinite_cache_fops); - debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, - kctx, &kbase_force_same_va_fops); + debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, kctx, + &kbase_infinite_cache_fops); + debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, kctx, + &kbase_force_same_va_fops); kbase_context_debugfs_init(kctx); } @@ -701,20 +804,50 @@ static int kbase_release(struct inode *inode, struct file *filp) { struct kbase_file *const kfile = filp->private_data; + CSTD_UNUSED(inode); + kbase_file_delete(kfile); return 0; } -static int kbase_api_set_flags(struct kbase_file 
*kfile, - struct kbase_ioctl_set_flags *flags) +/** + * kbase_flush - Function implementing the flush() method of @kbase_fops. + * + * @filp: Pointer to the /dev/malixx device file instance. + * @id: Pointer to the file table structure of current process. + * If @filp is being shared by multiple processes then @id can differ + * from kfile::owner. + * + * This function is called everytime the copy of @filp is closed. So if 3 processes + * are sharing the @filp then this function would be called 3 times and only after + * that kbase_release() would get called. + * + * Return: 0 if successful, otherwise a negative error code. + * + * Note: This function always gets called in Userspace context when the + * file is closed. + */ +static int kbase_flush(struct file *filp, fl_owner_t id) +{ + struct kbase_file *const kfile = filp->private_data; + + /* Try to destroy the context if the flush() method has been called for the + * process that created the instance of /dev/malixx file i.e. 'kfile'. + */ + if (kfile->owner == id) + kbase_file_destroy_kctx_on_flush(kfile); + + return 0; +} + +static int kbase_api_set_flags(struct kbase_file *kfile, struct kbase_ioctl_set_flags *flags) { int err = 0; unsigned long const api_version = kbase_file_get_api_version(kfile); struct kbase_context *kctx = NULL; /* Validate flags */ - if (flags->create_flags != - (flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)) + if (flags->create_flags != (flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)) return -EINVAL; /* For backward compatibility, the context may have been created before @@ -736,7 +869,7 @@ static int kbase_api_set_flags(struct kbase_file *kfile, return -EPERM; #if MALI_USE_CSF - /* On CSF GPUs Job Manager interface isn't used to submit jobs + /* On CSF GPUs Job Manager interface isn't used to submit jobs * (there are no job slots). So the legacy job manager path to * submit jobs needs to remain disabled for CSF GPUs. 
*/ @@ -745,11 +878,9 @@ static int kbase_api_set_flags(struct kbase_file *kfile, mutex_lock(&js_kctx_info->ctx.jsctx_mutex); spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); /* Translate the flags */ - if ((flags->create_flags & - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + if ((flags->create_flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); #endif @@ -759,17 +890,15 @@ static int kbase_api_set_flags(struct kbase_file *kfile, } #if !MALI_USE_CSF -static int kbase_api_job_submit(struct kbase_context *kctx, - struct kbase_ioctl_job_submit *submit) +static int kbase_api_job_submit(struct kbase_context *kctx, struct kbase_ioctl_job_submit *submit) { - return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), - submit->nr_atoms, - submit->stride, false); + return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), submit->nr_atoms, + submit->stride, false); } #endif /* !MALI_USE_CSF */ static int kbase_api_get_gpuprops(struct kbase_file *kfile, - struct kbase_ioctl_get_gpuprops *get_props) + struct kbase_ioctl_get_gpuprops *get_props) { struct kbase_gpu_props *kprops = &kfile->kbdev->gpu_props; int err; @@ -784,9 +913,8 @@ static int kbase_api_get_gpuprops(struct kbase_file *kfile, if (get_props->size < kprops->prop_buffer_size) return -EINVAL; - err = copy_to_user(u64_to_user_ptr(get_props->buffer), - kprops->prop_buffer, - kprops->prop_buffer_size); + err = copy_to_user(u64_to_user_ptr(get_props->buffer), kprops->prop_buffer, + kprops->prop_buffer_size); if (err) return -EFAULT; return kprops->prop_buffer_size; @@ -861,8 +989,7 @@ static int kbase_api_mem_alloc_ex(struct kbase_context *kctx, * In both cases, the executable and fixed/fixable zones, and * the executable+fixed/fixable zone, are all above this range. 
*/ - if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && - kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { + if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { if (!gpu_executable && !fixed_or_fixable) flags |= BASE_MEM_SAME_VA; } @@ -879,9 +1006,7 @@ static int kbase_api_mem_alloc_ex(struct kbase_context *kctx, if (gpu_executable || fixed_or_fixable) return -ENOMEM; - flags |= (BASE_MEM_SAME_VA | - BASE_MEM_CACHED_CPU | - BASE_MEM_COHERENT_SYSTEM); + flags |= (BASE_MEM_SAME_VA | BASE_MEM_CACHED_CPU | BASE_MEM_COHERENT_SYSTEM); } reg = kbase_mem_alloc(kctx, alloc_ex->in.va_pages, alloc_ex->in.commit_pages, @@ -955,35 +1080,25 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem } #endif /* MALI_USE_CSF */ -static int kbase_api_mem_query(struct kbase_context *kctx, - union kbase_ioctl_mem_query *query) +static int kbase_api_mem_query(struct kbase_context *kctx, union kbase_ioctl_mem_query *query) { - return kbase_mem_query(kctx, query->in.gpu_addr, - query->in.query, &query->out.value); + return kbase_mem_query(kctx, query->in.gpu_addr, query->in.query, &query->out.value); } -static int kbase_api_mem_free(struct kbase_context *kctx, - struct kbase_ioctl_mem_free *free) +static int kbase_api_mem_free(struct kbase_context *kctx, struct kbase_ioctl_mem_free *free) { return kbase_mem_free(kctx, free->gpu_addr); } #if !MALI_USE_CSF -static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, - union kbase_kinstr_jm_fd *arg) +static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, union kbase_kinstr_jm_fd *arg) { return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); } #endif -static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_reader_setup *setup) -{ - return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); -} - static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, - union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) + union 
kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) { u32 flags = timeinfo->in.request_flags; struct timespec64 ts = { 0 }; @@ -993,9 +1108,9 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, kbase_pm_context_active(kctx->kbdev); kbase_backend_get_gpu_time(kctx->kbdev, - (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL, - (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? ×tamp : NULL, - (flags & BASE_TIMEINFO_MONOTONIC_FLAG) ? &ts : NULL); + (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL, + (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? ×tamp : NULL, + (flags & BASE_TIMEINFO_MONOTONIC_FLAG) ? &ts : NULL); if (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) timeinfo->out.timestamp = timestamp; @@ -1014,15 +1129,14 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, } #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -static int kbase_api_hwcnt_set(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_values *values) +static int kbase_api_hwcnt_set(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_values *values) { return gpu_model_set_dummy_prfcnt_user_sample(u64_to_user_ptr(values->data), values->size); } #endif /* CONFIG_MALI_BIFROST_NO_MALI */ static int kbase_api_disjoint_query(struct kbase_context *kctx, - struct kbase_ioctl_disjoint_query *query) + struct kbase_ioctl_disjoint_query *query) { query->counter = kbase_disjoint_event_get(kctx->kbdev); @@ -1030,10 +1144,12 @@ static int kbase_api_disjoint_query(struct kbase_context *kctx, } static int kbase_api_get_ddk_version(struct kbase_context *kctx, - struct kbase_ioctl_get_ddk_version *version) + struct kbase_ioctl_get_ddk_version *version) { int ret; - int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + uint len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + + CSTD_UNUSED(kctx); if (version->version_buffer == 0) return len; @@ -1041,9 +1157,8 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, if (version->size < len) return -EOVERFLOW; - ret = 
copy_to_user(u64_to_user_ptr(version->version_buffer), - KERNEL_SIDE_DDK_VERSION_STRING, - sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); + ret = copy_to_user(u64_to_user_ptr(version->version_buffer), KERNEL_SIDE_DDK_VERSION_STRING, + sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); if (ret) return -EFAULT; @@ -1052,9 +1167,9 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, } static int kbase_api_mem_jit_init(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init *jit_init) + struct kbase_ioctl_mem_jit_init *jit_init) { - int i; + size_t i; for (i = 0; i < sizeof(jit_init->padding); i++) { /* Ensure all padding bytes are 0 for potential future @@ -1064,53 +1179,44 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx, return -EINVAL; } - return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, - jit_init->max_allocations, jit_init->trim_level, - jit_init->group_id, jit_init->phys_pages); + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, jit_init->max_allocations, + jit_init->trim_level, jit_init->group_id, + jit_init->phys_pages); } static int kbase_api_mem_exec_init(struct kbase_context *kctx, - struct kbase_ioctl_mem_exec_init *exec_init) + struct kbase_ioctl_mem_exec_init *exec_init) { return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); } -static int kbase_api_mem_sync(struct kbase_context *kctx, - struct kbase_ioctl_mem_sync *sync) +static int kbase_api_mem_sync(struct kbase_context *kctx, struct kbase_ioctl_mem_sync *sync) { - struct basep_syncset sset = { - .mem_handle.basep.handle = sync->handle, - .user_addr = sync->user_addr, - .size = sync->size, - .type = sync->type - }; + struct basep_syncset sset = { .mem_handle.basep.handle = sync->handle, + .user_addr = sync->user_addr, + .size = sync->size, + .type = sync->type }; return kbase_sync_now(kctx, &sset); } static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx, - union kbase_ioctl_mem_find_cpu_offset *find) + union 
kbase_ioctl_mem_find_cpu_offset *find) { - return kbasep_find_enclosing_cpu_mapping_offset( - kctx, - find->in.cpu_addr, - find->in.size, - &find->out.offset); + return kbasep_find_enclosing_cpu_mapping_offset(kctx, find->in.cpu_addr, find->in.size, + &find->out.offset); } -static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx, - union kbase_ioctl_mem_find_gpu_start_and_offset *find) +static int +kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx, + union kbase_ioctl_mem_find_gpu_start_and_offset *find) { return kbasep_find_enclosing_gpu_mapping_start_and_offset( - kctx, - find->in.gpu_addr, - find->in.size, - &find->out.start, - &find->out.offset); + kctx, find->in.gpu_addr, find->in.size, &find->out.start, &find->out.offset); } static int kbase_api_get_context_id(struct kbase_context *kctx, - struct kbase_ioctl_get_context_id *info) + struct kbase_ioctl_get_context_id *info) { info->id = kctx->id; @@ -1118,7 +1224,7 @@ static int kbase_api_get_context_id(struct kbase_context *kctx, } static int kbase_api_tlstream_acquire(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_acquire *acquire) + struct kbase_ioctl_tlstream_acquire *acquire) { return kbase_timeline_io_acquire(kctx->kbdev, acquire->flags); } @@ -1130,14 +1236,12 @@ static int kbase_api_tlstream_flush(struct kbase_context *kctx) return 0; } -static int kbase_api_mem_commit(struct kbase_context *kctx, - struct kbase_ioctl_mem_commit *commit) +static int kbase_api_mem_commit(struct kbase_context *kctx, struct kbase_ioctl_mem_commit *commit) { return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages); } -static int kbase_api_mem_alias(struct kbase_context *kctx, - union kbase_ioctl_mem_alias *alias) +static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem_alias *alias) { struct base_mem_aliasing_info *ai; u64 flags; @@ -1150,9 +1254,8 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, if (!ai) return -ENOMEM; - err = 
copy_from_user(ai, - u64_to_user_ptr(alias->in.aliasing_info), - sizeof(*ai) * alias->in.nents); + err = copy_from_user(ai, u64_to_user_ptr(alias->in.aliasing_info), + sizeof(*ai) * alias->in.nents); if (err) { vfree(ai); return -EFAULT; @@ -1164,9 +1267,8 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, return -EINVAL; } - alias->out.gpu_va = kbase_mem_alias(kctx, &flags, - alias->in.stride, alias->in.nents, - ai, &alias->out.va_pages); + alias->out.gpu_va = kbase_mem_alias(kctx, &flags, alias->in.stride, alias->in.nents, ai, + &alias->out.va_pages); alias->out.flags = flags; @@ -1178,8 +1280,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, return 0; } -static int kbase_api_mem_import(struct kbase_context *kctx, - union kbase_ioctl_mem_import *import) +static int kbase_api_mem_import(struct kbase_context *kctx, union kbase_ioctl_mem_import *import) { int ret; u64 flags = import->in.flags; @@ -1187,13 +1288,9 @@ static int kbase_api_mem_import(struct kbase_context *kctx, if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) return -ENOMEM; - ret = kbase_mem_import(kctx, - import->in.type, - u64_to_user_ptr(import->in.phandle), - import->in.padding, - &import->out.gpu_va, - &import->out.va_pages, - &flags); + ret = kbase_mem_import(kctx, import->in.type, u64_to_user_ptr(import->in.phandle), + import->in.padding, &import->out.gpu_va, &import->out.va_pages, + &flags); import->out.flags = flags; @@ -1201,25 +1298,26 @@ static int kbase_api_mem_import(struct kbase_context *kctx, } static int kbase_api_mem_flags_change(struct kbase_context *kctx, - struct kbase_ioctl_mem_flags_change *change) + struct kbase_ioctl_mem_flags_change *change) { if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY) return -ENOMEM; - return kbase_mem_flags_change(kctx, change->gpu_va, - change->flags, change->mask); + return kbase_mem_flags_change(kctx, change->gpu_va, change->flags, change->mask); } static int kbase_api_stream_create(struct kbase_context *kctx, - struct 
kbase_ioctl_stream_create *stream) + struct kbase_ioctl_stream_create *stream) { #if IS_ENABLED(CONFIG_SYNC_FILE) int fd, ret; + CSTD_UNUSED(kctx); + /* Name must be NULL-terminated and padded with NULLs, so check last * character is NULL */ - if (stream->name[sizeof(stream->name)-1] != 0) + if (stream->name[sizeof(stream->name) - 1] != 0) return -EINVAL; ret = kbase_sync_fence_stream_create(stream->name, &fd); @@ -1228,13 +1326,17 @@ static int kbase_api_stream_create(struct kbase_context *kctx, return ret; return fd; #else + CSTD_UNUSED(kctx); + CSTD_UNUSED(stream); + return -ENOENT; #endif } static int kbase_api_fence_validate(struct kbase_context *kctx, - struct kbase_ioctl_fence_validate *validate) + struct kbase_ioctl_fence_validate *validate) { + CSTD_UNUSED(kctx); #if IS_ENABLED(CONFIG_SYNC_FILE) return kbase_sync_fence_validate(validate->fd); #else @@ -1243,7 +1345,7 @@ static int kbase_api_fence_validate(struct kbase_context *kctx, } static int kbase_api_mem_profile_add(struct kbase_context *kctx, - struct kbase_ioctl_mem_profile_add *data) + struct kbase_ioctl_mem_profile_add *data) { char *buf; int err; @@ -1263,8 +1365,7 @@ static int kbase_api_mem_profile_add(struct kbase_context *kctx, if (!buf) return -ENOMEM; - err = copy_from_user(buf, u64_to_user_ptr(data->buffer), - data->len); + err = copy_from_user(buf, u64_to_user_ptr(data->buffer), data->len); if (err) { kfree(buf); return -EFAULT; @@ -1275,7 +1376,7 @@ static int kbase_api_mem_profile_add(struct kbase_context *kctx, #if !MALI_USE_CSF static int kbase_api_soft_event_update(struct kbase_context *kctx, - struct kbase_ioctl_soft_event_update *update) + struct kbase_ioctl_soft_event_update *update) { if (update->flags != 0) return -EINVAL; @@ -1285,7 +1386,7 @@ static int kbase_api_soft_event_update(struct kbase_context *kctx, #endif /* !MALI_USE_CSF */ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, - struct kbase_ioctl_sticky_resource_map *map) + struct 
kbase_ioctl_sticky_resource_map *map) { int ret; u64 i; @@ -1294,8 +1395,7 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX) return -EOVERFLOW; - ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), - sizeof(u64) * map->count); + ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), sizeof(u64) * map->count); if (ret != 0) return -EFAULT; @@ -1323,7 +1423,7 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, } static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, - struct kbase_ioctl_sticky_resource_unmap *unmap) + struct kbase_ioctl_sticky_resource_unmap *unmap) { int ret; u64 i; @@ -1332,8 +1432,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX) return -EOVERFLOW; - ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), - sizeof(u64) * unmap->count); + ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), sizeof(u64) * unmap->count); if (ret != 0) return -EFAULT; @@ -1355,11 +1454,10 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, #if MALI_UNIT_TEST static int kbase_api_tlstream_stats(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_stats *stats) + struct kbase_ioctl_tlstream_stats *stats) { - kbase_timeline_stats(kctx->kbdev->timeline, - &stats->bytes_collected, - &stats->bytes_generated); + kbase_timeline_stats(kctx->kbdev->timeline, &stats->bytes_collected, + &stats->bytes_generated); return 0; } @@ -1373,7 +1471,7 @@ static int kbasep_cs_event_signal(struct kbase_context *kctx) } static int kbasep_cs_queue_register(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_register *reg) + struct kbase_ioctl_cs_queue_register *reg) { kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; @@ -1381,7 +1479,7 @@ static int kbasep_cs_queue_register(struct kbase_context *kctx, } static int 
kbasep_cs_queue_register_ex(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_register_ex *reg) + struct kbase_ioctl_cs_queue_register_ex *reg) { kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; @@ -1389,34 +1487,32 @@ static int kbasep_cs_queue_register_ex(struct kbase_context *kctx, } static int kbasep_cs_queue_terminate(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_terminate *term) + struct kbase_ioctl_cs_queue_terminate *term) { kbase_csf_queue_terminate(kctx, term); return 0; } -static int kbasep_cs_queue_bind(struct kbase_context *kctx, - union kbase_ioctl_cs_queue_bind *bind) +static int kbasep_cs_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind) { return kbase_csf_queue_bind(kctx, bind); } -static int kbasep_cs_queue_kick(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_kick *kick) +static int kbasep_cs_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { return kbase_csf_queue_kick(kctx, kick); } -static int kbasep_cs_queue_group_create_1_6( - struct kbase_context *kctx, - union kbase_ioctl_cs_queue_group_create_1_6 *create) +static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create_1_6 *create) { + int ret; + size_t i; union kbase_ioctl_cs_queue_group_create new_create = { .in = { .tiler_mask = create->in.tiler_mask, - .fragment_mask = - create->in.fragment_mask, + .fragment_mask = create->in.fragment_mask, .compute_mask = create->in.compute_mask, .cs_min = create->in.cs_min, .priority = create->in.priority, @@ -1425,47 +1521,92 @@ static int kbasep_cs_queue_group_create_1_6( .compute_max = create->in.compute_max, } }; - int ret = kbase_csf_queue_group_create(kctx, &new_create); + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } + + ret = kbase_csf_queue_group_create(kctx, 
&new_create); create->out.group_handle = new_create.out.group_handle; create->out.group_uid = new_create.out.group_uid; return ret; } -static int kbasep_cs_queue_group_create(struct kbase_context *kctx, - union kbase_ioctl_cs_queue_group_create *create) + +static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create_1_18 *create) { + int ret; + size_t i; + union kbase_ioctl_cs_queue_group_create + new_create = { .in = { + .tiler_mask = create->in.tiler_mask, + .fragment_mask = create->in.fragment_mask, + .compute_mask = create->in.compute_mask, + .cs_min = create->in.cs_min, + .priority = create->in.priority, + .tiler_max = create->in.tiler_max, + .fragment_max = create->in.fragment_max, + .compute_max = create->in.compute_max, + .csi_handlers = create->in.csi_handlers, + .dvs_buf = create->in.dvs_buf, + } }; + + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } + + ret = kbase_csf_queue_group_create(kctx, &new_create); + + create->out.group_handle = new_create.out.group_handle; + create->out.group_uid = new_create.out.group_uid; + + return ret; +} + +static int kbasep_cs_queue_group_create(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create *create) +{ + if (create->in.reserved != 0) { + dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n"); + return -EINVAL; + } return kbase_csf_queue_group_create(kctx, create); } static int kbasep_cs_queue_group_terminate(struct kbase_context *kctx, - struct kbase_ioctl_cs_queue_group_term *term) + struct kbase_ioctl_cs_queue_group_term *term) { kbase_csf_queue_group_terminate(kctx, term->group_handle); return 0; } -static int kbasep_kcpu_queue_new(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_new *new) +static int kbasep_kcpu_queue_new(struct kbase_context *kctx, struct 
kbase_ioctl_kcpu_queue_new *new) { return kbase_csf_kcpu_queue_new(kctx, new); } static int kbasep_kcpu_queue_delete(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_delete *delete) + struct kbase_ioctl_kcpu_queue_delete *delete) { return kbase_csf_kcpu_queue_delete(kctx, delete); } static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, - struct kbase_ioctl_kcpu_queue_enqueue *enqueue) + struct kbase_ioctl_kcpu_queue_enqueue *enqueue) { return kbase_csf_kcpu_queue_enqueue(kctx, enqueue); } static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, - union kbase_ioctl_cs_tiler_heap_init *heap_init) + union kbase_ioctl_cs_tiler_heap_init *heap_init) { if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; @@ -1495,13 +1636,13 @@ static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, } static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, - struct kbase_ioctl_cs_tiler_heap_term *heap_term) + struct kbase_ioctl_cs_tiler_heap_term *heap_term) { return kbase_csf_tiler_heap_term(kctx, heap_term->gpu_heap_va); } static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, - union kbase_ioctl_cs_get_glb_iface *param) + union kbase_ioctl_cs_get_glb_iface *param) { struct basep_cs_stream_control *stream_data = NULL; struct basep_cs_group_control *group_data = NULL; @@ -1513,8 +1654,7 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, if (max_group_num > MAX_SUPPORTED_CSGS) return -EINVAL; - if (max_total_stream_num > - MAX_SUPPORTED_CSGS * MAX_SUPPORTED_STREAMS_PER_GROUP) + if (max_total_stream_num > MAX_SUPPORTED_CSGS * MAX_SUPPORTED_STREAMS_PER_GROUP) return -EINVAL; user_groups = u64_to_user_ptr(param->in.groups_ptr); @@ -1524,8 +1664,7 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, if (!user_groups) err = -EINVAL; else { - group_data = kcalloc(max_group_num, - sizeof(*group_data), GFP_KERNEL); + group_data = kcalloc(max_group_num, 
sizeof(*group_data), GFP_KERNEL); if (!group_data) err = -ENOMEM; } @@ -1535,8 +1674,8 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, if (!user_streams) err = -EINVAL; else { - stream_data = kcalloc(max_total_stream_num, - sizeof(*stream_data), GFP_KERNEL); + stream_data = + kcalloc(max_total_stream_num, sizeof(*stream_data), GFP_KERNEL); if (!stream_data) err = -ENOMEM; } @@ -1544,21 +1683,19 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, if (!err) { param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( - kctx->kbdev, group_data, max_group_num, stream_data, - max_total_stream_num, ¶m->out.glb_version, - ¶m->out.features, ¶m->out.group_num, + kctx->kbdev, group_data, max_group_num, stream_data, max_total_stream_num, + ¶m->out.glb_version, ¶m->out.features, ¶m->out.group_num, ¶m->out.prfcnt_size, ¶m->out.instr_features); if (copy_to_user(user_groups, group_data, - MIN(max_group_num, param->out.group_num) * - sizeof(*group_data))) + MIN(max_group_num, param->out.group_num) * sizeof(*group_data))) err = -EFAULT; } if (!err) if (copy_to_user(user_streams, stream_data, - MIN(max_total_stream_num, param->out.total_stream_num) * - sizeof(*stream_data))) + MIN(max_total_stream_num, param->out.total_stream_num) * + sizeof(*stream_data))) err = -EFAULT; kfree(group_data); @@ -1567,10 +1704,9 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, } static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, - struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info) + struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info) { - return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer, - cpu_queue_info->size); + return kbase_csf_cpu_queue_dump_buffer(kctx, cpu_queue_info->buffer, cpu_queue_info->size); } static int kbase_ioctl_read_user_page(struct kbase_context *kctx, @@ -1591,7 +1727,7 @@ static int kbase_ioctl_read_user_page(struct kbase_context *kctx, if (!kbdev->pm.backend.gpu_powered) 
user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE; else - user_page->out.val_lo = kbase_reg_read(kbdev, USER_REG(LATEST_FLUSH)); + user_page->out.val_lo = kbase_reg_read32(kbdev, USER_ENUM(LATEST_FLUSH)); user_page->out.val_hi = 0; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1599,8 +1735,9 @@ static int kbase_ioctl_read_user_page(struct kbase_context *kctx, } #endif /* MALI_USE_CSF */ -static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, - struct kbase_ioctl_context_priority_check *priority_check) +static int +kbasep_ioctl_context_priority_check(struct kbase_context *kctx, + struct kbase_ioctl_context_priority_check *priority_check) { #if MALI_USE_CSF priority_check->priority = kbase_csf_priority_check(kctx->kbdev, priority_check->priority); @@ -1612,76 +1749,71 @@ static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, return 0; } -#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ - do { \ - int ret; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - ret = function(arg); \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ - #function); \ - return ret; \ +#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ + do { \ + int ret; \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + ret = function(arg); \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ } while (0) -#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(¶m, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - ret = function(arg, ¶m); \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ - #function); \ - return ret; \ +#define 
KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(¶m, uarg, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + ret = function(arg, ¶m); \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ } while (0) -#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - memset(¶m, 0, sizeof(param)); \ - ret = function(arg, ¶m); \ - err = copy_to_user(uarg, ¶m, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ - #function); \ - return ret; \ +#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + memset(¶m, 0, sizeof(param)); \ + ret = function(arg, ¶m); \ + err = copy_to_user(uarg, ¶m, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ } while (0) -#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ - BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(¶m, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - ret = function(arg, ¶m); \ - err = copy_to_user(uarg, ¶m, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - dev_dbg(arg->kbdev->dev, "Return %d from ioctl 
%s\n", ret, \ - #function); \ - return ret; \ +#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(¶m, uarg, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + ret = function(arg, ¶m); \ + err = copy_to_user(uarg, ¶m, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \ + return ret; \ } while (0) -static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, - struct kbase_ioctl_set_limited_core_count *set_limited_core_count) +static int kbasep_ioctl_set_limited_core_count( + struct kbase_context *kctx, + struct kbase_ioctl_set_limited_core_count *set_limited_core_count) { - const u64 shader_core_mask = - kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); - const u64 limited_core_mask = - ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + const u64 shader_core_mask = kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); + const u64 limited_core_mask = ((u64)1 << (set_limited_core_count->max_core_count)) - 1; if ((shader_core_mask & limited_core_mask) == 0) { /* At least one shader core must be available after applying the mask */ @@ -1692,9 +1824,8 @@ static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, return 0; } -static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg) { - struct kbase_file *const kfile = filp->private_data; struct kbase_context *kctx = NULL; struct kbase_device *kbdev = kfile->kbdev; void __user *uarg = (void __user *)arg; @@ -1702,38 +1833,31 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Only these ioctls 
are available until setup is complete */ switch (cmd) { case KBASE_IOCTL_VERSION_CHECK: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, - kbase_api_handshake, - struct kbase_ioctl_version_check, - kfile); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, kbase_api_handshake, + struct kbase_ioctl_version_check, kfile); break; case KBASE_IOCTL_VERSION_CHECK_RESERVED: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK_RESERVED, - kbase_api_handshake_dummy, - struct kbase_ioctl_version_check, - kfile); + kbase_api_handshake_dummy, + struct kbase_ioctl_version_check, kfile); break; case KBASE_IOCTL_SET_FLAGS: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, - kbase_api_set_flags, - struct kbase_ioctl_set_flags, - kfile); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, kbase_api_set_flags, + struct kbase_ioctl_set_flags, kfile); break; case KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO: - KBASE_HANDLE_IOCTL_INOUT( - KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, - kbase_api_kinstr_prfcnt_enum_info, - struct kbase_ioctl_kinstr_prfcnt_enum_info, kfile); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, + kbase_api_kinstr_prfcnt_enum_info, + struct kbase_ioctl_kinstr_prfcnt_enum_info, kfile); break; case KBASE_IOCTL_KINSTR_PRFCNT_SETUP: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_PRFCNT_SETUP, kbase_api_kinstr_prfcnt_setup, - union kbase_ioctl_kinstr_prfcnt_setup, - kfile); + union kbase_ioctl_kinstr_prfcnt_setup, kfile); break; case KBASE_IOCTL_GET_GPUPROPS: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, kbase_api_get_gpuprops, @@ -1749,24 +1873,18 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { #if !MALI_USE_CSF case KBASE_IOCTL_JOB_SUBMIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, - kbase_api_job_submit, - struct kbase_ioctl_job_submit, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, kbase_api_job_submit, + struct kbase_ioctl_job_submit, kctx); break; #endif /* !MALI_USE_CSF */ #if !MALI_USE_CSF case 
KBASE_IOCTL_POST_TERM: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, - kbase_api_post_term, - kctx); + KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, kbase_api_post_term, kctx); break; #endif /* !MALI_USE_CSF */ case KBASE_IOCTL_MEM_ALLOC: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, - kbase_api_mem_alloc, - union kbase_ioctl_mem_alloc, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, kbase_api_mem_alloc, + union kbase_ioctl_mem_alloc, kctx); break; #if MALI_USE_CSF case KBASE_IOCTL_MEM_ALLOC_EX: @@ -1775,265 +1893,190 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; #endif case KBASE_IOCTL_MEM_QUERY: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, - kbase_api_mem_query, - union kbase_ioctl_mem_query, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, kbase_api_mem_query, + union kbase_ioctl_mem_query, kctx); break; case KBASE_IOCTL_MEM_FREE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, - kbase_api_mem_free, - struct kbase_ioctl_mem_free, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, kbase_api_mem_free, + struct kbase_ioctl_mem_free, kctx); break; case KBASE_IOCTL_DISJOINT_QUERY: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, - kbase_api_disjoint_query, - struct kbase_ioctl_disjoint_query, - kctx); + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, kbase_api_disjoint_query, + struct kbase_ioctl_disjoint_query, kctx); break; case KBASE_IOCTL_GET_DDK_VERSION: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, - kbase_api_get_ddk_version, - struct kbase_ioctl_get_ddk_version, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, kbase_api_get_ddk_version, + struct kbase_ioctl_get_ddk_version, kctx); break; case KBASE_IOCTL_MEM_JIT_INIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, - kbase_api_mem_jit_init, - struct kbase_ioctl_mem_jit_init, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, kbase_api_mem_jit_init, + struct kbase_ioctl_mem_jit_init, kctx); break; case 
KBASE_IOCTL_MEM_EXEC_INIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, - kbase_api_mem_exec_init, - struct kbase_ioctl_mem_exec_init, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, kbase_api_mem_exec_init, + struct kbase_ioctl_mem_exec_init, kctx); break; case KBASE_IOCTL_MEM_SYNC: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, - kbase_api_mem_sync, - struct kbase_ioctl_mem_sync, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, kbase_api_mem_sync, + struct kbase_ioctl_mem_sync, kctx); break; case KBASE_IOCTL_MEM_FIND_CPU_OFFSET: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, - kbase_api_mem_find_cpu_offset, - union kbase_ioctl_mem_find_cpu_offset, - kctx); + kbase_api_mem_find_cpu_offset, + union kbase_ioctl_mem_find_cpu_offset, kctx); break; case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET, - kbase_api_mem_find_gpu_start_and_offset, - union kbase_ioctl_mem_find_gpu_start_and_offset, - kctx); + kbase_api_mem_find_gpu_start_and_offset, + union kbase_ioctl_mem_find_gpu_start_and_offset, kctx); break; case KBASE_IOCTL_GET_CONTEXT_ID: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, - kbase_api_get_context_id, - struct kbase_ioctl_get_context_id, - kctx); + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, kbase_api_get_context_id, + struct kbase_ioctl_get_context_id, kctx); break; case KBASE_IOCTL_TLSTREAM_ACQUIRE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, - kbase_api_tlstream_acquire, - struct kbase_ioctl_tlstream_acquire, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, kbase_api_tlstream_acquire, + struct kbase_ioctl_tlstream_acquire, kctx); break; case KBASE_IOCTL_TLSTREAM_FLUSH: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, - kbase_api_tlstream_flush, - kctx); + KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, kbase_api_tlstream_flush, kctx); break; case KBASE_IOCTL_MEM_COMMIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, - 
kbase_api_mem_commit, - struct kbase_ioctl_mem_commit, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, kbase_api_mem_commit, + struct kbase_ioctl_mem_commit, kctx); break; case KBASE_IOCTL_MEM_ALIAS: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, - kbase_api_mem_alias, - union kbase_ioctl_mem_alias, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, kbase_api_mem_alias, + union kbase_ioctl_mem_alias, kctx); break; case KBASE_IOCTL_MEM_IMPORT: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, - kbase_api_mem_import, - union kbase_ioctl_mem_import, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, kbase_api_mem_import, + union kbase_ioctl_mem_import, kctx); break; case KBASE_IOCTL_MEM_FLAGS_CHANGE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, - kbase_api_mem_flags_change, - struct kbase_ioctl_mem_flags_change, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, kbase_api_mem_flags_change, + struct kbase_ioctl_mem_flags_change, kctx); break; case KBASE_IOCTL_STREAM_CREATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, - kbase_api_stream_create, - struct kbase_ioctl_stream_create, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, kbase_api_stream_create, + struct kbase_ioctl_stream_create, kctx); break; case KBASE_IOCTL_FENCE_VALIDATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, - kbase_api_fence_validate, - struct kbase_ioctl_fence_validate, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, kbase_api_fence_validate, + struct kbase_ioctl_fence_validate, kctx); break; case KBASE_IOCTL_MEM_PROFILE_ADD: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, - kbase_api_mem_profile_add, - struct kbase_ioctl_mem_profile_add, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, kbase_api_mem_profile_add, + struct kbase_ioctl_mem_profile_add, kctx); break; #if !MALI_USE_CSF case KBASE_IOCTL_SOFT_EVENT_UPDATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, - 
kbase_api_soft_event_update, - struct kbase_ioctl_soft_event_update, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, kbase_api_soft_event_update, + struct kbase_ioctl_soft_event_update, kctx); break; #endif /* !MALI_USE_CSF */ case KBASE_IOCTL_STICKY_RESOURCE_MAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, - kbase_api_sticky_resource_map, - struct kbase_ioctl_sticky_resource_map, - kctx); + kbase_api_sticky_resource_map, + struct kbase_ioctl_sticky_resource_map, kctx); break; case KBASE_IOCTL_STICKY_RESOURCE_UNMAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP, - kbase_api_sticky_resource_unmap, - struct kbase_ioctl_sticky_resource_unmap, - kctx); + kbase_api_sticky_resource_unmap, + struct kbase_ioctl_sticky_resource_unmap, kctx); break; - /* Instrumentation. */ + /* Instrumentation. */ #if !MALI_USE_CSF case KBASE_IOCTL_KINSTR_JM_FD: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, - kbase_api_kinstr_jm_fd, - union kbase_kinstr_jm_fd, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, kbase_api_kinstr_jm_fd, + union kbase_kinstr_jm_fd, kctx); break; #endif - case KBASE_IOCTL_HWCNT_READER_SETUP: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, - kbase_api_hwcnt_reader_setup, - struct kbase_ioctl_hwcnt_reader_setup, - kctx); - break; case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, - kbase_api_get_cpu_gpu_timeinfo, - union kbase_ioctl_get_cpu_gpu_timeinfo, - kctx); + kbase_api_get_cpu_gpu_timeinfo, + union kbase_ioctl_get_cpu_gpu_timeinfo, kctx); break; #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) case KBASE_IOCTL_HWCNT_SET: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, - kbase_api_hwcnt_set, - struct kbase_ioctl_hwcnt_values, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, kbase_api_hwcnt_set, + struct kbase_ioctl_hwcnt_values, kctx); break; #endif /* CONFIG_MALI_BIFROST_NO_MALI */ #ifdef CONFIG_MALI_CINSTR_GWT case KBASE_IOCTL_CINSTR_GWT_START: - 
KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, - kbase_gpu_gwt_start, - kctx); + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, kbase_gpu_gwt_start, kctx); break; case KBASE_IOCTL_CINSTR_GWT_STOP: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, - kbase_gpu_gwt_stop, - kctx); + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, kbase_gpu_gwt_stop, kctx); break; case KBASE_IOCTL_CINSTR_GWT_DUMP: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, - kbase_gpu_gwt_dump, - union kbase_ioctl_cinstr_gwt_dump, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, kbase_gpu_gwt_dump, + union kbase_ioctl_cinstr_gwt_dump, kctx); break; #endif #if MALI_USE_CSF case KBASE_IOCTL_CS_EVENT_SIGNAL: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_CS_EVENT_SIGNAL, - kbasep_cs_event_signal, - kctx); + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CS_EVENT_SIGNAL, kbasep_cs_event_signal, kctx); break; case KBASE_IOCTL_CS_QUEUE_REGISTER: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER, - kbasep_cs_queue_register, - struct kbase_ioctl_cs_queue_register, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER, kbasep_cs_queue_register, + struct kbase_ioctl_cs_queue_register, kctx); break; case KBASE_IOCTL_CS_QUEUE_REGISTER_EX: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER_EX, - kbasep_cs_queue_register_ex, - struct kbase_ioctl_cs_queue_register_ex, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER_EX, kbasep_cs_queue_register_ex, + struct kbase_ioctl_cs_queue_register_ex, kctx); break; case KBASE_IOCTL_CS_QUEUE_TERMINATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_TERMINATE, - kbasep_cs_queue_terminate, - struct kbase_ioctl_cs_queue_terminate, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_TERMINATE, kbasep_cs_queue_terminate, + struct kbase_ioctl_cs_queue_terminate, kctx); break; case KBASE_IOCTL_CS_QUEUE_BIND: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_BIND, - kbasep_cs_queue_bind, - union kbase_ioctl_cs_queue_bind, - kctx); + 
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_BIND, kbasep_cs_queue_bind, + union kbase_ioctl_cs_queue_bind, kctx); break; case KBASE_IOCTL_CS_QUEUE_KICK: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_KICK, - kbasep_cs_queue_kick, - struct kbase_ioctl_cs_queue_kick, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_KICK, kbasep_cs_queue_kick, + struct kbase_ioctl_cs_queue_kick, kctx); break; case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6: - KBASE_HANDLE_IOCTL_INOUT( - KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6, - kbasep_cs_queue_group_create_1_6, - union kbase_ioctl_cs_queue_group_create_1_6, kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6, + kbasep_cs_queue_group_create_1_6, + union kbase_ioctl_cs_queue_group_create_1_6, kctx); + break; + case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18, + kbasep_cs_queue_group_create_1_18, + union kbase_ioctl_cs_queue_group_create_1_18, kctx); break; case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, - kbasep_cs_queue_group_create, - union kbase_ioctl_cs_queue_group_create, - kctx); + kbasep_cs_queue_group_create, + union kbase_ioctl_cs_queue_group_create, kctx); break; case KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE, - kbasep_cs_queue_group_terminate, - struct kbase_ioctl_cs_queue_group_term, - kctx); + kbasep_cs_queue_group_terminate, + struct kbase_ioctl_cs_queue_group_term, kctx); break; case KBASE_IOCTL_KCPU_QUEUE_CREATE: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_KCPU_QUEUE_CREATE, - kbasep_kcpu_queue_new, - struct kbase_ioctl_kcpu_queue_new, - kctx); + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_KCPU_QUEUE_CREATE, kbasep_kcpu_queue_new, + struct kbase_ioctl_kcpu_queue_new, kctx); break; case KBASE_IOCTL_KCPU_QUEUE_DELETE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_DELETE, - kbasep_kcpu_queue_delete, - struct kbase_ioctl_kcpu_queue_delete, - kctx); + 
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_DELETE, kbasep_kcpu_queue_delete, + struct kbase_ioctl_kcpu_queue_delete, kctx); break; case KBASE_IOCTL_KCPU_QUEUE_ENQUEUE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, - kbasep_kcpu_queue_enqueue, - struct kbase_ioctl_kcpu_queue_enqueue, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, kbasep_kcpu_queue_enqueue, + struct kbase_ioctl_kcpu_queue_enqueue, kctx); break; case KBASE_IOCTL_CS_TILER_HEAP_INIT: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, - kbasep_cs_tiler_heap_init, - union kbase_ioctl_cs_tiler_heap_init, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, kbasep_cs_tiler_heap_init, + union kbase_ioctl_cs_tiler_heap_init, kctx); break; case KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13, @@ -2041,22 +2084,16 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) union kbase_ioctl_cs_tiler_heap_init_1_13, kctx); break; case KBASE_IOCTL_CS_TILER_HEAP_TERM: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, - kbasep_cs_tiler_heap_term, - struct kbase_ioctl_cs_tiler_heap_term, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, kbasep_cs_tiler_heap_term, + struct kbase_ioctl_cs_tiler_heap_term, kctx); break; case KBASE_IOCTL_CS_GET_GLB_IFACE: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_GET_GLB_IFACE, - kbase_ioctl_cs_get_glb_iface, - union kbase_ioctl_cs_get_glb_iface, - kctx); + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_GET_GLB_IFACE, kbase_ioctl_cs_get_glb_iface, + union kbase_ioctl_cs_get_glb_iface, kctx); break; case KBASE_IOCTL_CS_CPU_QUEUE_DUMP: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP, - kbasep_ioctl_cs_cpu_queue_dump, - struct kbase_ioctl_cs_cpu_queue_info, - kctx); + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP, kbasep_ioctl_cs_cpu_queue_dump, + struct kbase_ioctl_cs_cpu_queue_info, kctx); break; /* This IOCTL will be kept for backward 
compatibility */ case KBASE_IOCTL_READ_USER_PAGE: @@ -2066,23 +2103,19 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) #endif /* MALI_USE_CSF */ #if MALI_UNIT_TEST case KBASE_IOCTL_TLSTREAM_STATS: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, - kbase_api_tlstream_stats, - struct kbase_ioctl_tlstream_stats, - kctx); + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, kbase_api_tlstream_stats, + struct kbase_ioctl_tlstream_stats, kctx); break; #endif /* MALI_UNIT_TEST */ case KBASE_IOCTL_CONTEXT_PRIORITY_CHECK: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CONTEXT_PRIORITY_CHECK, - kbasep_ioctl_context_priority_check, - struct kbase_ioctl_context_priority_check, - kctx); + kbasep_ioctl_context_priority_check, + struct kbase_ioctl_context_priority_check, kctx); break; case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, - kbasep_ioctl_set_limited_core_count, - struct kbase_ioctl_set_limited_core_count, - kctx); + kbasep_ioctl_set_limited_core_count, + struct kbase_ioctl_set_limited_core_count, kctx); break; } @@ -2091,22 +2124,45 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct kbase_file *const kfile = filp->private_data; + long ioctl_ret; + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + return -EPERM; + + ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg); + kbase_file_dec_fops_count(kfile); + + return ioctl_ret; +} + #if MALI_USE_CSF static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); - struct base_csf_notification event_data = { - .type = BASE_CSF_NOTIFICATION_EVENT }; + struct kbase_context *kctx; + struct base_csf_notification event_data = { .type 
= BASE_CSF_NOTIFICATION_EVENT }; const size_t data_size = sizeof(event_data); bool read_event = false, read_error = false; + ssize_t err = 0; - if (unlikely(!kctx)) + CSTD_UNUSED(f_pos); + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - if (count < data_size) - return -ENOBUFS; + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) { + err = -EPERM; + goto out; + } + + if (count < data_size) { + err = -ENOBUFS; + goto out; + } if (atomic_read(&kctx->event_count)) read_event = true; @@ -2114,8 +2170,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof read_error = kbase_csf_event_read_error(kctx, &event_data); if (!read_event && !read_error) { - bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, - &event_data); + bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, &event_data); /* This condition is not treated as an error. * It is possible that event handling thread was woken up due * to a fault/error that occurred for a queue group, but before @@ -2123,35 +2178,46 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof * queue group was already terminated by the userspace. */ if (!dump) - dev_dbg(kctx->kbdev->dev, - "Neither event nor error signaled"); + dev_dbg(kctx->kbdev->dev, "Neither event nor error signaled"); } if (copy_to_user(buf, &event_data, data_size) != 0) { - dev_warn(kctx->kbdev->dev, - "Failed to copy data\n"); - return -EFAULT; + dev_warn(kctx->kbdev->dev, "Failed to copy data\n"); + err = -EFAULT; + goto out; } if (read_event) atomic_set(&kctx->event_count, 0); - return data_size; +out: + kbase_file_dec_fops_count(kfile); + return err ? 
err : (ssize_t)data_size; } #else /* MALI_USE_CSF */ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; struct base_jd_event_v2 uevent; int out_count = 0; + ssize_t err = 0; - if (unlikely(!kctx)) + CSTD_UNUSED(f_pos); + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - if (count < sizeof(uevent)) - return -ENOBUFS; + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) { + err = -EPERM; + goto out; + } + + if (count < sizeof(uevent)) { + err = -ENOBUFS; + goto out; + } memset(&uevent, 0, sizeof(uevent)); @@ -2160,90 +2226,113 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (out_count > 0) goto out; - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; + if (filp->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out; + } - if (wait_event_interruptible(kctx->event_queue, - kbase_event_pending(kctx)) != 0) - return -ERESTARTSYS; + if (wait_event_interruptible(kfile->event_queue, + kbase_event_pending(kctx)) != 0) { + err = -ERESTARTSYS; + goto out; + } } if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { - if (out_count == 0) - return -EPIPE; + if (out_count == 0) { + err = -EPIPE; + goto out; + } goto out; } - if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) - return -EFAULT; + if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) { + err = -EFAULT; + goto out; + } buf += sizeof(uevent); out_count++; count -= sizeof(uevent); } while (count >= sizeof(uevent)); - out: - return out_count * sizeof(uevent); +out: + kbase_file_dec_fops_count(kfile); + return err ? 
err : (ssize_t)(out_count * sizeof(uevent)); } #endif /* MALI_USE_CSF */ static __poll_t kbase_poll(struct file *filp, poll_table *wait) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + __poll_t ret = 0; + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) { +#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) + ret = POLLNVAL; +#else + ret = EPOLLNVAL; +#endif + return ret; + } + + kctx = kbase_file_get_kctx_if_setup_complete(kfile); if (unlikely(!kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLERR; + ret = POLLERR; #else - return EPOLLERR; + ret = EPOLLERR; #endif + goto out; } - poll_wait(filp, &kctx->event_queue, wait); + poll_wait(filp, &kfile->event_queue, wait); if (kbase_event_pending(kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLIN | POLLRDNORM; + ret = POLLIN | POLLRDNORM; #else - return EPOLLIN | EPOLLRDNORM; + ret = EPOLLIN | EPOLLRDNORM; #endif } - return 0; +out: + kbase_file_dec_fops_count(kfile); + return ret; } void kbase_event_wakeup(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx); - dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", - (void *)kctx); - wake_up_interruptible(&kctx->event_queue); + dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", (void *)kctx); +#ifdef CONFIG_MALI_BIFROST_DEBUG + if (WARN_ON_ONCE(!kctx->kfile)) + return; +#endif + wake_up_interruptible(&kctx->kfile->event_queue); } KBASE_EXPORT_TEST_API(kbase_event_wakeup); #if MALI_USE_CSF -int kbase_event_pending(struct kbase_context *ctx) +int kbase_event_pending(struct kbase_context *kctx) { - KBASE_DEBUG_ASSERT(ctx); + KBASE_DEBUG_ASSERT(kctx); - if (unlikely(!ctx)) + if (unlikely(!kctx)) return -EPERM; - return (atomic_read(&ctx->event_count) != 0) || - kbase_csf_event_error_pending(ctx) || - kbase_csf_cpu_queue_dump_needed(ctx); + return 
(atomic_read(&kctx->event_count) != 0) || kbase_csf_event_error_pending(kctx) || + kbase_csf_cpu_queue_dump_needed(kctx); } #else -int kbase_event_pending(struct kbase_context *ctx) +int kbase_event_pending(struct kbase_context *kctx) { - KBASE_DEBUG_ASSERT(ctx); + KBASE_DEBUG_ASSERT(kctx); - if (unlikely(!ctx)) + if (unlikely(!kctx)) return -EPERM; - return (atomic_read(&ctx->event_count) != 0) || - (atomic_read(&ctx->event_closed) != 0); + return (atomic_read(&kctx->event_count) != 0) || (atomic_read(&kctx->event_closed) != 0); } #endif @@ -2252,13 +2341,20 @@ KBASE_EXPORT_TEST_API(kbase_event_pending); static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + int ret; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - return kbase_context_mmap(kctx, vma); + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (likely(kctx)) + ret = kbase_context_mmap(kctx, vma); + else + ret = -EPERM; + + kbase_file_dec_fops_count(kfile); + return ret; } static int kbase_check_flags(int flags) @@ -2272,23 +2368,31 @@ static int kbase_check_flags(int flags) return 0; } -static unsigned long kbase_get_unmapped_area(struct file *const filp, - const unsigned long addr, const unsigned long len, - const unsigned long pgoff, const unsigned long flags) +static unsigned long kbase_get_unmapped_area(struct file *const filp, const unsigned long addr, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + unsigned long address; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - return 
kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (likely(kctx)) + address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); + else + address = -EPERM; + + kbase_file_dec_fops_count(kfile); + return address; } static const struct file_operations kbase_fops = { .owner = THIS_MODULE, .open = kbase_open, + .flush = kbase_flush, .release = kbase_release, .read = kbase_read, .poll = kbase_poll, @@ -2317,10 +2421,12 @@ static ssize_t power_policy_show(struct device *dev, struct device_attribute *at struct kbase_device *kbdev; const struct kbase_pm_policy *current_policy; const struct kbase_pm_policy *const *policy_list; - int policy_count; - int i; + uint policy_count; + uint i; ssize_t ret = 0; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) @@ -2328,16 +2434,16 @@ static ssize_t power_policy_show(struct device *dev, struct device_attribute *at current_policy = kbase_pm_get_policy(kbdev); - policy_count = kbase_pm_list_policies(kbdev, &policy_list); + policy_count = (uint)kbase_pm_list_policies(kbdev, &policy_list); - for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { + for (i = 0; i < policy_count && ret < (ssize_t)PAGE_SIZE; i++) { if (policy_list[i] == current_policy) ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); else ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); } - if (ret < PAGE_SIZE - 1) { + if (ret < (ssize_t)PAGE_SIZE - 1) { ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); } else { buf[PAGE_SIZE - 2] = '\n'; @@ -2363,20 +2469,23 @@ static ssize_t power_policy_show(struct device *dev, struct device_attribute *at * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t power_policy_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t power_policy_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *kbdev; const struct kbase_pm_policy *new_policy = NULL; const struct kbase_pm_policy *const *policy_list; - int policy_count; - int i; + uint policy_count; + uint i; + + CSTD_UNUSED(attr); kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - policy_count = kbase_pm_list_policies(kbdev, &policy_list); + policy_count = (uint)kbase_pm_list_policies(kbdev, &policy_list); for (i = 0; i < policy_count; i++) { if (sysfs_streq(policy_list[i]->name, buf)) { @@ -2415,12 +2524,14 @@ static DEVICE_ATTR_RW(power_policy); * * Return: The number of bytes output to @buf. */ -static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, char * const buf) +static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev; unsigned long flags; ssize_t ret = 0; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) @@ -2429,30 +2540,23 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); #if MALI_USE_CSF - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current debug core mask : 0x%llX\n", + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current debug core mask : 0x%llX\n", kbdev->pm.debug_core_mask); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current desired core mask : 0x%llX\n", + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current desired core mask : 0x%llX\n", kbase_pm_ca_get_core_mask(kbdev)); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current in use core mask : 0x%llX\n", + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current in use core mask : 0x%llX\n", kbdev->pm.backend.shaders_avail); #else - ret += scnprintf(buf + ret, 
PAGE_SIZE - ret, - "Current core mask (JS0) : 0x%llX\n", - kbdev->pm.debug_core_mask[0]); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current core mask (JS1) : 0x%llX\n", - kbdev->pm.debug_core_mask[1]); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current core mask (JS2) : 0x%llX\n", - kbdev->pm.debug_core_mask[2]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask (JS0) : 0x%llX\n", + kbdev->pm.debug_core_mask[0]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask (JS1) : 0x%llX\n", + kbdev->pm.debug_core_mask[1]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask (JS2) : 0x%llX\n", + kbdev->pm.debug_core_mask[2]); #endif /* MALI_USE_CSF */ - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Available core mask : 0x%llX\n", - kbdev->gpu_props.props.raw_props.shader_present); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", + kbdev->gpu_props.shader_present); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -2471,14 +2575,15 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) { struct kbase_device *kbdev; #if MALI_USE_CSF u64 new_core_mask; #else u64 new_core_mask[3]; - u64 group0_core_mask; + u64 group_core_mask; int i; #endif /* MALI_USE_CSF */ @@ -2487,6 +2592,8 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr unsigned long flags; u64 shader_present; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) @@ -2496,21 +2603,19 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr items = sscanf(buf, "%llx", &new_core_mask); if (items != 1) { - dev_err(kbdev->dev, - "Couldn't process core mask write operation.\n" - "Use format \n"); + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format \n"); err = -EINVAL; goto end; } #else - items = sscanf(buf, "%llx %llx %llx", - &new_core_mask[0], &new_core_mask[1], - &new_core_mask[2]); + items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1], + &new_core_mask[2]); if (items != 1 && items != 3) { dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format \n" - "or \n"); + "Use format \n" + "or \n"); err = -EINVAL; goto end; } @@ -2522,7 +2627,7 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr mutex_lock(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - shader_present = kbdev->gpu_props.props.raw_props.shader_present; + shader_present = kbdev->gpu_props.shader_present; #if MALI_USE_CSF if ((new_core_mask & shader_present) != new_core_mask) { @@ -2532,12 +2637,10 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr err = -EINVAL; goto unlock; - } else if (!(new_core_mask & shader_present & - kbdev->pm.backend.ca_cores_enabled)) { + } else if 
(!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) { dev_err(dev, "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask, - kbdev->gpu_props.props.raw_props.shader_present, + new_core_mask, kbdev->gpu_props.shader_present, kbdev->pm.backend.ca_cores_enabled); err = -EINVAL; goto unlock; @@ -2546,44 +2649,44 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr if (kbdev->pm.debug_core_mask != new_core_mask) kbase_pm_set_debug_core_mask(kbdev, new_core_mask); #else - group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; for (i = 0; i < 3; ++i) { if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", - new_core_mask[i], i, shader_present); + dev_err(dev, + "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", + new_core_mask[i], i, shader_present); err = -EINVAL; goto unlock; - } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask[i], i, - kbdev->gpu_props.props.raw_props.shader_present, - kbdev->pm.backend.ca_cores_enabled); + } else if (!(new_core_mask[i] & shader_present & + kbdev->pm.backend.ca_cores_enabled)) { + dev_err(dev, + "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", + new_core_mask[i], i, kbdev->gpu_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); err = -EINVAL; goto unlock; - - } else if (!(new_core_mask[i] & group0_core_mask)) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 
0x%llX\n", - new_core_mask[i], i, group0_core_mask); + } else if (!(new_core_mask[i] & group_core_mask)) { + dev_err(dev, + "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", + new_core_mask[i], i, group_core_mask); err = -EINVAL; goto unlock; } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", - new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); + dev_err(dev, + "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", + new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); err = -EINVAL; goto unlock; } } if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != - new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != - new_core_mask[2]) { - - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], - new_core_mask[1], new_core_mask[2]); + kbdev->pm.debug_core_mask[1] != new_core_mask[1] || + kbdev->pm.debug_core_mask[2] != new_core_mask[2]) { + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], + new_core_mask[2]); } #endif /* MALI_USE_CSF */ @@ -2621,23 +2724,22 @@ static DEVICE_ATTR_RW(core_mask); * * Return: count if the function succeeded. An error code on failure. 
*/ -static ssize_t soft_job_timeout_store(struct device *dev, - struct device_attribute *attr, +static ssize_t soft_job_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; int soft_job_timeout_ms; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || - (soft_job_timeout_ms <= 0)) + if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || (soft_job_timeout_ms <= 0)) return -EINVAL; - atomic_set(&kbdev->js_data.soft_job_timeout_ms, - soft_job_timeout_ms); + atomic_set(&kbdev->js_data.soft_job_timeout_ms, soft_job_timeout_ms); return count; } @@ -2654,24 +2756,24 @@ static ssize_t soft_job_timeout_store(struct device *dev, * * Return: The number of bytes output to buf. */ -static ssize_t soft_job_timeout_show(struct device *dev, - struct device_attribute *attr, - char * const buf) +static ssize_t soft_job_timeout_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - return scnprintf(buf, PAGE_SIZE, "%i\n", - atomic_read(&kbdev->js_data.soft_job_timeout_ms)); + return scnprintf(buf, PAGE_SIZE, "%i\n", atomic_read(&kbdev->js_data.soft_job_timeout_ms)); } static DEVICE_ATTR_RW(soft_job_timeout); -static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, - int default_ticks, u32 old_ticks) +static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, int default_ticks, + u32 old_ticks) { if (timeout_ms > 0) { u64 ticks = timeout_ms * 1000000ULL; @@ -2708,7 +2810,8 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) { struct kbase_device *kbdev; int items; @@ -2721,15 +2824,16 @@ static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *at long js_reset_ms_cl; long js_reset_ms_dumping; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", - &js_soft_stop_ms, &js_soft_stop_ms_cl, - &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, - &js_hard_stop_ms_dumping, &js_reset_ms_ss, - &js_reset_ms_cl, &js_reset_ms_dumping); + items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", &js_soft_stop_ms, + &js_soft_stop_ms_cl, &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, + &js_hard_stop_ms_dumping, &js_reset_ms_ss, &js_reset_ms_cl, + &js_reset_ms_dumping); if (items == 8) { struct kbasep_js_device_data *js_data = &kbdev->js_data; @@ -2737,32 +2841,27 @@ static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *at spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ - js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ - default, js_data->ticks_name); \ - dev_dbg(kbdev->dev, "Overriding " #ticks_name \ - " with %lu ticks (%lu ms)\n", \ - (unsigned long)js_data->ticks_name, \ - ms_name); \ +#define UPDATE_TIMEOUT(ticks_name, ms_name, default) \ + do { \ + js_data->ticks_name = \ + timeout_ms_to_ticks(kbdev, ms_name, default, js_data->ticks_name); \ + dev_dbg(kbdev->dev, "Overriding " #ticks_name " with %lu ticks (%lu ms)\n", \ + (unsigned long)js_data->ticks_name, ms_name); \ } while (0) - UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, - DEFAULT_JS_SOFT_STOP_TICKS); + UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, DEFAULT_JS_SOFT_STOP_TICKS); UPDATE_TIMEOUT(soft_stop_ticks_cl, 
js_soft_stop_ms_cl, - DEFAULT_JS_SOFT_STOP_TICKS_CL); + DEFAULT_JS_SOFT_STOP_TICKS_CL); UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, - DEFAULT_JS_HARD_STOP_TICKS_SS); + DEFAULT_JS_HARD_STOP_TICKS_SS); UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, - DEFAULT_JS_HARD_STOP_TICKS_CL); - UPDATE_TIMEOUT(hard_stop_ticks_dumping, - js_hard_stop_ms_dumping, - DEFAULT_JS_HARD_STOP_TICKS_DUMPING); - UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, - DEFAULT_JS_RESET_TICKS_SS); - UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, - DEFAULT_JS_RESET_TICKS_CL); + DEFAULT_JS_HARD_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_dumping, js_hard_stop_ms_dumping, + DEFAULT_JS_HARD_STOP_TICKS_DUMPING); + UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, DEFAULT_JS_RESET_TICKS_SS); + UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, DEFAULT_JS_RESET_TICKS_CL); UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, - DEFAULT_JS_RESET_TICKS_DUMPING); + DEFAULT_JS_RESET_TICKS_DUMPING); kbase_js_set_timeouts(kbdev); @@ -2771,15 +2870,14 @@ static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *at return count; } - dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" - "Use format \n" - "Write 0 for no change, -1 to restore default timeout\n"); + dev_err(kbdev->dev, + "Couldn't process js_timeouts write operation.\n" + "Use format \n" + "Write 0 for no change, -1 to restore default timeout\n"); return -EINVAL; } -static unsigned long get_js_timeout_in_ms( - u32 scheduling_period_ns, - u32 ticks) +static unsigned long get_js_timeout_in_ms(u32 scheduling_period_ns, u32 ticks) { u64 ms = (u64)ticks * scheduling_period_ns; @@ -2801,7 +2899,7 @@ static unsigned long get_js_timeout_in_ms( * * Return: The number of bytes output to @buf. 
*/ -static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *attr, char * const buf) +static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev; ssize_t ret; @@ -2815,15 +2913,15 @@ static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *att unsigned long js_reset_ms_dumping; u32 scheduling_period_ns; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; scheduling_period_ns = kbdev->js_data.scheduling_period_ns; -#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ - scheduling_period_ns, \ - kbdev->js_data.name) +#define GET_TIMEOUT(name) get_js_timeout_in_ms(scheduling_period_ns, kbdev->js_data.name) js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); @@ -2836,13 +2934,12 @@ static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *att #undef GET_TIMEOUT - ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", - js_soft_stop_ms, js_soft_stop_ms_cl, - js_hard_stop_ms_ss, js_hard_stop_ms_cl, - js_hard_stop_ms_dumping, js_reset_ms_ss, - js_reset_ms_cl, js_reset_ms_dumping); + ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", js_soft_stop_ms, + js_soft_stop_ms_cl, js_hard_stop_ms_ss, js_hard_stop_ms_cl, + js_hard_stop_ms_dumping, js_reset_ms_ss, js_reset_ms_cl, + js_reset_ms_dumping); - if (ret >= PAGE_SIZE) { + if (ret >= (ssize_t)PAGE_SIZE) { buf[PAGE_SIZE - 2] = '\n'; buf[PAGE_SIZE - 1] = '\0'; ret = PAGE_SIZE - 1; @@ -2866,15 +2963,12 @@ static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *att */ static DEVICE_ATTR_RW(js_timeouts); -static u32 get_new_js_timeout( - u32 old_period, - u32 old_ticks, - u32 new_scheduling_period_ns) +static u32 get_new_js_timeout(u32 old_period, u32 old_ticks, u32 new_scheduling_period_ns) { u64 ticks = (u64)old_period * (u64)old_ticks; do_div(ticks, new_scheduling_period_ns); 
- return ticks?ticks:1; + return ticks ? ticks : 1; } /** @@ -2891,8 +2985,8 @@ static u32 get_new_js_timeout( * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t js_scheduling_period_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t js_scheduling_period_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *kbdev; int ret; @@ -2902,6 +2996,8 @@ static ssize_t js_scheduling_period_store(struct device *dev, struct kbasep_js_device_data *js_data; unsigned long flags; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -2911,7 +3007,7 @@ static ssize_t js_scheduling_period_store(struct device *dev, ret = kstrtouint(buf, 0, &js_scheduling_period); if (ret || !js_scheduling_period) { dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" - "Use format \n"); + "Use format \n"); return -EINVAL; } @@ -2930,10 +3026,8 @@ static ssize_t js_scheduling_period_store(struct device *dev, old_period = js_data->scheduling_period_ns; #define SET_TIMEOUT(name) \ - (js_data->name = get_new_js_timeout(\ - old_period, \ - kbdev->js_data.name, \ - new_scheduling_period_ns)) + (js_data->name = \ + get_new_js_timeout(old_period, kbdev->js_data.name, new_scheduling_period_ns)) SET_TIMEOUT(soft_stop_ticks); SET_TIMEOUT(soft_stop_ticks_cl); @@ -2953,8 +3047,7 @@ static ssize_t js_scheduling_period_store(struct device *dev, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_data->runpool_mutex); - dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", - js_scheduling_period); + dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", js_scheduling_period); return count; } @@ -2971,36 +3064,38 @@ static ssize_t js_scheduling_period_store(struct device *dev, * * Return: The number of bytes output to @buf. 
*/ -static ssize_t js_scheduling_period_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t js_scheduling_period_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; u32 period; ssize_t ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; period = kbdev->js_data.scheduling_period_ns; - ret = scnprintf(buf, PAGE_SIZE, "%d\n", - period / 1000000); + ret = scnprintf(buf, PAGE_SIZE, "%d\n", period / 1000000); return ret; } static DEVICE_ATTR_RW(js_scheduling_period); - #ifdef CONFIG_MALI_BIFROST_DEBUG -static ssize_t js_softstop_always_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t js_softstop_always_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *kbdev; int ret; int softstop_always; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3008,30 +3103,31 @@ static ssize_t js_softstop_always_store(struct device *dev, ret = kstrtoint(buf, 0, &softstop_always); if (ret || ((softstop_always != 0) && (softstop_always != 1))) { dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" - "Use format \n"); + "Use format \n"); return -EINVAL; } - kbdev->js_data.softstop_always = (bool) softstop_always; + kbdev->js_data.softstop_always = (bool)softstop_always; dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", - (kbdev->js_data.softstop_always) ? - "Enabled" : "Disabled"); + (kbdev->js_data.softstop_always) ? 
"Enabled" : "Disabled"); return count; } -static ssize_t js_softstop_always_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t js_softstop_always_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; ssize_t ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); - if (ret >= PAGE_SIZE) { + if (ret >= (ssize_t)PAGE_SIZE) { buf[PAGE_SIZE - 2] = '\n'; buf[PAGE_SIZE - 1] = '\0'; ret = PAGE_SIZE - 1; @@ -3071,12 +3167,10 @@ static void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev) } /* Debug commands supported by the driver */ -static const struct kbasep_debug_command debug_commands[] = { - { - .str = "dumptrace", - .func = &kbasep_ktrace_dump_wrapper, - } -}; +static const struct kbasep_debug_command debug_commands[] = { { + .str = "dumptrace", + .func = &kbasep_ktrace_dump_wrapper, +} }; /** * debug_command_show - Show callback for the debug_command sysfs file. @@ -3090,21 +3184,24 @@ static const struct kbasep_debug_command debug_commands[] = { * * Return: The number of bytes output to @buf. 
*/ -static ssize_t debug_command_show(struct device *dev, struct device_attribute *attr, char * const buf) +static ssize_t debug_command_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; - int i; + size_t i; ssize_t ret = 0; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < (ssize_t)PAGE_SIZE; i++) ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); - if (ret >= PAGE_SIZE) { + if (ret >= (ssize_t)PAGE_SIZE) { buf[PAGE_SIZE - 2] = '\n'; buf[PAGE_SIZE - 1] = '\0'; ret = PAGE_SIZE - 1; @@ -3128,11 +3225,14 @@ static ssize_t debug_command_show(struct device *dev, struct device_attribute *a * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t debug_command_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t debug_command_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *kbdev; int i; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) @@ -3174,79 +3274,60 @@ static DEVICE_ATTR_RW(debug_command); * * Return: The number of bytes output to @buf. 
*/ -static ssize_t gpuinfo_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t gpuinfo_show(struct device *dev, struct device_attribute *attr, char *buf) { static const struct gpu_product_id_name { unsigned int id; char *name; } gpu_product_id_names[] = { - { .id = GPU_ID2_PRODUCT_TMIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G71" }, - { .id = GPU_ID2_PRODUCT_THEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G72" }, - { .id = GPU_ID2_PRODUCT_TSIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G51" }, - { .id = GPU_ID2_PRODUCT_TNOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G76" }, - { .id = GPU_ID2_PRODUCT_TDVX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G31" }, - { .id = GPU_ID2_PRODUCT_TGOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G52" }, - { .id = GPU_ID2_PRODUCT_TTRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G77" }, - { .id = GPU_ID2_PRODUCT_TBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G78" }, - { .id = GPU_ID2_PRODUCT_TBAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G78AE" }, - { .id = GPU_ID2_PRODUCT_LBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G68" }, - { .id = GPU_ID2_PRODUCT_TNAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G57" }, - { .id = GPU_ID2_PRODUCT_TODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G710" }, - { .id = GPU_ID2_PRODUCT_LODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G610" }, - { .id = GPU_ID2_PRODUCT_TGRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G510" }, - { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G310" }, - { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TTIX" }, - { .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LTIX" }, + { .id = GPU_ID_PRODUCT_TMIX, .name = "Mali-G71" }, 
+ { .id = GPU_ID_PRODUCT_THEX, .name = "Mali-G72" }, + { .id = GPU_ID_PRODUCT_TSIX, .name = "Mali-G51" }, + { .id = GPU_ID_PRODUCT_TNOX, .name = "Mali-G76" }, + { .id = GPU_ID_PRODUCT_TDVX, .name = "Mali-G31" }, + { .id = GPU_ID_PRODUCT_TGOX, .name = "Mali-G52" }, + { .id = GPU_ID_PRODUCT_TTRX, .name = "Mali-G77" }, + { .id = GPU_ID_PRODUCT_TBEX, .name = "Mali-G78" }, + { .id = GPU_ID_PRODUCT_TBAX, .name = "Mali-G78AE" }, + { .id = GPU_ID_PRODUCT_LBEX, .name = "Mali-G68" }, + { .id = GPU_ID_PRODUCT_TNAX, .name = "Mali-G57" }, + { .id = GPU_ID_PRODUCT_TODX, .name = "Mali-G710" }, + { .id = GPU_ID_PRODUCT_LODX, .name = "Mali-G610" }, + { .id = GPU_ID_PRODUCT_TGRX, .name = "Mali-G510" }, + { .id = GPU_ID_PRODUCT_TVAX, .name = "Mali-G310" }, + { .id = GPU_ID_PRODUCT_LTIX, .name = "Mali-G620" }, + { .id = GPU_ID_PRODUCT_TKRX, .name = "Mali-TKRX" }, + { .id = GPU_ID_PRODUCT_LKRX, .name = "Mali-LKRX" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; - u32 gpu_id; - unsigned int product_id, product_id_mask; + u32 product_id; + u32 product_model; unsigned int i; struct kbase_gpu_props *gpu_props; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; gpu_props = &kbdev->gpu_props; - gpu_id = gpu_props->props.raw_props.gpu_id; - product_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; - product_id_mask = GPU_ID2_PRODUCT_MODEL >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; + product_id = gpu_props->gpu_id.product_id; + product_model = gpu_props->gpu_id.product_model; for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { const struct gpu_product_id_name *p = &gpu_product_id_names[i]; - if ((p->id & product_id_mask) == - (product_id & product_id_mask)) { + if (p->id == product_model) { product_name = p->name; break; } } #if MALI_USE_CSF - if ((product_id & product_id_mask) == - ((GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) { - const bool rt_supported = - 
GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features); + if (product_model == GPU_ID_PRODUCT_TTUX) { + const bool rt_supported = gpu_props->gpu_features.ray_intersection; const u8 nr_cores = gpu_props->num_cores; /* Mali-G715-Immortalis if 10 < number of cores with ray tracing supproted. @@ -3266,13 +3347,24 @@ static ssize_t gpuinfo_show(struct device *dev, dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name, nr_cores); } + + if (product_model == GPU_ID_PRODUCT_TTIX) { + const bool rt_supported = gpu_props->gpu_features.ray_intersection; + const u8 nr_cores = gpu_props->num_cores; + + if ((nr_cores >= 10) && rt_supported) + product_name = "Mali-G720-Immortalis"; + else + product_name = (nr_cores >= 6) ? "Mali-G720" : "Mali-G620"; + + dev_dbg(kbdev->dev, "GPU ID_Name: %s (ID: 0x%x), nr_cores(%u)\n", product_name, + product_id, nr_cores); + } #endif /* MALI_USE_CSF */ - return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name, - kbdev->gpu_props.num_cores, - (gpu_id & GPU_ID_VERSION_MAJOR) >> KBASE_GPU_ID_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID_VERSION_MINOR) >> KBASE_GPU_ID_VERSION_MINOR_SHIFT, - product_id); + return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%08X\n", product_name, + kbdev->gpu_props.num_cores, gpu_props->gpu_id.version_major, + gpu_props->gpu_id.version_minor, product_id); } static DEVICE_ATTR_RO(gpuinfo); @@ -3288,13 +3380,15 @@ static DEVICE_ATTR_RO(gpuinfo); * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t dvfs_period_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t dvfs_period_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) { struct kbase_device *kbdev; int ret; int dvfs_period; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3302,7 +3396,7 @@ static ssize_t dvfs_period_store(struct device *dev, ret = kstrtoint(buf, 0, &dvfs_period); if (ret || dvfs_period <= 0) { dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" - "Use format \n"); + "Use format \n"); return -EINVAL; } @@ -3323,12 +3417,13 @@ static ssize_t dvfs_period_store(struct device *dev, * * Return: The number of bytes output to @buf. */ -static ssize_t dvfs_period_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t dvfs_period_show(struct device *dev, struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev; ssize_t ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3340,8 +3435,9 @@ static ssize_t dvfs_period_show(struct device *dev, static DEVICE_ATTR_RW(dvfs_period); -int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) +int kbase_pm_gpu_freq_init(struct kbase_device *kbdev) { + int err; /* Uses default reference frequency defined in below macro */ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; @@ -3376,7 +3472,16 @@ int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) #endif kbdev->lowest_gpu_freq_khz = lowest_freq_khz; + + err = kbase_device_populate_max_freq(kbdev); + if (unlikely(err < 0)) + return -1; + dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz); + dev_dbg(kbdev->dev, + "Setting default highest frequency to %u kHz (pending devfreq initialization", + kbdev->gpu_props.gpu_freq_khz_max); + return 0; } @@ -3397,8 +3502,8 @@ int kbase_pm_lowest_gpu_freq_init(struct 
kbase_device *kbdev) * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t pm_poweroff_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t pm_poweroff_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) { struct kbase_device *kbdev; struct kbasep_pm_tick_timer_state *stt; @@ -3407,16 +3512,18 @@ static ssize_t pm_poweroff_store(struct device *dev, unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; unsigned long flags; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, - &poweroff_shader_ticks, - &poweroff_gpu_ticks); + items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, &poweroff_shader_ticks, + &poweroff_gpu_ticks); if (items != 3) { - dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" - "Use format \n"); + dev_err(kbdev->dev, + "Couldn't process pm_poweroff write operation.\n" + "Use format \n"); return -EINVAL; } @@ -3446,14 +3553,15 @@ static ssize_t pm_poweroff_store(struct device *dev, * * Return: The number of bytes output to @buf. 
*/ -static ssize_t pm_poweroff_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t pm_poweroff_show(struct device *dev, struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev; struct kbasep_pm_tick_timer_state *stt; ssize_t ret; unsigned long flags; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3461,8 +3569,7 @@ static ssize_t pm_poweroff_show(struct device *dev, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); stt = &kbdev->pm.backend.shader_tick_timer; - ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", - ktime_to_ns(stt->configured_interval), + ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", ktime_to_ns(stt->configured_interval), stt->default_ticks); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -3484,26 +3591,39 @@ static DEVICE_ATTR_RW(pm_poweroff); * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t reset_timeout_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t reset_timeout_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *kbdev; int ret; - int reset_timeout; + u32 reset_timeout; + u32 default_reset_timeout; + + CSTD_UNUSED(attr); kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = kstrtoint(buf, 0, &reset_timeout); - if (ret || reset_timeout <= 0) { + ret = kstrtou32(buf, 0, &reset_timeout); + if (ret || reset_timeout == 0) { dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" - "Use format \n"); + "Use format \n"); return -EINVAL; } +#if MALI_USE_CSF + default_reset_timeout = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT); +#else /* MALI_USE_CSF */ + default_reset_timeout = JM_DEFAULT_RESET_TIMEOUT_MS; +#endif /* !MALI_USE_CSF */ + + if (reset_timeout < default_reset_timeout) + dev_warn(kbdev->dev, "requested reset_timeout(%u) is smaller than default(%u)", 
+ reset_timeout, default_reset_timeout); + kbdev->reset_timeout_ms = reset_timeout; - dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + dev_dbg(kbdev->dev, "Reset timeout: %ums\n", reset_timeout); return count; } @@ -3518,12 +3638,14 @@ static ssize_t reset_timeout_store(struct device *dev, * * Return: The number of bytes output to @buf. */ -static ssize_t reset_timeout_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t reset_timeout_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; ssize_t ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3535,64 +3657,72 @@ static ssize_t reset_timeout_show(struct device *dev, static DEVICE_ATTR_RW(reset_timeout); -static ssize_t mem_pool_size_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t mem_pool_size_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *const kbdev = to_kbase_device(dev); + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, - kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_size); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, kbdev->mem_pools.small, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); } -static ssize_t mem_pool_size_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t mem_pool_size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *const kbdev = to_kbase_device(dev); - int err; + ssize_t err; + + CSTD_UNUSED(attr); if (!kbdev) return -ENODEV; - err = kbase_debugfs_helper_set_attr_from_string(buf, - kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_trim); + err = 
kbase_debugfs_helper_set_attr_from_string(buf, kbdev->mem_pools.small, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); - return err ? err : count; + return err ? err : (ssize_t)count; } static DEVICE_ATTR_RW(mem_pool_size); -static ssize_t mem_pool_max_size_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t mem_pool_max_size_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *const kbdev = to_kbase_device(dev); + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, - kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_max_size); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, kbdev->mem_pools.small, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); } -static ssize_t mem_pool_max_size_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t mem_pool_max_size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *const kbdev = to_kbase_device(dev); - int err; + ssize_t err; + + CSTD_UNUSED(attr); if (!kbdev) return -ENODEV; - err = kbase_debugfs_helper_set_attr_from_string(buf, - kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_set_max_size); + err = kbase_debugfs_helper_set_attr_from_string(buf, kbdev->mem_pools.small, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); - return err ? err : count; + return err ? err : (ssize_t)count; } static DEVICE_ATTR_RW(mem_pool_max_size); @@ -3607,17 +3737,19 @@ static DEVICE_ATTR_RW(mem_pool_max_size); * * Return: The number of bytes output to @buf. 
*/ -static ssize_t lp_mem_pool_size_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t lp_mem_pool_size_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *const kbdev = to_kbase_device(dev); + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, - kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_size); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, kbdev->mem_pools.large, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); } /** @@ -3632,20 +3764,22 @@ static ssize_t lp_mem_pool_size_show(struct device *dev, * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t lp_mem_pool_size_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t lp_mem_pool_size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *const kbdev = to_kbase_device(dev); - int err; + ssize_t err; + + CSTD_UNUSED(attr); if (!kbdev) return -ENODEV; - err = kbase_debugfs_helper_set_attr_from_string(buf, - kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_trim); + err = kbase_debugfs_helper_set_attr_from_string(buf, kbdev->mem_pools.large, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); - return err ? err : count; + return err ? err : (ssize_t)count; } static DEVICE_ATTR_RW(lp_mem_pool_size); @@ -3660,17 +3794,19 @@ static DEVICE_ATTR_RW(lp_mem_pool_size); * * Return: The number of bytes output to @buf. 
*/ -static ssize_t lp_mem_pool_max_size_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t lp_mem_pool_max_size_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *const kbdev = to_kbase_device(dev); + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, - kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_max_size); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, kbdev->mem_pools.large, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); } /** @@ -3684,20 +3820,22 @@ static ssize_t lp_mem_pool_max_size_show(struct device *dev, * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t lp_mem_pool_max_size_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t lp_mem_pool_max_size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *const kbdev = to_kbase_device(dev); - int err; + ssize_t err; + + CSTD_UNUSED(attr); if (!kbdev) return -ENODEV; - err = kbase_debugfs_helper_set_attr_from_string(buf, - kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_set_max_size); + err = kbase_debugfs_helper_set_attr_from_string(buf, kbdev->mem_pools.large, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); - return err ? err : count; + return err ? err : (ssize_t)count; } static DEVICE_ATTR_RW(lp_mem_pool_max_size); @@ -3715,16 +3853,18 @@ static DEVICE_ATTR_RW(lp_mem_pool_max_size); * * Return: The number of bytes output to @buf. 
*/ -static ssize_t show_simplified_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t show_simplified_mem_pool_max_size(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *const kbdev = to_kbase_device(dev); + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, - kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, kbdev->mem_pools.small, 1, + kbase_mem_pool_debugfs_max_size); } /** @@ -3740,14 +3880,16 @@ static ssize_t show_simplified_mem_pool_max_size(struct device *dev, * * Return: The number of bytes output to @buf. */ -static ssize_t set_simplified_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t set_simplified_mem_pool_max_size(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *const kbdev = to_kbase_device(dev); unsigned long new_size; int gid; int err; + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; @@ -3756,14 +3898,13 @@ static ssize_t set_simplified_mem_pool_max_size(struct device *dev, return -EINVAL; for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) - kbase_mem_pool_debugfs_set_max_size( - kbdev->mem_pools.small, gid, (size_t)new_size); + kbase_mem_pool_debugfs_set_max_size(kbdev->mem_pools.small, gid, (size_t)new_size); return count; } static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size, - set_simplified_mem_pool_max_size); + set_simplified_mem_pool_max_size); /** * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory @@ -3779,15 +3920,17 @@ static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size, * Return: The number of bytes output to @buf. 
*/ static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, char * const buf) + struct device_attribute *attr, char *const buf) { struct kbase_device *const kbdev = to_kbase_device(dev); + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, - kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size); + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, kbdev->mem_pools.large, 1, + kbase_mem_pool_debugfs_max_size); } /** @@ -3804,13 +3947,16 @@ static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev, * Return: The number of bytes output to @buf. */ static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, const char *buf, + size_t count) { struct kbase_device *const kbdev = to_kbase_device(dev); unsigned long new_size; int gid; int err; + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; @@ -3819,14 +3965,13 @@ static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev, return -EINVAL; for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) - kbase_mem_pool_debugfs_set_max_size( - kbdev->mem_pools.large, gid, (size_t)new_size); + kbase_mem_pool_debugfs_set_max_size(kbdev->mem_pools.large, gid, (size_t)new_size); return count; } static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size, - set_simplified_lp_mem_pool_max_size); + set_simplified_lp_mem_pool_max_size); /** * show_simplified_ctx_default_max_size - Show the default maximum size for the @@ -3843,16 +3988,17 @@ static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size, * Return: The number of bytes output to @buf. 
*/ static ssize_t show_simplified_ctx_default_max_size(struct device *dev, - struct device_attribute *attr, char * const buf) + struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev = to_kbase_device(dev); size_t max_size; + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - max_size = kbase_mem_pool_config_debugfs_max_size( - kbdev->mem_pool_defaults.small, 0); + max_size = kbase_mem_pool_config_debugfs_max_size(kbdev->mem_pool_defaults.small, 0); return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size); } @@ -3875,12 +4021,15 @@ static ssize_t show_simplified_ctx_default_max_size(struct device *dev, * Return: @count if the function succeeded. An error code on failure. */ static ssize_t set_simplified_ctx_default_max_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, const char *buf, + size_t count) { struct kbase_device *kbdev; unsigned long new_size; int err; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3889,15 +4038,13 @@ static ssize_t set_simplified_ctx_default_max_size(struct device *dev, if (err) return -EINVAL; - kbase_mem_pool_group_config_set_max_size( - &kbdev->mem_pool_defaults, (size_t)new_size); + kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, (size_t)new_size); return count; } -static DEVICE_ATTR(ctx_default_max_size, 0600, - show_simplified_ctx_default_max_size, - set_simplified_ctx_default_max_size); +static DEVICE_ATTR(ctx_default_max_size, 0600, show_simplified_ctx_default_max_size, + set_simplified_ctx_default_max_size); #if !MALI_USE_CSF /** @@ -3911,11 +4058,13 @@ static DEVICE_ATTR(ctx_default_max_size, 0600, * * Return: The number of bytes output to @buf. 
*/ -static ssize_t js_ctx_scheduling_mode_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t js_ctx_scheduling_mode_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3937,8 +4086,8 @@ static ssize_t js_ctx_scheduling_mode_show(struct device *dev, * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t js_ctx_scheduling_mode_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t js_ctx_scheduling_mode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_context *kctx; u32 new_js_ctx_scheduling_mode; @@ -3946,6 +4095,8 @@ static ssize_t js_ctx_scheduling_mode_store(struct device *dev, unsigned long flags; int ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -3953,8 +4104,8 @@ static ssize_t js_ctx_scheduling_mode_store(struct device *dev, ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" - " write operation.\n" - "Use format \n"); + " write operation.\n" + "Use format \n"); return -EINVAL; } @@ -3986,17 +4137,16 @@ static DEVICE_ATTR_RW(js_ctx_scheduling_mode); /* Maximum string length in serialize_jobs_settings[].name */ #define MAX_SERIALIZE_JOBS_NAME_LEN 16 -static struct -{ +static struct { char *name; u8 setting; } serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { - {"none", 0}, - {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, - {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, - {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, - {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | - KBASE_SERIALIZE_RESET} + { "none", 0 }, + { 
"intra-slot", KBASE_SERIALIZE_INTRA_SLOT }, + { "inter-slot", KBASE_SERIALIZE_INTER_SLOT }, + { "full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT }, + { "full-reset", + KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | KBASE_SERIALIZE_RESET } }; /** @@ -4014,16 +4164,15 @@ static struct * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, - const char *buf, size_t count) +static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, const char *buf, + size_t count) { int i; bool valid = false; for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { - kbdev->serialize_jobs = - serialize_jobs_settings[i].setting; + kbdev->serialize_jobs = serialize_jobs_settings[i].setting; valid = true; break; } @@ -4050,8 +4199,7 @@ static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, * * Return: 0 on success, or an error code on error */ -static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, - void *data) +static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, void *data) { struct kbase_device *kbdev = sfile->private; int i; @@ -4060,11 +4208,9 @@ static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) - seq_printf(sfile, "[%s] ", - serialize_jobs_settings[i].name); + seq_printf(sfile, "[%s] ", serialize_jobs_settings[i].name); else - seq_printf(sfile, "%s ", - serialize_jobs_settings[i].name); + seq_printf(sfile, "%s ", serialize_jobs_settings[i].name); } seq_puts(sfile, "\n"); @@ -4086,8 +4232,8 @@ static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, - const char __user *ubuf, size_t count, loff_t *ppos) +static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *ppos) { struct seq_file *s = file->private_data; struct kbase_device *kbdev = s->private; @@ -4112,11 +4258,9 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, * * Return: Zero on success, error code on failure */ -static int kbasep_serialize_jobs_debugfs_open(struct inode *in, - struct file *file) +static int kbasep_serialize_jobs_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, - in->i_private); + return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, in->i_private); } static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { @@ -4143,17 +4287,17 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { * * Return: The number of bytes output to @buf. 
*/ -static ssize_t show_serialize_jobs_sysfs(struct device *dev, - struct device_attribute *attr, +static ssize_t show_serialize_jobs_sysfs(struct device *dev, struct device_attribute *attr, char *buf) { struct kbase_device *kbdev = to_kbase_device(dev); ssize_t ret = 0; int i; + CSTD_UNUSED(attr); + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { - if (kbdev->serialize_jobs == - serialize_jobs_settings[i].setting) + if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", serialize_jobs_settings[i].name); else @@ -4161,7 +4305,7 @@ static ssize_t show_serialize_jobs_sysfs(struct device *dev, serialize_jobs_settings[i].name); } - if (ret < PAGE_SIZE - 1) { + if (ret < (ssize_t)(PAGE_SIZE - 1)) { ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); } else { buf[PAGE_SIZE - 2] = '\n'; @@ -4186,21 +4330,20 @@ static ssize_t show_serialize_jobs_sysfs(struct device *dev, * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t store_serialize_jobs_sysfs(struct device *dev, - struct device_attribute *attr, +static ssize_t store_serialize_jobs_sysfs(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + CSTD_UNUSED(attr); return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); } -static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, - store_serialize_jobs_sysfs); +static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, store_serialize_jobs_sysfs); #endif /* !MALI_USE_CSF */ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - protected_mode_hwcnt_disable_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, protected_mode_hwcnt_disable_work); spinlock_t *backend_lock; unsigned long flags; @@ -4213,8 +4356,7 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) #endif spin_lock_irqsave(backend_lock, flags); - do_disable = !kbdev->protected_mode_hwcnt_desired && - !kbdev->protected_mode_hwcnt_disabled; + do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; spin_unlock_irqrestore(backend_lock, flags); if (!do_disable) @@ -4223,8 +4365,7 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); spin_lock_irqsave(backend_lock, flags); - do_disable = !kbdev->protected_mode_hwcnt_desired && - !kbdev->protected_mode_hwcnt_disabled; + do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; if (do_disable) { /* Protected mode state did not change while we were doing the @@ -4271,16 +4412,14 @@ static const struct protected_mode_ops kbasep_native_protected_ops = { int kbase_protected_mode_init(struct kbase_device *kbdev) { /* Use native protected ops */ - kbdev->protected_dev = 
kzalloc(sizeof(*kbdev->protected_dev), - GFP_KERNEL); + kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), GFP_KERNEL); if (!kbdev->protected_dev) return -ENOMEM; kbdev->protected_dev->data = kbdev; kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS; INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, - kbasep_protected_mode_hwcnt_disable_worker); + kbasep_protected_mode_hwcnt_disable_worker); kbdev->protected_mode_hwcnt_desired = true; - kbdev->protected_mode_hwcnt_disabled = false; return 0; } @@ -4295,7 +4434,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) { return 0; } -static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +static void kbase_common_reg_unmap(struct kbase_device *const kbdev) { } #else /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ @@ -4324,7 +4463,7 @@ out_region: return err; } -static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +static void kbase_common_reg_unmap(struct kbase_device *const kbdev) { if (kbdev->reg) { iounmap(kbdev->reg); @@ -4336,7 +4475,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) } #endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -int registers_map(struct kbase_device * const kbdev) +int registers_map(struct kbase_device *const kbdev) { /* the first memory resource is the physical address of the GPU * registers. 
@@ -4356,11 +4495,11 @@ int registers_map(struct kbase_device * const kbdev) #if MALI_USE_CSF if (kbdev->reg_size < - (CSF_HW_DOORBELL_PAGE_OFFSET + - CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { - dev_err(kbdev->dev, "Insufficient register space, will override to the required size\n"); - kbdev->reg_size = CSF_HW_DOORBELL_PAGE_OFFSET + - CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE; + (CSF_HW_DOORBELL_PAGE_OFFSET + CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + dev_err(kbdev->dev, + "Insufficient register space, will override to the required size\n"); + kbdev->reg_size = + CSF_HW_DOORBELL_PAGE_OFFSET + CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE; } #endif @@ -4387,18 +4526,18 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node) const void *operating_point_node; bool is_pm_enable = false; - power_model_node = of_get_child_by_name(gpu_node, - "power_model"); + power_model_node = of_get_child_by_name(gpu_node, "power-model"); + if (!power_model_node) + power_model_node = of_get_child_by_name(gpu_node, "power_model"); + if (power_model_node) is_pm_enable = true; - cooling_cells_node = of_get_property(gpu_node, - "#cooling-cells", NULL); + cooling_cells_node = of_get_property(gpu_node, "#cooling-cells", NULL); if (cooling_cells_node) is_pm_enable = true; - operating_point_node = of_get_property(gpu_node, - "operating-points", NULL); + operating_point_node = of_get_property(gpu_node, "operating-points", NULL); if (operating_point_node) is_pm_enable = true; @@ -4409,8 +4548,9 @@ static bool kbase_is_pv_enabled(const struct device_node *gpu_node) { const void *arbiter_if_node; - arbiter_if_node = of_get_property(gpu_node, - "arbiter_if", NULL); + arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL); return arbiter_if_node ? 
true : false; } @@ -4420,9 +4560,7 @@ static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) const void *coherency_dts; u32 coherency; - coherency_dts = of_get_property(gpu_node, - "system-coherency", - NULL); + coherency_dts = of_get_property(gpu_node, "system-coherency", NULL); if (coherency_dts) { coherency = be32_to_cpup(coherency_dts); if (coherency == COHERENCY_ACE) @@ -4438,21 +4576,20 @@ int kbase_device_pm_init(struct kbase_device *kbdev) int err = 0; #if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) - - u32 gpu_id; - u32 product_id; - u32 gpu_model_id; + u32 product_model; if (kbase_is_pv_enabled(kbdev->dev->of_node)) { dev_info(kbdev->dev, "Arbitration interface enabled\n"); if (kbase_is_pm_enabled(kbdev->dev->of_node)) { /* Arbitration AND power management invalid */ - dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); + dev_err(kbdev->dev, + "Invalid combination of arbitration AND power management\n"); return -EPERM; } if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { /* Arbitration AND full coherency invalid */ - dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); + dev_err(kbdev->dev, + "Invalid combination of arbitration AND full coherency\n"); return -EPERM; } err = kbase_arbiter_pm_early_init(kbdev); @@ -4461,15 +4598,14 @@ int kbase_device_pm_init(struct kbase_device *kbdev) * supported GPU platform */ kbase_pm_register_access_enable(kbdev); - gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, + kbase_reg_get_gpu_id(kbdev)); kbase_pm_register_access_disable(kbdev); - product_id = - KBASE_UBFX32(gpu_id, KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16); - gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); + product_model = kbdev->gpu_props.gpu_id.product_model; - if (gpu_model_id != GPU_ID2_PRODUCT_TGOX - && gpu_model_id != GPU_ID2_PRODUCT_TNOX - && gpu_model_id != 
GPU_ID2_PRODUCT_TBAX) { + if (product_model != GPU_ID_PRODUCT_TGOX && + product_model != GPU_ID_PRODUCT_TNOX && + product_model != GPU_ID_PRODUCT_TBAX) { kbase_arbiter_pm_early_term(kbdev); dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); return -EPERM; @@ -4516,9 +4652,8 @@ int power_control_init(struct kbase_device *kbdev) int err = 0; unsigned int i; #if defined(CONFIG_REGULATOR) - static const char * const regulator_names[] = { - "mali", "mem" - }; + static const char *const regulator_names[] = { "mali", "mem" }; + // BUILD_BUG_ON(ARRAY_SIZE(regulator_names) < BASE_MAX_NR_CLOCKS_REGULATORS); #endif /* CONFIG_REGULATOR */ if (!kbdev) @@ -4535,8 +4670,7 @@ int power_control_init(struct kbase_device *kbdev) * operating with a partial initialization of regulators. */ for (i = 0; i < ARRAY_SIZE(regulator_names); i++) { - kbdev->regulators[i] = regulator_get_optional(kbdev->dev, - regulator_names[i]); + kbdev->regulators[i] = regulator_get_optional(kbdev->dev, regulator_names[i]); if (IS_ERR(kbdev->regulators[i])) { err = PTR_ERR(kbdev->regulators[i]); kbdev->regulators[i] = NULL; @@ -4573,9 +4707,7 @@ int power_control_init(struct kbase_device *kbdev) err = clk_prepare(kbdev->clocks[i]); if (err) { - dev_err(kbdev->dev, - "Failed to prepare and enable clock (%d)\n", - err); + dev_err(kbdev->dev, "Failed to prepare and enable clock (%d)\n", err); clk_put(kbdev->clocks[i]); break; } @@ -4606,13 +4738,12 @@ int power_control_init(struct kbase_device *kbdev) err = kbdev->token; goto regulators_probe_defer; } - } #elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (kbdev->nr_regulators > 0) { - kbdev->opp_table = - dev_pm_opp_set_regulators(kbdev->dev, regulator_names, - kbdev->nr_regulators); + kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, + kbdev->nr_regulators); + if (IS_ERR(kbdev->opp_table)) { dev_err(kbdev->dev, "Failed to set regulators\n"); return 0; @@ -4641,7 +4772,7 @@ int power_control_init(struct 
kbase_device *kbdev) #endif /* CONFIG_PM_OPP */ return 0; -#if defined(CONFIG_PM_OPP) && \ +#if defined(CONFIG_PM_OPP) && \ ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_REGULATOR)) for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { if (kbdev->clocks[i]) { @@ -4711,25 +4842,25 @@ static void trigger_reset(struct kbase_device *kbdev) kbase_pm_context_idle(kbdev); } -#define MAKE_QUIRK_ACCESSORS(type) \ -static int type##_quirks_set(void *data, u64 val) \ -{ \ - struct kbase_device *kbdev; \ - kbdev = (struct kbase_device *)data; \ - kbdev->hw_quirks_##type = (u32)val; \ - trigger_reset(kbdev); \ - return 0; \ -} \ -\ -static int type##_quirks_get(void *data, u64 *val) \ -{ \ - struct kbase_device *kbdev; \ - kbdev = (struct kbase_device *)data; \ - *val = kbdev->hw_quirks_##type; \ - return 0; \ -} \ -DEFINE_DEBUGFS_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get, \ - type##_quirks_set, "%llu\n") +#define MAKE_QUIRK_ACCESSORS(type) \ + static int type##_quirks_set(void *data, u64 val) \ + { \ + struct kbase_device *kbdev; \ + kbdev = (struct kbase_device *)data; \ + kbdev->hw_quirks_##type = (u32)val; \ + trigger_reset(kbdev); \ + return 0; \ + } \ + \ + static int type##_quirks_get(void *data, u64 *val) \ + { \ + struct kbase_device *kbdev; \ + kbdev = (struct kbase_device *)data; \ + *val = kbdev->hw_quirks_##type; \ + return 0; \ + } \ + DEFINE_DEBUGFS_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get, type##_quirks_set, \ + "%llu\n") MAKE_QUIRK_ACCESSORS(sc); MAKE_QUIRK_ACCESSORS(tiler); @@ -4773,18 +4904,18 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_trigger_reset, NULL, &kbase_device_debugfs_reset_w * * Return: Number of bytes added to user buffer */ -static ssize_t debugfs_protected_debug_mode_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) +static ssize_t debugfs_protected_debug_mode_read(struct file *file, char __user *buf, size_t len, + loff_t *ppos) { struct kbase_device *kbdev = (struct kbase_device 
*)file->private_data; u32 gpu_status; ssize_t ret_val; kbase_pm_context_active(kbdev); - gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); + gpu_status = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)); kbase_pm_context_idle(kbdev); - if (gpu_status & GPU_DBGEN) + if (gpu_status & GPU_STATUS_GPU_DBG_ENABLED) ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); else ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); @@ -4804,37 +4935,32 @@ static const struct file_operations fops_protected_debug_mode = { .llseek = default_llseek, }; -static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, - void *data) +static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, void *data) { CSTD_UNUSED(data); - return kbase_debugfs_helper_seq_read(sfile, - MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_config_debugfs_max_size); + return kbase_debugfs_helper_seq_read(sfile, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_max_size); } static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file, - const char __user *ubuf, size_t count, loff_t *ppos) + const char __user *ubuf, size_t count, + loff_t *ppos) { - int err = 0; + ssize_t err = 0; CSTD_UNUSED(ppos); - err = kbase_debugfs_helper_seq_write(file, ubuf, count, - MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_config_debugfs_set_max_size); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_set_max_size); - return err ? err : count; + return err ? 
err : (ssize_t)count; } -static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, - struct file *file) +static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, struct file *file) { - return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, - in->i_private); + return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, in->i_private); } -static const struct file_operations - kbase_device_debugfs_mem_pool_max_size_fops = { +static const struct file_operations kbase_device_debugfs_mem_pool_max_size_fops = { .owner = THIS_MODULE, .open = kbase_device_debugfs_mem_pool_max_size_open, .read = seq_read, @@ -4863,21 +4989,20 @@ static struct dentry *debugfs_ctx_defaults_init(struct kbase_device *const kbdev return dentry; } - debugfs_create_bool("infinite_cache", mode, - debugfs_ctx_defaults_directory, - &kbdev->infinite_cache_active_default); + debugfs_create_bool("infinite_cache", mode, debugfs_ctx_defaults_directory, + &kbdev->infinite_cache_active_default); dentry = debugfs_create_file("mem_pool_max_size", mode, debugfs_ctx_defaults_directory, - &kbdev->mem_pool_defaults.small, - &kbase_device_debugfs_mem_pool_max_size_fops); + &kbdev->mem_pool_defaults.small, + &kbase_device_debugfs_mem_pool_max_size_fops); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create mem_pool_max_size debugfs entry\n"); return dentry; } dentry = debugfs_create_file("lp_mem_pool_max_size", mode, debugfs_ctx_defaults_directory, - &kbdev->mem_pool_defaults.large, - &kbase_device_debugfs_mem_pool_max_size_fops); + &kbdev->mem_pool_defaults.large, + &kbase_device_debugfs_mem_pool_max_size_fops); if (IS_ERR_OR_NULL(dentry)) dev_err(kbdev->dev, "Unable to create lp_mem_pool_max_size debugfs entry\n"); @@ -4896,9 +5021,7 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) kbdev->mali_debugfs_directory = dentry; if (IS_ERR_OR_NULL(dentry)) { - dev_err(kbdev->dev, - "Couldn't create mali debugfs directory: %s\n", - 
kbdev->devname); + dev_err(kbdev->dev, "Couldn't create mali debugfs directory: %s\n", kbdev->devname); return dentry; } @@ -4934,32 +5057,29 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) /* fops_* variables created by invocations of macro * MAKE_QUIRK_ACCESSORS() above. */ - dentry = debugfs_create_file("quirks_sc", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_sc_quirks); + dentry = debugfs_create_file("quirks_sc", 0644, kbdev->mali_debugfs_directory, kbdev, + &fops_sc_quirks); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create quirks_sc debugfs entry\n"); return dentry; } - dentry = debugfs_create_file("quirks_tiler", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_tiler_quirks); + dentry = debugfs_create_file("quirks_tiler", 0644, kbdev->mali_debugfs_directory, kbdev, + &fops_tiler_quirks); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create quirks_tiler debugfs entry\n"); return dentry; } - dentry = debugfs_create_file("quirks_mmu", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_mmu_quirks); + dentry = debugfs_create_file("quirks_mmu", 0644, kbdev->mali_debugfs_directory, kbdev, + &fops_mmu_quirks); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create quirks_mmu debugfs entry\n"); return dentry; } - dentry = debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, - kbdev, &fops_gpu_quirks); + dentry = debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, kbdev, + &fops_gpu_quirks); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create quirks_gpu debugfs entry\n"); return dentry; @@ -4971,17 +5091,17 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { dentry = debugfs_create_file("protected_debug_mode", 0444, - kbdev->mali_debugfs_directory, kbdev, - &fops_protected_debug_mode); + kbdev->mali_debugfs_directory, kbdev, + 
&fops_protected_debug_mode); if (IS_ERR_OR_NULL(dentry)) { - dev_err(kbdev->dev, "Unable to create protected_debug_mode debugfs entry\n"); + dev_err(kbdev->dev, + "Unable to create protected_debug_mode debugfs entry\n"); return dentry; } } - dentry = debugfs_create_file("reset", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_trigger_reset); + dentry = debugfs_create_file("reset", 0644, kbdev->mali_debugfs_directory, kbdev, + &fops_trigger_reset); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create reset debugfs entry\n"); return dentry; @@ -4998,9 +5118,8 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ #if !MALI_USE_CSF - dentry = debugfs_create_file("serialize_jobs", 0644, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_serialize_jobs_debugfs_fops); + dentry = debugfs_create_file("serialize_jobs", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbasep_serialize_jobs_debugfs_fops); if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Unable to create serialize_jobs debugfs entry\n"); return dentry; @@ -5030,87 +5149,125 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev) } #endif /* CONFIG_DEBUG_FS */ +/** + * kbase_device_normalize_coherency_bitmap - Update the supported coherency + * bitmap for devices where the flags were incorrectly documented. + * + * @kbdev: Kbase device + * + * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly documented for tMIx, + * so force the correct value here. + * + * Return: u32 bitmap of the supported coherency modes. 
+ */ +static u32 kbase_device_normalize_coherency_bitmap(struct kbase_device *kbdev) +{ + u32 supported_coherency_bitmap = kbdev->gpu_props.coherency_mode; + + if ((kbdev->gpu_props.gpu_id.product_model == GPU_ID_PRODUCT_TMIX) && + (supported_coherency_bitmap == COHERENCY_FEATURE_BIT(COHERENCY_ACE))) + supported_coherency_bitmap |= COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + + return supported_coherency_bitmap; +} + +/** + * kbase_device_supports_coherency_mode - Check if the GPU supports a coherency mode. + * + * @kbdev: Kbase device instance. + * @coherency_mode: Bitmask of requested coherency modes. + * + * The coherency interfaces supported by the individual GPU vary based on the + * hardware revision and architecture. For instance: + * * JM supports both ACE and ACE-lite. + * * CSF supports ACE-lite only. + * * Some GPUs explicitly don't support it + * + * Return: boolean indicating whether the current GPU supports the + * coherency mode. + */ +static bool kbase_device_supports_coherency_mode(struct kbase_device *kbdev, u32 coherency_mode) +{ + u32 supported_coherency_bitmap = kbase_device_normalize_coherency_bitmap(kbdev); + + /* In the case of invalid flags specified from the DT node, we want to exit + * early. + */ + if (coherency_mode > COHERENCY_NONE) { + dev_warn(kbdev->dev, "Ignoring unsupported coherency mode %u set from dtb", + coherency_mode); + return false; + } + + /* ACE coherency is a little different, since it is explicitly not supported + * on CSF GPUs. + */ + if (coherency_mode == COHERENCY_ACE) { + if (IS_ENABLED(MALI_USE_CSF) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)) { + dev_err(kbdev->dev, + "ACE coherency not supported on CSF, wrong DT configuration"); + return false; + } + } + + /* Finally, we need to know if the hardware supports it at all. 
*/ + if (!(supported_coherency_bitmap & COHERENCY_FEATURE_BIT(coherency_mode))) { + dev_warn(kbdev->dev, "Device does not support coherency mode %u set from dtb", + coherency_mode); + return false; + } + + return true; +} + int kbase_device_coherency_init(struct kbase_device *kbdev) { -#if IS_ENABLED(CONFIG_OF) - u32 supported_coherency_bitmap = - kbdev->gpu_props.props.raw_props.coherency_mode; - const void *coherency_override_dts; - bool dma_coherent; - u32 override_coherency, gpu_id; - unsigned int prod_id; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - prod_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - /* Only for tMIx : - * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly - * documented for tMIx so force correct value here. - */ - if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == - GPU_ID2_PRODUCT_TMIX) - if (supported_coherency_bitmap == - COHERENCY_FEATURE_BIT(COHERENCY_ACE)) - supported_coherency_bitmap |= - COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); - -#endif /* CONFIG_OF */ + int err = 0; kbdev->system_coherency = COHERENCY_NONE; /* device tree may override the coherency */ -#if IS_ENABLED(CONFIG_OF) - /* treat "dma-coherency" as a synonym for ACE-lite */ - dma_coherent = of_dma_is_coherent(kbdev->dev->of_node); - coherency_override_dts = of_get_property(kbdev->dev->of_node, - "system-coherency", - NULL); - if (coherency_override_dts || dma_coherent) { - if (coherency_override_dts) { - override_coherency = be32_to_cpup(coherency_override_dts); - if (dma_coherent && override_coherency != COHERENCY_ACE_LITE) { - dev_err(kbdev->dev, - "system-coherency needs to be 0 when dma-coherent is set\n"); - return -EINVAL; - } - } else { - /* dma-coherent set and system-coherency not specified */ - override_coherency = COHERENCY_ACE_LITE; - } + if (IS_ENABLED(CONFIG_OF)) { + u32 override_coherency; + const void *coherency_override_dts; + bool dma_coherent; -#if MALI_USE_CSF && 
!IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - /* ACE coherency mode is not supported by Driver on CSF GPUs. - * Return an error to signal the invalid device tree configuration. + /* treat "dma-coherency" as a synonym for ACE-lite */ + dma_coherent = of_dma_is_coherent(kbdev->dev->of_node); + coherency_override_dts = + of_get_property(kbdev->dev->of_node, "system-coherency", NULL); + + /* If there's no override, then we can skip the rest of the checks, and + * keep the default value of no coherency. */ - if (override_coherency == COHERENCY_ACE) { + if (!coherency_override_dts && !dma_coherent) + goto early_exit; + + if (coherency_override_dts) + override_coherency = be32_to_cpup(coherency_override_dts); + else + override_coherency = COHERENCY_ACE_LITE; + + if (dma_coherent && override_coherency != COHERENCY_ACE_LITE) { dev_err(kbdev->dev, - "ACE coherency not supported, wrong DT configuration"); - return -EINVAL; + "system-coherency needs to be 0 when dma-coherent is set!"); + err = -EINVAL; + goto early_exit; } -#endif - if ((override_coherency <= COHERENCY_NONE) && - (supported_coherency_bitmap & - COHERENCY_FEATURE_BIT(override_coherency))) { + if (!kbase_device_supports_coherency_mode(kbdev, override_coherency)) { + err = -EINVAL; + goto early_exit; + } - kbdev->system_coherency = override_coherency; + kbdev->system_coherency = override_coherency; - dev_info(kbdev->dev, - "Using coherency mode %u set from dtb", - override_coherency); - } else - dev_warn(kbdev->dev, - "Ignoring unsupported coherency mode %u set from dtb", - override_coherency); + dev_info(kbdev->dev, "Using coherency mode %u set from dtb", override_coherency); } +early_exit: + kbdev->gpu_props.coherency_mode = kbdev->system_coherency; -#endif /* CONFIG_OF */ - - kbdev->gpu_props.props.raw_props.coherency_mode = - kbdev->system_coherency; - - return 0; + return err; } @@ -5128,14 +5285,15 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) * * Return: @count if the function succeeded. 
An error code on failure. */ -static ssize_t csg_scheduling_period_store(struct device *dev, - struct device_attribute *attr, +static ssize_t csg_scheduling_period_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; int ret; unsigned int csg_scheduling_period; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -5150,8 +5308,7 @@ static ssize_t csg_scheduling_period_store(struct device *dev, kbase_csf_scheduler_lock(kbdev); kbdev->csf.scheduler.csg_scheduling_period_ms = csg_scheduling_period; - dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n", - csg_scheduling_period); + dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n", csg_scheduling_period); kbase_csf_scheduler_unlock(kbdev); return count; @@ -5168,19 +5325,19 @@ static ssize_t csg_scheduling_period_store(struct device *dev, * * Return: The number of bytes output to @buf. */ -static ssize_t csg_scheduling_period_show(struct device *dev, - struct device_attribute *attr, +static ssize_t csg_scheduling_period_show(struct device *dev, struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev; ssize_t ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = scnprintf(buf, PAGE_SIZE, "%u\n", - kbdev->csf.scheduler.csg_scheduling_period_ms); + ret = scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->csf.scheduler.csg_scheduling_period_ms); return ret; } @@ -5199,14 +5356,15 @@ static DEVICE_ATTR_RW(csg_scheduling_period); * * Return: @count if the function succeeded. An error code on failure. 
*/ -static ssize_t fw_timeout_store(struct device *dev, - struct device_attribute *attr, const char *buf, +static ssize_t fw_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; int ret; unsigned int fw_timeout; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -5239,12 +5397,13 @@ static ssize_t fw_timeout_store(struct device *dev, * * Return: The number of bytes output to @buf. */ -static ssize_t fw_timeout_show(struct device *dev, - struct device_attribute *attr, char *const buf) +static ssize_t fw_timeout_show(struct device *dev, struct device_attribute *attr, char *const buf) { struct kbase_device *kbdev; ssize_t ret; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; @@ -5271,23 +5430,28 @@ static DEVICE_ATTR_RW(fw_timeout); * * Return: @count if the function succeeded. An error code on failure. */ -static ssize_t idle_hysteresis_time_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t idle_hysteresis_time_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct kbase_device *kbdev; u32 dur = 0; + CSTD_UNUSED(attr); + kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; if (kstrtou32(buf, 0, &dur)) { dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n" - "Use format \n"); + "Use format \n"); return -EINVAL; } - kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + /* In sysFs, The unit of the input value of idle_hysteresis_time is us. + * But the unit of the input parameter of this function is ns, so multiply by 1000 + */ + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur * NSEC_PER_USEC); return count; } @@ -5303,8 +5467,79 @@ static ssize_t idle_hysteresis_time_store(struct device *dev, * * Return: The number of bytes output to @buf. 
*/ -static ssize_t idle_hysteresis_time_show(struct device *dev, - struct device_attribute *attr, char * const buf) +static ssize_t idle_hysteresis_time_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + u32 dur; + + CSTD_UNUSED(attr); + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + /* The unit of return value of idle_hysteresis_time_show is us, So divide by 1000.*/ + dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev) / NSEC_PER_USEC; + ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); + + return ret; +} + +static DEVICE_ATTR_RW(idle_hysteresis_time); + +/** + * idle_hysteresis_time_ns_store - Store callback for CSF + * idle_hysteresis_time_ns sysfs file. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the idle_hysteresis_time_ns sysfs + * file is written to. + * + * This file contains values of the idle hysteresis duration in ns. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t idle_hysteresis_time_ns_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u32 dur = 0; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kstrtou32(buf, 0, &dur)) { + dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time_ns write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + + return count; +} + +/** + * idle_hysteresis_time_ns_show - Show callback for CSF + * idle_hysteresis_time_ns sysfs entry. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. 
+ * + * This function is called to get the current idle hysteresis duration in ns. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t idle_hysteresis_time_ns_show(struct device *dev, struct device_attribute *attr, + char *const buf) { struct kbase_device *kbdev; ssize_t ret; @@ -5320,7 +5555,7 @@ static ssize_t idle_hysteresis_time_show(struct device *dev, return ret; } -static DEVICE_ATTR_RW(idle_hysteresis_time); +static DEVICE_ATTR_RW(idle_hysteresis_time_ns); /** * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value. @@ -5341,10 +5576,13 @@ static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_ struct kbase_device *kbdev = dev_get_drvdata(dev); u32 pwroff; + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; - pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + /* The unit of return value of the function is us, So divide by 1000.*/ + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev) / NSEC_PER_USEC; return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); } @@ -5368,18 +5606,98 @@ static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device struct kbase_device *kbdev = dev_get_drvdata(dev); u32 dur; + const struct kbase_pm_policy *current_policy; + bool always_on; + + CSTD_UNUSED(attr); + if (!kbdev) return -ENODEV; if (kstrtouint(buf, 0, &dur)) return -EINVAL; + current_policy = kbase_pm_get_policy(kbdev); + always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (dur == 0 && !always_on) + return -EINVAL; + + /* In sysFs, The unit of the input value of mcu_shader_pwroff_timeout is us. + * But the unit of the input parameter of this function is ns, so multiply by 1000 + */ + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur * NSEC_PER_USEC); + + return count; +} + +static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); + +/** + * mcu_shader_pwroff_timeout_ns_show - Get the MCU shader Core power-off time value. 
+ * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents + * + * Get the internally recorded MCU shader Core power-off (nominal) timeout value. + * The unit of the value is in nanoseconds. + * + * Return: The number of bytes output to @buf if the + * function succeeded. A Negative value on failure. + */ +static ssize_t mcu_shader_pwroff_timeout_ns_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 pwroff; + + if (!kbdev) + return -ENODEV; + + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); +} + +/** + * mcu_shader_pwroff_timeout_ns_store - Set the MCU shader core power-off time value. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * The duration value (unit: nanoseconds) for configuring MCU Shader Core + * timer, when the shader cores' power transitions are delegated to the + * MCU (normal operational mode) + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t mcu_shader_pwroff_timeout_ns_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 dur; + + const struct kbase_pm_policy *current_policy; + bool always_on; + + if (!kbdev) + return -ENODEV; + + if (kstrtouint(buf, 0, &dur)) + return -EINVAL; + + current_policy = kbase_pm_get_policy(kbdev); + always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (dur == 0 && !always_on) + return -EINVAL; + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); return count; } -static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); +static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout_ns); #endif /* MALI_USE_CSF */ @@ -5411,7 +5729,9 @@ static struct attribute *kbase_attrs[] = { &dev_attr_csg_scheduling_period.attr, &dev_attr_fw_timeout.attr, &dev_attr_idle_hysteresis_time.attr, + &dev_attr_idle_hysteresis_time_ns.attr, &dev_attr_mcu_shader_pwroff_timeout.attr, + &dev_attr_mcu_shader_pwroff_timeout_ns.attr, #endif /* !MALI_USE_CSF */ &dev_attr_power_policy.attr, &dev_attr_core_mask.attr, @@ -5425,12 +5745,9 @@ static struct attribute *kbase_attrs[] = { NULL }; -static struct attribute *kbase_mempool_attrs[] = { - &dev_attr_max_size.attr, - &dev_attr_lp_max_size.attr, - &dev_attr_ctx_default_max_size.attr, - NULL -}; +static struct attribute *kbase_mempool_attrs[] = { &dev_attr_max_size.attr, + &dev_attr_lp_max_size.attr, + &dev_attr_ctx_default_max_size.attr, NULL }; #define SYSFS_SCHEDULING_GROUP "scheduling" static const struct attribute_group kbase_scheduling_attr_group = { @@ -5462,25 +5779,18 @@ int kbase_sysfs_init(struct kbase_device *kbdev) if (err) return err; - err = sysfs_create_group(&kbdev->dev->kobj, - &kbase_scheduling_attr_group); + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); if (err) { - dev_err(kbdev->dev, "Creation of %s sysfs group failed", - SYSFS_SCHEDULING_GROUP); - sysfs_remove_group(&kbdev->dev->kobj, - 
&kbase_attr_group); + dev_err(kbdev->dev, "Creation of %s sysfs group failed", SYSFS_SCHEDULING_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); return err; } - err = sysfs_create_group(&kbdev->dev->kobj, - &kbase_mempool_attr_group); + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_mempool_attr_group); if (err) { - dev_err(kbdev->dev, "Creation of %s sysfs group failed", - SYSFS_MEMPOOL_GROUP); - sysfs_remove_group(&kbdev->dev->kobj, - &kbase_scheduling_attr_group); - sysfs_remove_group(&kbdev->dev->kobj, - &kbase_attr_group); + dev_err(kbdev->dev, "Creation of %s sysfs group failed", SYSFS_MEMPOOL_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); } return err; @@ -5555,8 +5865,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) if (err) { if (err == -EPROBE_DEFER) - dev_info(kbdev->dev, - "Device initialization Deferred\n"); + dev_info(kbdev->dev, "Device initialization Deferred\n"); else dev_err(kbdev->dev, "Device initialization failed\n"); @@ -5566,8 +5875,15 @@ static int kbase_platform_device_probe(struct platform_device *pdev) mutex_unlock(&kbase_probe_mutex); #endif } else { - dev_info(kbdev->dev, - "Probed as %s\n", dev_name(kbdev->mdev.this_device)); +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + /* Since upstream is not exporting mmap_min_addr, kbase at the + * moment is unable to track possible kernel changes via sysfs. + * Flag this out in a device info message. 
+ */ + dev_info(kbdev->dev, KBASE_COMPILED_MMAP_MIN_ADDR_MSG); +#endif + + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); kbase_increment_device_id(); #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_unlock(&kbase_probe_mutex); @@ -5760,7 +6076,6 @@ static int kbase_device_runtime_resume(struct device *dev) } #endif /* KBASE_PM_RUNTIME */ - #ifdef KBASE_PM_RUNTIME /** * kbase_device_runtime_idle - Runtime idle callback from the OS. @@ -5817,7 +6132,7 @@ static struct platform_driver kbase_platform_driver = { .probe = kbase_platform_device_probe, .remove = kbase_platform_device_remove, .driver = { - .name = kbase_drv_name, + .name = KBASE_DRV_NAME, .pm = &kbase_pm_ops, .of_match_table = of_match_ptr(kbase_dt_ids), .probe_type = PROBE_PREFER_ASYNCHRONOUS, @@ -5863,9 +6178,8 @@ module_init(kbase_driver_init); module_exit(kbase_driver_exit); #endif MODULE_LICENSE("GPL"); -MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ - __stringify(BASE_UK_VERSION_MAJOR) "." \ - __stringify(BASE_UK_VERSION_MINOR) ")"); +MODULE_VERSION(MALI_RELEASE_NAME " (UK version " __stringify(BASE_UK_VERSION_MAJOR) "." __stringify( + BASE_UK_VERSION_MINOR) ")"); MODULE_SOFTDEP("pre: memory_group_manager"); MODULE_INFO(import_ns, "DMA_BUF"); @@ -5884,12 +6198,11 @@ void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value) trace_mali_pm_status(dev_id, event, value); } -void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id) +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, + u8 atom_id) { - trace_mali_job_slots_event(dev_id, event, - (kctx != NULL ? kctx->tgid : 0), - (kctx != NULL ? kctx->pid : 0), - atom_id); + trace_mali_job_slots_event(dev_id, event, (kctx != NULL ? kctx->tgid : 0), + (kctx != NULL ? 
kctx->pid : 0), atom_id); } void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h index 7e885ca46ce9..0c794e2e90bc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,5 +36,3 @@ static inline void mali_kbase_print_cs_experimental(void) } #endif /* _KBASE_CS_EXPERIMENTAL_H_ */ - - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c index dc6feb95a391..871d7d0b8395 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c @@ -53,8 +53,6 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev) kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; kbdev->as_free = as_present; /* All ASs initially free */ - memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); - return 0; } @@ -69,10 +67,10 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } -void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx) { kctx->as_nr = KBASEP_AS_NR_INVALID; - atomic_set(&kctx->refcount, 0); + return 0; } /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space @@ -93,8 +91,7 @@ static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) lockdep_assert_held(&kbdev->hwaccess_lock); /* First check if the previously assigned AS is available */ - if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && - (kbdev->as_free & (1u 
<< kctx->as_nr))) + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && (kbdev->as_free & (1u << kctx->as_nr))) return kctx->as_nr; /* The previously assigned AS was taken, we'll be returning any free @@ -125,22 +122,19 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) * assigned the same address space before. */ if (free_as != kctx->as_nr) { - struct kbase_context *const prev_kctx = - kbdev->as_to_kctx[free_as]; + struct kbase_context *const prev_kctx = kbdev->as_to_kctx[free_as]; if (prev_kctx) { WARN_ON(atomic_read(&prev_kctx->refcount) != 0); kbase_mmu_disable(prev_kctx); - KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( - kbdev, prev_kctx->id); + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, + prev_kctx->id); prev_kctx->as_nr = KBASEP_AS_NR_INVALID; } kctx->as_nr = free_as; kbdev->as_to_kctx[free_as] = kctx; - KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( - kbdev, kctx->id, free_as); - kbase_mmu_update(kbdev, &kctx->mmu, - kctx->as_nr); + KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS(kbdev, kctx->id, free_as); + kbase_mmu_update(kbdev, &kctx->mmu, kctx->as_nr); } } else { atomic_dec(&kctx->refcount); @@ -239,14 +233,14 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) WARN_ON(!kbdev->pm.backend.gpu_powered); + kbdev->mmu_unresponsive = false; + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { struct kbase_context *kctx; - kbdev->as[i].is_unresponsive = false; #if MALI_USE_CSF if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { - kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, - MCU_AS_NR); + kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, MCU_AS_NR); continue; } #endif @@ -255,17 +249,14 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) if (atomic_read(&kctx->refcount)) { WARN_ON(kctx->as_nr != i); - kbase_mmu_update(kbdev, &kctx->mmu, - kctx->as_nr); - kbase_ctx_flag_clear(kctx, - KCTX_AS_DISABLED_ON_FAULT); + kbase_mmu_update(kbdev, &kctx->mmu, kctx->as_nr); + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); } else { /* This context 
might have been assigned an * AS before, clear it. */ if (kctx->as_nr != KBASEP_AS_NR_INVALID) { - KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( - kbdev, kctx->id); + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } @@ -276,8 +267,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) } } -struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( - struct kbase_device *kbdev, size_t as_nr) +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount(struct kbase_device *kbdev, size_t as_nr) { unsigned long flags; struct kbase_context *found_kctx = NULL; @@ -292,7 +282,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( found_kctx = kbdev->as_to_kctx[as_nr]; - if (!WARN_ON(found_kctx == NULL)) + if (found_kctx) kbase_ctx_sched_retain_ctx_refcount(found_kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -300,8 +290,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( return found_kctx; } -struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, - size_t as_nr) +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, size_t as_nr) { unsigned long flags; struct kbase_context *found_kctx; @@ -315,8 +304,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, return found_kctx; } -struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( - struct kbase_device *kbdev, size_t as_nr) +struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(struct kbase_device *kbdev, size_t as_nr) { struct kbase_context *found_kctx; @@ -352,7 +340,7 @@ bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx) kbase_ctx_sched_retain_ctx_refcount(kctx); KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, - kbase_ktrace_get_ctx_refcnt(kctx)); + kbase_ktrace_get_ctx_refcnt(kctx)); result = true; } @@ -389,7 +377,7 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) 
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); if (!WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID) && - !WARN_ON(atomic_read(&kctx->refcount) <= 0)) + !WARN_ON(atomic_read(&kctx->refcount) <= 0)) kbase_ctx_sched_release_ctx(kctx); spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); @@ -413,8 +401,7 @@ bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && - (kctx == kbdev->as_to_kctx[kctx->as_nr])) { + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && (kctx == kbdev->as_to_kctx[kctx->as_nr])) { atomic_inc(&kctx->refcount); if (kbdev->as_free & (1u << kctx->as_nr)) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h index 5a8d17547b7b..397724267fdf 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -65,8 +65,10 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev); * * This must be called during context initialization before any other context * scheduling functions are called on @kctx + * + * Return: 0 */ -void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx); /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context @@ -157,8 +159,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); * as being busy or return NULL on failure, indicating that no context was found * in as_nr. 
*/ -struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( - struct kbase_device *kbdev, size_t as_nr); +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount(struct kbase_device *kbdev, size_t as_nr); /** * kbase_ctx_sched_as_to_ctx - Lookup a context based on its current address @@ -169,8 +170,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( * Return: a valid struct kbase_context on success or NULL on failure, * indicating that no context was found in as_nr. */ -struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, - size_t as_nr); +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, size_t as_nr); /** * kbase_ctx_sched_as_to_ctx_nolock - Lookup a context based on its current @@ -184,8 +184,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, * Return: a valid struct kbase_context on success or NULL on failure, * indicating that no context was found in as_nr. */ -struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( - struct kbase_device *kbdev, size_t as_nr); +struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(struct kbase_device *kbdev, size_t as_nr); /** * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.c b/drivers/gpu/arm/bifrost/mali_kbase_debug.c index 6d3b1093b442..77442feb1fda 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2014, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,10 +21,7 @@ #include -static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { - NULL, - NULL -}; +static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { NULL, NULL }; void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) { @@ -38,4 +35,3 @@ void kbasep_debug_assert_call_hook(void) kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); } KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.h b/drivers/gpu/arm/bifrost/mali_kbase_debug.h index 9d6ff73aa4e0..876ecdd5c617 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2015, 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,7 +37,7 @@ #else #define KBASE_DEBUG_DISABLE_ASSERTS 1 #endif -#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ /** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ typedef void kbase_debug_assert_hook(void *); @@ -53,8 +53,7 @@ struct kbasep_debug_assert_cb { * KBASE_DEBUG_SKIP_FUNCTION_NAME */ #if !KBASE_DEBUG_SKIP_TRACE -#define KBASEP_DEBUG_PRINT_TRACE \ - "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) +#define KBASEP_DEBUG_PRINT_TRACE "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) #if !KBASE_DEBUG_SKIP_FUNCTION_NAME #define KBASEP_DEBUG_PRINT_FUNCTION __func__ #else @@ -75,11 +74,11 @@ struct kbasep_debug_assert_cb { */ /* Select the correct system output function*/ #ifdef CONFIG_MALI_BIFROST_DEBUG -#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) \ - do { \ - pr_err("Mali: %s function:%s ", trace, function); \ - pr_err(__VA_ARGS__); \ - pr_err("\n"); \ +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) \ + do { \ + pr_err("Mali: %s function:%s ", trace, function); \ + pr_err(__VA_ARGS__); \ + pr_err("\n"); \ } while (false) #else #define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() @@ -99,8 +98,7 @@ struct kbasep_debug_assert_cb { * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 * */ -#define KBASE_DEBUG_ASSERT(expr) \ - KBASE_DEBUG_ASSERT_MSG(expr, #expr) +#define KBASE_DEBUG_ASSERT(expr) KBASE_DEBUG_ASSERT_MSG(expr, #expr) #if KBASE_DEBUG_DISABLE_ASSERTS #define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() @@ -114,15 +112,16 @@ struct kbasep_debug_assert_cb { * * This macro does nothing if the flag KBASE_DEBUG_DISABLE_ASSERTS is set to 1 */ -#define KBASE_DEBUG_ASSERT_MSG(expr, ...) 
\ - do { \ - if (!(expr)) { \ - KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ - KBASE_CALL_ASSERT_HOOK();\ - BUG();\ - } \ - } while (false) -#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ + do { \ + if (!(expr)) { \ + KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, \ + KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__); \ + KBASE_CALL_ASSERT_HOOK(); \ + BUG(); \ + } \ + } while (false) +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ /** * KBASE_DEBUG_CODE() - Executes the code inside the macro only in debug mode @@ -132,7 +131,7 @@ struct kbasep_debug_assert_cb { #define KBASE_DEBUG_CODE(X) X #else #define KBASE_DEBUG_CODE(X) CSTD_NOP() -#endif /* CONFIG_MALI_BIFROST_DEBUG */ +#endif /* CONFIG_MALI_BIFROST_DEBUG */ /** @} */ @@ -163,4 +162,4 @@ void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param */ void kbasep_debug_assert_call_hook(void); -#endif /* _KBASE_DEBUG_H */ +#endif /* _KBASE_DEBUG_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c index d6518b476115..679dd75187db 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,8 +28,8 @@ static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) { struct list_head *event_list = &kbdev->job_fault_event_list; - unsigned long flags; - bool ret; + unsigned long flags; + bool ret; spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ret = !list_empty(event_list); @@ -48,18 +48,17 @@ static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) list_for_each_entry(event, event_list, head) { if (event->katom->kctx == kctx) { list_del(&event->head); - spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); - + WARN_ON_ONCE(&event->job_fault_work != kctx->job_fault_work); wake_up(&kctx->kbdev->job_fault_resume_wq); - flush_work(&event->job_fault_work); - /* job_fault_event_list can only have a single atom for * each context. */ - return; + break; } } spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); + if (kctx->job_fault_work) + flush_work(kctx->job_fault_work); } static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) @@ -67,7 +66,7 @@ static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) struct kbase_device *kbdev = kctx->kbdev; struct list_head *event_list = &kctx->kbdev->job_fault_event_list; struct base_job_fault_event *event; - unsigned long flags; + unsigned long flags; spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (list_empty(event_list)) { @@ -76,8 +75,7 @@ static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) } list_for_each_entry(event, event_list, head) { if (event->katom->kctx == kctx) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, - flags); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); return false; } } @@ -89,8 +87,8 @@ static int wait_for_job_fault(struct kbase_device *kbdev) { #if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE int 
ret = wait_event_interruptible_timeout(kbdev->job_fault_wq, - kbase_is_job_fault_event_pending(kbdev), - msecs_to_jiffies(2000)); + kbase_is_job_fault_event_pending(kbdev), + msecs_to_jiffies(2000)); if (ret == 0) return -EAGAIN; else if (ret > 0) @@ -99,17 +97,17 @@ static int wait_for_job_fault(struct kbase_device *kbdev) return ret; #else return wait_event_interruptible(kbdev->job_fault_wq, - kbase_is_job_fault_event_pending(kbdev)); + kbase_is_job_fault_event_pending(kbdev)); #endif } /* wait until the fault happen and copy the event */ static int kbase_job_fault_event_wait(struct kbase_device *kbdev, - struct base_job_fault_event *event) + struct base_job_fault_event *event) { - struct list_head *event_list = &kbdev->job_fault_event_list; + struct list_head *event_list = &kbdev->job_fault_event_list; struct base_job_fault_event *event_in; - unsigned long flags; + unsigned long flags; spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); while (list_empty(event_list)) { @@ -124,29 +122,27 @@ static int kbase_job_fault_event_wait(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } - event_in = list_entry(event_list->next, - struct base_job_fault_event, head); + event_in = list_entry(event_list->next, struct base_job_fault_event, head); event->event_code = event_in->event_code; event->katom = event_in->katom; spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); return 0; - } /* remove the event from the queue */ -static struct base_job_fault_event *kbase_job_fault_event_dequeue( - struct kbase_device *kbdev, struct list_head *event_list) +static struct base_job_fault_event *kbase_job_fault_event_dequeue(struct kbase_device *kbdev, + struct list_head *event_list) { struct base_job_fault_event *event; - event = list_entry(event_list->next, - struct base_job_fault_event, head); + CSTD_UNUSED(kbdev); + + event = list_entry(event_list->next, struct base_job_fault_event, head); list_del(event_list->next); return event; - } /* 
Remove all the following atoms after the failed atom in the same context @@ -157,27 +153,33 @@ static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) { struct list_head *event_list = &kctx->job_fault_resume_event_list; + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + while (!list_empty(event_list)) { struct base_job_fault_event *event; event = kbase_job_fault_event_dequeue(kctx->kbdev, - &kctx->job_fault_resume_event_list); - kbase_jd_done_worker(&event->katom->work); + &kctx->job_fault_resume_event_list); + WARN_ON(work_pending(&event->katom->work)); + INIT_WORK(&event->katom->work, kbase_jd_done_worker); + queue_work(kctx->jctx.job_done_wq, &event->katom->work); } - } static void kbase_job_fault_resume_worker(struct work_struct *data) { - struct base_job_fault_event *event = container_of(data, - struct base_job_fault_event, job_fault_work); + struct base_job_fault_event *event = + container_of(data, struct base_job_fault_event, job_fault_work); + struct kbase_device *kbdev; struct kbase_context *kctx; struct kbase_jd_atom *katom; + unsigned long flags; katom = event->katom; kctx = katom->kctx; + kbdev = kctx->kbdev; - dev_info(kctx->kbdev->dev, "Job dumping wait\n"); + dev_info(kbdev->dev, "Job dumping wait\n"); /* When it was waked up, it need to check if queue is empty or the * failed atom belongs to different context. If yes, wake up. Both @@ -185,25 +187,26 @@ static void kbase_job_fault_resume_worker(struct work_struct *data) * should never happen that the job_fault_event_list has the two * atoms belong to the same context. 
*/ - wait_event(kctx->kbdev->job_fault_resume_wq, - kbase_ctx_has_no_event_pending(kctx)); + wait_event(kbdev->job_fault_resume_wq, kbase_ctx_has_no_event_pending(kctx)); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); atomic_set(&kctx->job_fault_count, 0); - kbase_jd_done_worker(&katom->work); + WARN_ON(work_pending(&katom->work)); + INIT_WORK(&katom->work, kbase_jd_done_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); /* In case the following atoms were scheduled during failed job dump * the job_done_worker was held. We need to rerun it after the dump * was finished */ kbase_job_fault_resume_event_cleanup(kctx); - - dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); + dev_info(kbdev->dev, "Job dumping finish, resume scheduler\n"); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static struct base_job_fault_event *kbase_job_fault_event_queue( - struct list_head *event_list, - struct kbase_jd_atom *atom, - u32 completion_code) +static struct base_job_fault_event *kbase_job_fault_event_queue(struct list_head *event_list, + struct kbase_jd_atom *atom, + u32 completion_code) { struct base_job_fault_event *event; @@ -215,28 +218,24 @@ static struct base_job_fault_event *kbase_job_fault_event_queue( list_add_tail(&event->head, event_list); return event; - } -static void kbase_job_fault_event_post(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, u32 completion_code) +static void kbase_job_fault_event_post(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + u32 completion_code) { struct base_job_fault_event *event; unsigned long flags; spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, - katom, completion_code); + event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, katom, completion_code); + INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); + katom->kctx->job_fault_work = &event->job_fault_work; + 
wake_up_interruptible(&kbdev->job_fault_wq); + queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - wake_up_interruptible(&kbdev->job_fault_wq); - - INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); - queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); - - dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", - katom->kctx->tgid, katom->kctx->id); - + dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", katom->kctx->tgid, + katom->kctx->id); } /* @@ -246,8 +245,7 @@ static void kbase_job_fault_event_post(struct kbase_device *kbdev, * Create a Wait queue to wait until the job dump finish */ -bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, - u32 completion_code) +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, u32 completion_code) { struct kbase_context *kctx = katom->kctx; @@ -256,11 +254,9 @@ bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, * If the atom belongs to different context, it can be dumped */ if (atomic_read(&kctx->job_fault_count) > 0) { - kbase_job_fault_event_queue( - &kctx->job_fault_resume_event_list, - katom, completion_code); - dev_info(kctx->kbdev->dev, "queue:%d\n", - kbase_jd_atom_id(kctx, katom)); + kbase_job_fault_event_queue(&kctx->job_fault_resume_event_list, katom, + completion_code); + dev_info(kctx->kbdev->dev, "queue:%d\n", kbase_jd_atom_id(kctx, katom)); return true; } @@ -268,25 +264,19 @@ bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, return false; if (atomic_read(&kctx->kbdev->job_fault_debug) > 0) { - if (completion_code != BASE_JD_EVENT_DONE) { - if (kbase_job_fault_get_reg_snapshot(kctx) == false) { dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); return false; } - kbase_job_fault_event_post(kctx->kbdev, katom, - completion_code); + kbase_job_fault_event_post(kctx->kbdev, katom, completion_code); 
atomic_inc(&kctx->job_fault_count); - dev_info(kctx->kbdev->dev, "post:%d\n", - kbase_jd_atom_id(kctx, katom)); + dev_info(kctx->kbdev->dev, "post:%d\n", kbase_jd_atom_id(kctx, katom)); return true; - } } return false; - } static int debug_job_fault_show(struct seq_file *m, void *v) @@ -296,16 +286,15 @@ static int debug_job_fault_show(struct seq_file *m, void *v) struct kbase_context *kctx = event->katom->kctx; int i; - dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", - kctx->tgid, kctx->id, event->reg_offset); + dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", kctx->tgid, kctx->id, + event->reg_offset); if (kctx->reg_dump == NULL) { dev_warn(kbdev->dev, "reg dump is NULL"); return -1; } - if (kctx->reg_dump[event->reg_offset] == - REGISTER_DUMP_TERMINATION_FLAG) { + if (kctx->reg_dump[event->reg_offset] == REGISTER_DUMP_TERMINATION_FLAG) { /* Return the error here to stop the read. And the * following next() will not be called. The stop can * get the real event resource and release it @@ -317,18 +306,14 @@ static int debug_job_fault_show(struct seq_file *m, void *v) seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); for (i = 0; i < 50; i++) { - if (kctx->reg_dump[event->reg_offset] == - REGISTER_DUMP_TERMINATION_FLAG) { + if (kctx->reg_dump[event->reg_offset] == REGISTER_DUMP_TERMINATION_FLAG) { break; } - seq_printf(m, "%08x: %08x\n", - kctx->reg_dump[event->reg_offset], - kctx->reg_dump[1+event->reg_offset]); + seq_printf(m, "%08x: %08x\n", kctx->reg_dump[event->reg_offset], + kctx->reg_dump[1 + event->reg_offset]); event->reg_offset += 2; - } - return 0; } static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) @@ -336,8 +321,7 @@ static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) struct kbase_device *kbdev = m->private; struct base_job_fault_event *event = (struct base_job_fault_event *)v; - dev_info(kbdev->dev, "debug job fault seq next:%d, %d", - event->reg_offset, (int)*pos); + 
dev_info(kbdev->dev, "debug job fault seq next:%d, %d", event->reg_offset, (int)*pos); return event; } @@ -392,14 +376,12 @@ static void debug_job_fault_stop(struct seq_file *m, void *v) spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (!list_empty(&kbdev->job_fault_event_list)) { - kbase_job_fault_event_dequeue(kbdev, - &kbdev->job_fault_event_list); + kbase_job_fault_event_dequeue(kbdev, &kbdev->job_fault_event_list); wake_up(&kbdev->job_fault_resume_wq); } spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); dev_info(kbdev->dev, "debug job fault seq stop stage 2"); } - } static const struct seq_operations ops = { @@ -423,16 +405,14 @@ static int debug_job_fault_open(struct inode *in, struct file *file) ((struct seq_file *)file->private_data)->private = kbdev; dev_info(kbdev->dev, "debug job fault seq open"); - return 0; - } static int debug_job_fault_release(struct inode *in, struct file *file) { struct kbase_device *kbdev = in->i_private; struct list_head *event_list = &kbdev->job_fault_event_list; - unsigned long flags; + unsigned long flags; seq_release(in, file); @@ -479,28 +459,23 @@ static const struct file_operations kbasep_debug_job_fault_fops = { */ void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_file("job_fault", 0400, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_debug_job_fault_fops); + debugfs_create_file("job_fault", 0400, kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_job_fault_fops); } - int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) { - INIT_LIST_HEAD(&kbdev->job_fault_event_list); init_waitqueue_head(&(kbdev->job_fault_wq)); init_waitqueue_head(&(kbdev->job_fault_resume_wq)); spin_lock_init(&kbdev->job_fault_event_lock); - kbdev->job_fault_resume_workq = alloc_workqueue( - "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); + kbdev->job_fault_resume_workq = + alloc_workqueue("kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); if 
(!kbdev->job_fault_resume_workq) return -ENOMEM; - atomic_set(&kbdev->job_fault_debug, 0); - return 0; } @@ -512,25 +487,21 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) destroy_workqueue(kbdev->job_fault_resume_workq); } - /* * Initialize the relevant data structure per context */ int kbase_debug_job_fault_context_init(struct kbase_context *kctx) { - /* We need allocate double size register range * Because this memory will keep the register address and value */ kctx->reg_dump = vmalloc(0x4000 * 2); if (kctx->reg_dump != NULL) { - if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == - false) { + if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { vfree(kctx->reg_dump); kctx->reg_dump = NULL; } INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); - atomic_set(&kctx->job_fault_count, 0); } return 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h index 059d9c4553bb..0d7d2be28575 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,6 +32,8 @@ * per device and initialize the required lists. * @kbdev: Device pointer * + * This function must be called only when a kbase device is initialized. + * * Return: Zero on success or a negative error code. 
*/ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); @@ -50,9 +52,11 @@ void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); /** - * kbase_debug_job_fault_context_init - Initialize the relevant - * data structure per context + * kbase_debug_job_fault_context_init - Initialize the relevant data structure per context * @kctx: KBase context pointer + * + * This function must be called only when a kbase context is instantiated. + * * Return: 0 on success */ int kbase_debug_job_fault_context_init(struct kbase_context *kctx); @@ -93,8 +97,7 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); * * Return: true if dump is going on */ -bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, - u32 completion_code); +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, u32 completion_code); /** * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers @@ -105,8 +108,7 @@ bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, * * Return: true if initializing successfully */ -bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, - int reg_range); +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, int reg_range); /** * kbase_job_fault_get_reg_snapshot - Read the interested registers for @@ -118,4 +120,4 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, */ bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); -#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ +#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c index 418bb19086bb..c92fb9e0957e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c @@ -24,6 +24,7 @@ */ #include "mali_kbase_debug_mem_allocs.h" +#include 
"mali_kbase_reg_track.h" #include "mali_kbase.h" #include @@ -34,8 +35,7 @@ /** * debug_zone_mem_allocs_show - Show information from specific rbtree - * @zone: Name of GPU virtual memory zone - * @rbtree: Pointer to the root of the rbtree associated with @zone + * @zone: The memory zone to be displayed * @sfile: The debugfs entry * * This function is called to show information about all the GPU allocations of a @@ -43,29 +43,25 @@ * The information like the start virtual address and size (in bytes) is shown for * every GPU allocation mapped in the zone. */ -static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile) +static void debug_zone_mem_allocs_show(struct kbase_reg_zone *zone, struct seq_file *sfile) { struct rb_node *p; + struct rb_root *rbtree = &zone->reg_rbtree; struct kbase_va_region *reg; - const char *type_names[5] = { - "Native", - "Imported UMM", - "Imported user buf", - "Alias", - "Raw" - }; + const char *type_names[5] = { "Native", "Imported UMM", "Imported user buf", "Alias", + "Raw" }; #define MEM_ALLOCS_HEADER \ " VA, VA size, Commit size, Flags, Mem type\n" - seq_printf(sfile, "Zone name: %s\n:", zone); + seq_printf(sfile, "Zone name: %s\n:", kbase_reg_zone_get_name(zone->id)); seq_printf(sfile, MEM_ALLOCS_HEADER); for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); if (!(reg->flags & KBASE_REG_FREE)) { seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n", - reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT, - kbase_reg_current_backed_size(reg) << PAGE_SHIFT, - reg->flags, type_names[reg->gpu_alloc->type]); + reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT, + kbase_reg_current_backed_size(reg) << PAGE_SHIFT, reg->flags, + type_names[reg->gpu_alloc->type]); } } } @@ -82,18 +78,17 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data) { struct 
kbase_context *const kctx = sfile->private; + enum kbase_memory_zone zone_idx; + + CSTD_UNUSED(data); kbase_gpu_vm_lock(kctx); + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + struct kbase_reg_zone *zone; - debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile); - debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile); - debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile); - -#if MALI_USE_CSF - debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile); - debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile); -#endif /* MALI_USE_CSF */ - + zone = &kctx->reg_zone[zone_idx]; + debug_zone_mem_allocs_show(zone, sfile); + } kbase_gpu_vm_unlock(kctx); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c index ce87a0070b72..eb587bd7f6e9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,21 +31,21 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) -#define SHOW_GPU_MEM_DATA(type, format) \ -{ \ - unsigned int i, j; \ - const type *ptr = (type *)cpu_addr; \ - const unsigned int col_width = sizeof(type); \ - const unsigned int row_width = (col_width == sizeof(u64)) ? 
32 : 16; \ - const unsigned int num_cols = row_width / col_width; \ - for (i = 0; i < PAGE_SIZE; i += row_width) { \ - seq_printf(m, "%016llx:", gpu_addr + i); \ - for (j = 0; j < num_cols; j++) \ - seq_printf(m, format, ptr[j]); \ - ptr += num_cols; \ - seq_putc(m, '\n'); \ - } \ -} +#define SHOW_GPU_MEM_DATA(type, format) \ + { \ + unsigned int i, j; \ + const type *ptr = (type *)cpu_addr; \ + const unsigned int col_width = sizeof(type); \ + const unsigned int row_width = (col_width == sizeof(u64)) ? 32 : 16; \ + const unsigned int num_cols = row_width / col_width; \ + for (i = 0; i < PAGE_SIZE; i += row_width) { \ + seq_printf(m, "%016llx:", gpu_addr + i); \ + for (j = 0; j < num_cols; j++) \ + seq_printf(m, format, ptr[j]); \ + ptr += num_cols; \ + seq_putc(m, '\n'); \ + } \ + } struct debug_mem_mapping { struct list_head node; @@ -76,8 +76,8 @@ static void *debug_mem_start(struct seq_file *m, loff_t *_pos) loff_t pos = *_pos; list_for_each_entry(map, &mem_data->mapping_list, node) { - if (pos >= map->nr_pages) { - pos -= map->nr_pages; + if (pos >= (loff_t)map->nr_pages) { + pos -= (loff_t)map->nr_pages; } else { data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -94,6 +94,8 @@ static void *debug_mem_start(struct seq_file *m, loff_t *_pos) static void debug_mem_stop(struct seq_file *m, void *v) { + CSTD_UNUSED(m); + kfree(v); } @@ -138,8 +140,8 @@ static int debug_mem_show(struct seq_file *m, void *v) kbase_gpu_vm_lock(mem_data->kctx); if (data->offset >= map->alloc->nents) { - seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + - data->offset) << PAGE_SHIFT); + seq_printf(m, "%016llx: Unbacked page\n\n", + (map->start_pfn + data->offset) << PAGE_SHIFT); goto out; } @@ -189,13 +191,13 @@ static const struct seq_operations ops = { .show = debug_mem_show, }; -static int debug_mem_zone_open(struct rb_root *rbtree, - struct debug_mem_data *mem_data) +static int debug_mem_zone_open(struct kbase_reg_zone *zone, struct debug_mem_data *mem_data) { int 
ret = 0; struct rb_node *p; struct kbase_va_region *reg; struct debug_mem_mapping *mapping; + struct rb_root *rbtree = &zone->reg_rbtree; for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); @@ -233,8 +235,9 @@ static int debug_mem_open(struct inode *i, struct file *file) struct kbase_context *const kctx = i->i_private; struct debug_mem_data *mem_data; int ret; + enum kbase_memory_zone idx; - if (get_file_rcu(kctx->filp) == 0) + if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile)) return -ENOENT; /* Check if file was opened in write mode. GPU memory contents @@ -263,38 +266,16 @@ static int debug_mem_open(struct inode *i, struct file *file) mem_data->column_width = kctx->mem_view_column_width; - ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } + for (idx = 0; idx < CONTEXT_ZONE_MAX; idx++) { + struct kbase_reg_zone *zone = &kctx->reg_zone[idx]; - ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; + ret = debug_mem_zone_open(zone, mem_data); + if (ret != 0) { + kbase_gpu_vm_unlock(kctx); + goto out; + } } - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - -#if MALI_USE_CSF - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } -#endif - kbase_gpu_vm_unlock(kctx); ((struct seq_file *)file->private_data)->private = mem_data; @@ -307,7 +288,7 @@ out: struct debug_mem_mapping *mapping; mapping = list_first_entry(&mem_data->mapping_list, - struct debug_mem_mapping, node); + struct debug_mem_mapping, node); kbase_mem_phy_alloc_put(mapping->alloc); list_del(&mapping->node); kfree(mapping); @@ -316,7 
+297,7 @@ out: } seq_release(i, file); open_fail: - fput(kctx->filp); + kbase_file_dec_fops_count(kctx->kfile); return ret; } @@ -337,7 +318,7 @@ static int debug_mem_release(struct inode *inode, struct file *file) while (!list_empty(&mem_data->mapping_list)) { mapping = list_first_entry(&mem_data->mapping_list, - struct debug_mem_mapping, node); + struct debug_mem_mapping, node); kbase_mem_phy_alloc_put(mapping->alloc); list_del(&mapping->node); kfree(mapping); @@ -346,13 +327,13 @@ static int debug_mem_release(struct inode *inode, struct file *file) kfree(mem_data); } - fput(kctx->filp); + kbase_file_dec_fops_count(kctx->kfile); return 0; } -static ssize_t debug_mem_write(struct file *file, const char __user *ubuf, - size_t count, loff_t *ppos) +static ssize_t debug_mem_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) { struct kbase_context *const kctx = file->private_data; unsigned int column_width = 0; @@ -365,14 +346,14 @@ static ssize_t debug_mem_write(struct file *file, const char __user *ubuf, if (ret) return ret; if (!is_power_of_2(column_width)) { - dev_dbg(kctx->kbdev->dev, - "Column width %u not a multiple of power of 2", column_width); - return -EINVAL; + dev_dbg(kctx->kbdev->dev, "Column width %u not a multiple of power of 2", + column_width); + return -EINVAL; } if (column_width > 8) { - dev_dbg(kctx->kbdev->dev, - "Column width %u greater than 8 not supported", column_width); - return -EINVAL; + dev_dbg(kctx->kbdev->dev, "Column width %u greater than 8 not supported", + column_width); + return -EINVAL; } kbase_gpu_vm_lock(kctx); @@ -382,29 +363,25 @@ static ssize_t debug_mem_write(struct file *file, const char __user *ubuf, return count; } -static const struct file_operations kbase_debug_mem_view_fops = { - .owner = THIS_MODULE, - .open = debug_mem_open, - .release = debug_mem_release, - .read = seq_read, - .write = debug_mem_write, - .llseek = seq_lseek -}; +static const struct file_operations 
kbase_debug_mem_view_fops = { .owner = THIS_MODULE, + .open = debug_mem_open, + .release = debug_mem_release, + .read = seq_read, + .write = debug_mem_write, + .llseek = seq_lseek }; void kbase_debug_mem_view_init(struct kbase_context *const kctx) { /* Caller already ensures this, but we keep the pattern for * maintenance safety. */ - if (WARN_ON(!kctx) || - WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) return; /* Default column width is 4 */ kctx->mem_view_column_width = sizeof(u32); - debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx, - &kbase_debug_mem_view_fops); + debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx, &kbase_debug_mem_view_fops); } #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c index 1f8db32aa16f..01267b599d89 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,30 +47,31 @@ static int debug_mem_zones_show(struct seq_file *sfile, void *data) { struct kbase_context *const kctx = sfile->private; - size_t i; + struct kbase_reg_zone *reg_zone; + enum kbase_memory_zone zone_idx; - const char *zone_names[KBASE_REG_ZONE_MAX] = { - "SAME_VA", - "CUSTOM_VA", - "EXEC_VA" -#if MALI_USE_CSF - , - "MCU_SHARED_VA", - "EXEC_FIXED_VA", - "FIXED_VA" -#endif - }; + CSTD_UNUSED(data); kbase_gpu_vm_lock(kctx); - for (i = 0; i < KBASE_REG_ZONE_MAX; i++) { - struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i]; + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + reg_zone = &kctx->reg_zone[zone_idx]; if (reg_zone->base_pfn) { - seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i, - reg_zone->base_pfn, reg_zone->va_size_pages); + seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n", + kbase_reg_zone_get_name(zone_idx), zone_idx, reg_zone->base_pfn, + reg_zone->va_size_pages); } } +#if MALI_USE_CSF + reg_zone = &kctx->kbdev->csf.mcu_shared_zone; + + if (reg_zone && reg_zone->base_pfn) { + seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n", + kbase_reg_zone_get_name(MCU_SHARED_ZONE), MCU_SHARED_ZONE, + reg_zone->base_pfn, reg_zone->va_size_pages); + } +#endif kbase_gpu_vm_unlock(kctx); return 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c index c846491e78fb..69e715caf7cc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c @@ -45,9 +45,8 @@ * * Return: 0 if success, negative error code otherwise. 
*/ -static int -set_attr_from_string(char *const buf, void *const array, size_t const nelems, - kbase_debugfs_helper_set_attr_fn * const set_attr_fn) +static int set_attr_from_string(char *const buf, void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn *const set_attr_fn) { size_t index, err = 0; char *ptr = buf; @@ -140,9 +139,9 @@ int kbase_debugfs_string_validator(char *const buf) return err; } -int kbase_debugfs_helper_set_attr_from_string( - const char *const buf, void *const array, size_t const nelems, - kbase_debugfs_helper_set_attr_fn * const set_attr_fn) +int kbase_debugfs_helper_set_attr_from_string(const char *const buf, void *const array, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn *const set_attr_fn) { char *const wbuf = kstrdup(buf, GFP_KERNEL); int err = 0; @@ -157,17 +156,15 @@ int kbase_debugfs_helper_set_attr_from_string( return err; } - err = set_attr_from_string(wbuf, array, nelems, - set_attr_fn); + err = set_attr_from_string(wbuf, array, nelems, set_attr_fn); kfree(wbuf); return err; } -ssize_t kbase_debugfs_helper_get_attr_to_string( - char *const buf, size_t const size, void *const array, - size_t const nelems, - kbase_debugfs_helper_get_attr_fn * const get_attr_fn) +ssize_t kbase_debugfs_helper_get_attr_to_string(char *const buf, size_t const size, + void *const array, size_t const nelems, + kbase_debugfs_helper_get_attr_fn *const get_attr_fn) { ssize_t total = 0; size_t index; @@ -175,20 +172,19 @@ ssize_t kbase_debugfs_helper_get_attr_to_string( for (index = 0; index < nelems; ++index) { const char *postfix = " "; - if (index == (nelems-1)) + if (index == (nelems - 1)) postfix = "\n"; - total += scnprintf(buf + total, size - total, "%zu%s", - get_attr_fn(array, index), postfix); + total += scnprintf(buf + total, size - total, "%zu%s", get_attr_fn(array, index), + postfix); } return total; } -int kbase_debugfs_helper_seq_write( - struct file *const file, const char __user *const ubuf, - size_t const 
count, size_t const nelems, - kbase_debugfs_helper_set_attr_fn * const set_attr_fn) +int kbase_debugfs_helper_seq_write(struct file *const file, const char __user *const ubuf, + size_t const count, size_t const nelems, + kbase_debugfs_helper_set_attr_fn *const set_attr_fn) { const struct seq_file *const sfile = file->private_data; void *const array = sfile->private; @@ -219,16 +215,14 @@ int kbase_debugfs_helper_seq_write( return err; } - err = set_attr_from_string(buf, - array, nelems, set_attr_fn); + err = set_attr_from_string(buf, array, nelems, set_attr_fn); kfree(buf); return err; } -int kbase_debugfs_helper_seq_read( - struct seq_file * const sfile, size_t const nelems, - kbase_debugfs_helper_get_attr_fn * const get_attr_fn) +int kbase_debugfs_helper_seq_read(struct seq_file *const sfile, size_t const nelems, + kbase_debugfs_helper_get_attr_fn *const get_attr_fn) { void *const array = sfile->private; size_t index; @@ -239,7 +233,7 @@ int kbase_debugfs_helper_seq_read( for (index = 0; index < nelems; ++index) { const char *postfix = " "; - if (index == (nelems-1)) + if (index == (nelems - 1)) postfix = "\n"; seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h index cbb24d6e0acf..c8c6c5c0d32f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,8 +30,7 @@ * @index: An element index. The valid range depends on the use-case. * @value: Attribute value to be set. 
*/ -typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, - size_t value); +typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, size_t value); /** * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an @@ -53,9 +52,8 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, * * Return: 0 if success, negative error code otherwise. */ -int kbase_debugfs_helper_set_attr_from_string( - const char *buf, void *array, size_t nelems, - kbase_debugfs_helper_set_attr_fn *set_attr_fn); +int kbase_debugfs_helper_set_attr_from_string(const char *buf, void *array, size_t nelems, + kbase_debugfs_helper_set_attr_fn *set_attr_fn); /** * kbase_debugfs_string_validator - Validate a string to be written to a @@ -108,9 +106,8 @@ typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index); * * Return: Number of characters written excluding the nul terminator. */ -ssize_t kbase_debugfs_helper_get_attr_to_string( - char *buf, size_t size, void *array, size_t nelems, - kbase_debugfs_helper_get_attr_fn *get_attr_fn); +ssize_t kbase_debugfs_helper_get_attr_to_string(char *buf, size_t size, void *array, size_t nelems, + kbase_debugfs_helper_get_attr_fn *get_attr_fn); /** * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an @@ -130,9 +127,8 @@ ssize_t kbase_debugfs_helper_get_attr_to_string( * * Return: 0 if success, negative error code otherwise. */ -int kbase_debugfs_helper_seq_read( - struct seq_file *sfile, size_t nelems, - kbase_debugfs_helper_get_attr_fn *get_attr_fn); +int kbase_debugfs_helper_seq_read(struct seq_file *sfile, size_t nelems, + kbase_debugfs_helper_get_attr_fn *get_attr_fn); /** * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an @@ -153,10 +149,7 @@ int kbase_debugfs_helper_seq_read( * * Return: 0 if success, negative error code otherwise. 
*/ -int kbase_debugfs_helper_seq_write(struct file *file, - const char __user *ubuf, size_t count, - size_t nelems, - kbase_debugfs_helper_set_attr_fn *set_attr_fn); - -#endif /*_KBASE_DEBUGFS_HELPER_H_ */ +int kbase_debugfs_helper_seq_write(struct file *file, const char __user *ubuf, size_t count, + size_t nelems, kbase_debugfs_helper_set_attr_fn *set_attr_fn); +#endif /*_KBASE_DEBUGFS_HELPER_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 809e73000c4d..fd98bea1d183 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -51,10 +52,9 @@ #include #include #include +#include -#include "mali_kbase_fence_defs.h" - #if IS_ENABLED(CONFIG_DEBUG_FS) #include #endif /* CONFIG_DEBUG_FS */ @@ -79,7 +79,7 @@ #include "debug/mali_kbase_debug_ktrace_defs.h" /** Number of milliseconds before we time out on a GPU soft/hard reset */ -#define RESET_TIMEOUT 500 +#define RESET_TIMEOUT 500 /** * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware. @@ -87,7 +87,7 @@ * You can optimize this down if your target devices will only ever support a * small number of job slots. */ -#define BASE_JM_MAX_NR_SLOTS 3 +#define BASE_JM_MAX_NR_SLOTS 3 /** * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware. 
@@ -95,19 +95,19 @@ * You can optimize this down if your target devices will only ever support a * small number of Address Spaces */ -#define BASE_MAX_NR_AS 16 +#define BASE_MAX_NR_AS 16 /* mmu */ #define MIDGARD_MMU_LEVEL(x) (x) -#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) +#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) #define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) #define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) /** setting in kbase_context::as_nr that indicates it's invalid */ -#define KBASEP_AS_NR_INVALID (-1) +#define KBASEP_AS_NR_INVALID (-1) /** * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region, @@ -115,15 +115,6 @@ */ #define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */ -/** - * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones - */ -#if MALI_USE_CSF -#define KBASE_REG_ZONE_MAX 6ul -#else -#define KBASE_REG_ZONE_MAX 4ul -#endif - #include "mali_kbase_hwaccess_defs.h" /* Maximum number of pages of memory that require a permanent mapping, per @@ -164,6 +155,60 @@ struct kbase_as; struct kbase_mmu_setup; struct kbase_kinstr_jm; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * struct kbase_gpu_metrics - Object containing members that are used to emit + * GPU metrics tracepoints for all applications that + * created Kbase context(s) for a GPU. + * + * @active_list: List of applications that did some GPU activity in the recent work period. + * @inactive_list: List of applications that didn't do any GPU activity in the recent work period. + */ +struct kbase_gpu_metrics { + struct list_head active_list; + struct list_head inactive_list; +}; + +/** + * struct kbase_gpu_metrics_ctx - Object created for every application, that created + * Kbase context(s), containing members that are used + * to emit GPU metrics tracepoints for the application. + * + * @link: Links the object in kbase_device::gpu_metrics::active_list + * or kbase_device::gpu_metrics::inactive_list. 
+ * @first_active_start_time: Records the time at which the application first became + * active in the current work period. + * @last_active_start_time: Records the time at which the application last became + * active in the current work period. + * @last_active_end_time: Records the time at which the application last became + * inactive in the current work period. + * @total_active: Tracks the time for which application has been active + * in the current work period. + * @prev_wp_active_end_time: Records the time at which the application last became + * inactive in the previous work period. + * @aid: Unique identifier for an application. + * @kctx_count: Counter to keep a track of the number of Kbase contexts + * created for an application. There may be multiple Kbase + * contexts contributing GPU activity data to a single GPU + * metrics context. + * @active_cnt: Counter that is updated every time the GPU activity starts + * and ends in the current work period for an application. + * @flags: Flags to track the state of GPU metrics context. 
+ */ +struct kbase_gpu_metrics_ctx { + struct list_head link; + u64 first_active_start_time; + u64 last_active_start_time; + u64 last_active_end_time; + u64 total_active; + u64 prev_wp_active_end_time; + unsigned int aid; + unsigned int kctx_count; + u8 active_cnt; + u8 flags; +}; +#endif + /** * struct kbase_io_access - holds information about 1 register access * @@ -212,12 +257,12 @@ struct kbase_debug_copy_buffer { size_t size; struct page **pages; bool is_vmalloc; - int nr_pages; + unsigned int nr_pages; size_t offset; struct kbase_mem_phy_alloc *gpu_alloc; struct page **extres_pages; - int nr_extres_pages; + unsigned int nr_extres_pages; }; struct kbase_device_info { @@ -225,9 +270,9 @@ struct kbase_device_info { }; struct kbase_mmu_setup { - u64 transtab; - u64 memattr; - u64 transcfg; + u64 transtab; + u64 memattr; + u64 transcfg; }; /** @@ -298,7 +343,7 @@ struct kbase_mmu_table { u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; } teardown_pages; /** - * @free_pgds: Scratch memory user for insertion, update and teardown + * @free_pgds: Scratch memory used for insertion, update and teardown * operations to store a temporary list of PGDs to be freed * at the end of the operation. */ @@ -311,21 +356,6 @@ struct kbase_mmu_table { } scratch_mem; }; -/** - * struct kbase_reg_zone - Information about GPU memory region zones - * @base_pfn: Page Frame Number in GPU virtual address space for the start of - * the Zone - * @va_size_pages: Size of the Zone in pages - * - * Track information about a zone KBASE_REG_ZONE() and related macros. - * In future, this could also store the &rb_root that are currently in - * &kbase_context and &kbase_csf_device. 
- */ -struct kbase_reg_zone { - u64 base_pfn; - u64 va_size_pages; -}; - #if MALI_USE_CSF #include "csf/mali_kbase_csf_defs.h" #else @@ -334,14 +364,12 @@ struct kbase_reg_zone { #include "mali_kbase_hwaccess_time.h" -static inline int kbase_as_has_bus_fault(struct kbase_as *as, - struct kbase_fault *fault) +static inline int kbase_as_has_bus_fault(struct kbase_as *as, struct kbase_fault *fault) { return (fault == &as->bf_data); } -static inline int kbase_as_has_page_fault(struct kbase_as *as, - struct kbase_fault *fault) +static inline int kbase_as_has_page_fault(struct kbase_as *as, struct kbase_fault *fault) { return (fault == &as->pf_data); } @@ -374,9 +402,8 @@ struct kbase_clk_rate_listener; * sleep. No clock rate manager functions must be called from here, as * its lock is taken. */ -typedef void -kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener, - u32 clk_index, u32 clk_rate_hz); +typedef void kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener, + u32 clk_index, u32 clk_rate_hz); /** * struct kbase_clk_rate_listener - Clock frequency listener @@ -420,7 +447,15 @@ struct kbase_clk_rate_trace_manager { * Note that some code paths keep shaders/the tiler * powered whilst this is 0. * Use kbase_pm_is_active() instead to check for such cases. - * @suspending: Flag indicating suspending/suspended + * @suspending: Flag set to true when System suspend of GPU device begins and + * set to false only when System resume of GPU device starts. + * So GPU device could be in suspended state while the flag is set. + * The flag is updated with @lock held. + * @resuming: Flag set to true when System resume of GPU device starts and is set + * to false when resume ends. The flag is set to true at the same time + * when @suspending is set to false with @lock held. + * The flag is currently used only to prevent Kbase context termination + * during System resume of GPU device. 
* @runtime_active: Flag to track if the GPU is in runtime suspended or active * state. This ensures that runtime_put and runtime_get * functions are called in pairs. For example if runtime_get @@ -431,7 +466,7 @@ struct kbase_clk_rate_trace_manager { * This structure contains data for the power management framework. * There is one instance of this structure per device in the system. * @zero_active_count_wait: Wait queue set when active_count == 0 - * @resume_wait: system resume of GPU device. + * @resume_wait: Wait queue to wait for the System suspend/resume of GPU device. * @debug_core_mask: Bit masks identifying the available shader cores that are * specified via sysfs. One mask per job slot. * @debug_core_mask_all: Bit masks identifying the available shader cores that @@ -452,6 +487,7 @@ struct kbase_pm_device_data { struct mutex lock; int active_count; bool suspending; + bool resuming; #if MALI_USE_CSF bool runtime_active; #endif @@ -608,28 +644,23 @@ struct kbase_devfreq_opp { * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. 
*/ struct kbase_mmu_mode { - void (*update)(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr); - void (*get_as_setup)(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup); + void (*update)(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr); + void (*get_as_setup)(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup); void (*disable_as)(struct kbase_device *kbdev, int as_nr); phys_addr_t (*pte_to_phy_addr)(u64 entry); int (*ate_is_valid)(u64 ate, int level); int (*pte_is_valid)(u64 pte, int level); - void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, - unsigned long flags, int level); + void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, unsigned long flags, int level); void (*entry_set_pte)(u64 *entry, phys_addr_t phy); void (*entries_invalidate)(u64 *entry, u32 count); unsigned int (*get_num_valid_entries)(u64 *pgd); - void (*set_num_valid_entries)(u64 *pgd, - unsigned int num_of_valid_entries); + void (*set_num_valid_entries)(u64 *pgd, unsigned int num_of_valid_entries); unsigned long flags; }; struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); -#define DEVNAME_SIZE 16 +#define DEVNAME_SIZE 16 /** * enum kbase_devfreq_work_type - The type of work to perform in the devfreq @@ -638,11 +669,7 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device(). * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device(). */ -enum kbase_devfreq_work_type { - DEVFREQ_WORK_NONE, - DEVFREQ_WORK_SUSPEND, - DEVFREQ_WORK_RESUME -}; +enum kbase_devfreq_work_type { DEVFREQ_WORK_NONE, DEVFREQ_WORK_SUSPEND, DEVFREQ_WORK_RESUME }; /** * struct kbase_devfreq_queue_info - Object representing an instance for managing @@ -738,6 +765,13 @@ struct kbase_mem_migrate { * @reg_size: Size of the region containing GPU registers * @reg: Kernel virtual address of the region containing GPU * registers, using which Driver will access the registers. 
+ * @regmap: Top level structure for hw_access regmaps, containing + * the size of the regmap, pointers to Look-Up Tables (LUT). + * @regmap.regs: Pointer to regmap LUT of precomputed iomem pointers from + * GPU register offsets and @reg. + * @regmap.flags: Pointer to regmap LUT of flags detailing each register + * access type and width (32/64-bit). + * @regmap.size: Size of the regs and flags LUT. * @irqs: Array containing IRQ resource info for 3 types of * interrupts : Job scheduling, MMU & GPU events (like * power management, cache etc.) @@ -797,10 +831,14 @@ struct kbase_mem_migrate { * to the GPU device. This points to an internal memory * group manager if no platform-specific memory group * manager was retrieved through device tree. + * @mmu_unresponsive: Flag to indicate MMU is not responding. + * Set if a MMU command isn't completed within + * &kbase_device:mmu_or_gpu_cache_op_wait_time_ms. + * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. * @as: Array of objects representing address spaces of GPU. - * @as_free: Bitpattern of free/available GPU address spaces. * @as_to_kctx: Array of pointers to struct kbase_context, having * GPU adrress spaces assigned to them. + * @as_free: Bitpattern of free/available GPU address spaces. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are @@ -838,7 +876,6 @@ struct kbase_mem_migrate { * @hwaccess_lock must be held when calling * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. - * @vinstr_ctx: vinstr context created per device. * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device. * @timeline_flags: Bitmask defining which sets of timeline tracepoints * are enabled. 
If zero, there is no timeline client and @@ -1009,7 +1046,7 @@ struct kbase_mem_migrate { * @l2_size_override: Used to set L2 cache size via device tree blob * @l2_hash_override: Used to set L2 cache hash via device tree blob * @l2_hash_values_override: true if @l2_hash_values is valid. - * @l2_hash_values: Used to set L2 asn_hash via device tree blob + * @l2_hash_values: Used to set L2 cache slice hash via device tree blob * @sysc_alloc: Array containing values to be programmed into * SYSC_ALLOC[0..7] GPU registers on L2 cache * power down. These come from either DTB or @@ -1030,7 +1067,7 @@ struct kbase_mem_migrate { * allocations. * @dummy_job_wa: struct for dummy job execution workaround for the * GPU hang issue - * @dummy_job_wa.ctx: dummy job workaround context + * @dummy_job_wa.kctx: dummy job workaround context * @dummy_job_wa.jc: dummy job workaround job * @dummy_job_wa.slot: dummy job workaround slot * @dummy_job_wa.flags: dummy job workaround flags @@ -1045,9 +1082,11 @@ struct kbase_mem_migrate { * KCPU queue. These structures may outlive kbase module * itself. Therefore, in such a case, a warning should be * be produced. - * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of - * a MMU operation + * @mmu_or_gpu_cache_op_wait_time_ms: Maximum waiting time in ms for the completion of + * a cache operation via MMU_AS_CONTROL or GPU_CONTROL. * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. + * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking + * is enabled. 
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1061,7 +1100,11 @@ struct kbase_device { u64 reg_start; size_t reg_size; void __iomem *reg; - + struct { + void __iomem **regs; + u32 *flags; + size_t size; + } regmap; struct { int irq; int flags; @@ -1079,7 +1122,7 @@ struct kbase_device { #endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ #endif /* CONFIG_REGULATOR */ char devname[DEVNAME_SIZE]; - u32 id; + u32 id; #if !IS_ENABLED(CONFIG_MALI_REAL_HW) void *model; @@ -1098,9 +1141,10 @@ struct kbase_device { struct memory_group_manager_device *mgm_dev; + bool mmu_unresponsive; struct kbase_as as[BASE_MAX_NR_AS]; - u16 as_free; struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; + u16 as_free; spinlock_t mmu_mask_change; @@ -1146,10 +1190,9 @@ struct kbase_device { struct kbase_hwcnt_context *hwcnt_gpu_ctx; struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; - struct kbase_vinstr_context *vinstr_ctx; struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx; - atomic_t timeline_flags; + atomic_t timeline_flags; struct kbase_timeline *timeline; #if KBASE_KTRACE_TARGET_RBUF @@ -1159,9 +1202,7 @@ struct kbase_device { u64 lowest_gpu_freq_khz; -#if MALI_USE_CSF struct kbase_backend_time backend_time; -#endif bool cache_clean_in_progress; u32 cache_clean_queued; @@ -1169,8 +1210,8 @@ struct kbase_device { void *platform_context; - struct list_head kctx_list; - struct mutex kctx_list_lock; + struct list_head kctx_list; + struct mutex kctx_list_lock; struct rockchip_opp_info opp_info; bool is_runtime_resumed; @@ -1183,7 +1224,7 @@ struct kbase_device { unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS]; u64 current_core_mask; struct kbase_devfreq_opp *devfreq_table; - int num_opps; + unsigned int num_opps; struct kbasep_pm_metrics last_devfreq_metrics; struct ipa_power_model_data *model_data; struct kbase_devfreq_queue_info devfreq_queue; @@ -1297,9 +1338,9 @@ struct kbase_device { u8 l2_size_override; u8 l2_hash_override; bool l2_hash_values_override; - u32 
l2_hash_values[ASN_HASH_COUNT]; + u32 l2_hash_values[GPU_L2_SLICE_HASH_COUNT]; - u32 sysc_alloc[SYSC_ALLOC_COUNT]; + u32 sysc_alloc[GPU_SYSC_ALLOC_COUNT]; struct mutex fw_load_lock; #if MALI_USE_CSF @@ -1328,7 +1369,7 @@ struct kbase_device { spinlock_t gpu_mem_usage_lock; struct { - struct kbase_context *ctx; + struct kbase_context *kctx; u64 jc; int slot; u64 flags; @@ -1336,7 +1377,7 @@ struct kbase_device { bool dummy_job_wa_loaded; #ifdef CONFIG_MALI_ARBITER_SUPPORT - struct kbase_arbiter_device arb; + struct kbase_arbiter_device arb; #endif /* Priority Control Manager device */ struct priority_control_manager_device *pcm_dev; @@ -1361,8 +1402,18 @@ struct kbase_device { #if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) atomic_t live_fence_metadata; #endif - u32 mmu_as_inactive_wait_time_ms; + u32 mmu_or_gpu_cache_op_wait_time_ms; struct kmem_cache *va_region_slab; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics: GPU device wide structure used for emitting GPU metrics tracepoints. + */ + struct kbase_gpu_metrics gpu_metrics; +#endif +#if MALI_USE_CSF + atomic_t fence_signal_timeout_enabled; +#endif }; /** @@ -1379,6 +1430,9 @@ struct kbase_device { * @KBASE_FILE_COMPLETE: Indicates if the setup for context has * completed, i.e. flags have been set for the * context. + * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or + * is complete. This state can only be reached after + * @KBASE_FILE_COMPLETE. * * The driver allows only limited interaction with user-space until setup * is complete. @@ -1388,7 +1442,8 @@ enum kbase_file_state { KBASE_FILE_VSN_IN_PROGRESS, KBASE_FILE_NEED_CTX, KBASE_FILE_CTX_IN_PROGRESS, - KBASE_FILE_COMPLETE + KBASE_FILE_COMPLETE, + KBASE_FILE_DESTROY_CTX }; /** @@ -1398,6 +1453,12 @@ enum kbase_file_state { * allocated from the probe method of the Mali driver. * @filp: Pointer to the struct file corresponding to device file * /dev/malixx instance, passed to the file's open method. 
+ * @owner: Pointer to the file table structure of a process that + * created the instance of /dev/malixx device file. Set to + * NULL when that process closes the file instance. No more + * file operations would be allowed once set to NULL. + * It would be updated only in the Userspace context, i.e. + * when @kbase_open or @kbase_flush is called. * @kctx: Object representing an entity, among which GPU is * scheduled and which gets its own GPU address space. * Invalid until @setup_state is KBASE_FILE_COMPLETE. @@ -1406,13 +1467,44 @@ enum kbase_file_state { * @setup_state is KBASE_FILE_NEED_CTX. * @setup_state: Initialization state of the file. Values come from * the kbase_file_state enumeration. + * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when + * @fops_count and @map_count becomes zero after /dev/malixx + * file was previously closed by the @owner. + * @lock: Lock to serialize the access to members like @owner, @fops_count, + * @map_count. + * @fops_count: Counter that is incremented at the beginning of a method + * defined for @kbase_fops and is decremented at the end. + * So the counter keeps a track of the file operations in progress + * for /dev/malixx file, that are being handled by the Kbase. + * The counter is needed to defer the context termination as + * Userspace can close the /dev/malixx file and flush() method + * can get called when some other file operation is in progress. + * @map_count: Counter to keep a track of the memory mappings present on + * /dev/malixx file instance. The counter is needed to defer the + * context termination as Userspace can close the /dev/malixx + * file and flush() method can get called when mappings are still + * present. + * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0. + * Currently needed only for the "mem_view" debugfs file. 
+ * @event_queue: Wait queue used for blocking the thread, which consumes + * the base_jd_event corresponding to an atom, when there + * are no more posted events. */ struct kbase_file { - struct kbase_device *kbdev; - struct file *filp; + struct kbase_device *kbdev; + struct file *filp; + fl_owner_t owner; struct kbase_context *kctx; - unsigned long api_version; - atomic_t setup_state; + unsigned long api_version; + atomic_t setup_state; + struct work_struct destroy_kctx_work; + spinlock_t lock; + int fops_count; + int map_count; +#if IS_ENABLED(CONFIG_DEBUG_FS) + wait_queue_head_t zero_fops_count_wait; +#endif + wait_queue_head_t event_queue; }; #if MALI_JIT_PRESSURE_LIMIT_BASE /** @@ -1582,8 +1674,8 @@ struct kbase_sub_alloc { /** * struct kbase_context - Kernel base context * - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. + * @kfile: Pointer to the object representing the /dev/malixx device + * file instance. * @kbdev: Pointer to the Kbase device for which the context is created. * @kctx_list_link: Node into Kbase device list of contexts. * @mmu: Structure holding details of the MMU tables for this @@ -1620,22 +1712,6 @@ struct kbase_sub_alloc { * for the allocations >= 2 MB in size. * @reg_lock: Lock used for GPU virtual address space management operations, * like adding/freeing a memory region in the address space. - * Can be converted to a rwlock ?. - * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA - * zone of the GPU virtual address space. Used for allocations - * having the same value for GPU & CPU virtual address. - * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA - * zone of the GPU virtual address space. - * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA - * zone of the GPU virtual address space. Used for GPU-executable - * allocations which don't need the SAME_VA property. 
- * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the - * EXEC_FIXED_VA zone of the GPU virtual address space. Used for - * GPU-executable allocations with FIXED/FIXABLE GPU virtual - * addresses. - * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone - * of the GPU virtual address space. Used for allocations with - * FIXED/FIXABLE GPU virtual addresses. * @num_fixable_allocs: A count for the number of memory allocations with the * BASE_MEM_FIXABLE property. * @num_fixed_allocs: A count for the number of memory allocations with the @@ -1652,9 +1728,6 @@ struct kbase_sub_alloc { * used in conjunction with @cookies bitmask mainly for * providing a mechansim to have the same value for CPU & * GPU virtual address. - * @event_queue: Wait queue used for blocking the thread, which consumes - * the base_jd_event corresponding to an atom, when there - * are no more posted events. * @tgid: Thread group ID of the process whose thread created * the context (by calling KBASE_IOCTL_VERSION_CHECK or * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). @@ -1747,6 +1820,8 @@ struct kbase_sub_alloc { * atom but before the debug data for faulty atom was dumped. * @mem_view_column_width: Controls the number of bytes shown in every column of the * output of "mem_view" debugfs file. + * @job_fault_work: Tracking the latest fault dump work item for assisting the + * operation of the job-fault-dump debug process. * @jsctx_queue: Per slot & priority arrays of object containing the root * of RB-tree holding currently runnable atoms on the job slot * and the head item of the linked list of atoms blocked on @@ -1848,6 +1923,7 @@ struct kbase_sub_alloc { * that created the Kbase context. It would be set only for the * contexts created by the Userspace and not for the contexts * created internally by the Kbase. + * @comm: Record the process name * * A kernel base context is an entity among which the GPU is scheduled. 
* Each context has its own GPU address space. @@ -1856,7 +1932,7 @@ struct kbase_sub_alloc { * is made on the device file. */ struct kbase_context { - struct file *filp; + struct kbase_file *kfile; struct kbase_device *kbdev; struct list_head kctx_list_link; struct kbase_mmu_table mmu; @@ -1877,28 +1953,21 @@ struct kbase_context { struct tagged_addr aliasing_sink_page; - spinlock_t mem_partials_lock; - struct list_head mem_partials; + spinlock_t mem_partials_lock; + struct list_head mem_partials; - struct mutex reg_lock; - - struct rb_root reg_rbtree_same; - struct rb_root reg_rbtree_custom; - struct rb_root reg_rbtree_exec; + struct mutex reg_lock; #if MALI_USE_CSF - struct rb_root reg_rbtree_exec_fixed; - struct rb_root reg_rbtree_fixed; atomic64_t num_fixable_allocs; atomic64_t num_fixed_allocs; #endif - struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; + struct kbase_reg_zone reg_zone[CONTEXT_ZONE_MAX]; #if MALI_USE_CSF struct kbase_csf_context csf; #else struct kbase_jd_context jctx; - struct jsctx_queue jsctx_queue - [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + struct jsctx_queue jsctx_queue[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS]; atomic_t atoms_pulled_all_slots; @@ -1915,7 +1984,6 @@ struct kbase_context { DECLARE_BITMAP(cookies, BITS_PER_LONG); struct kbase_va_region *pending_regions[BITS_PER_LONG]; - wait_queue_head_t event_queue; pid_t tgid; pid_t pid; atomic_t used_pages; @@ -1924,8 +1992,8 @@ struct kbase_context { struct kbase_mem_pool_group mem_pools; - struct shrinker reclaim; - struct list_head evict_list; + struct shrinker reclaim; + struct list_head evict_list; atomic_t evict_nents; struct list_head waiting_soft_jobs; @@ -1955,6 +2023,7 @@ struct kbase_context { struct list_head job_fault_resume_event_list; unsigned int mem_view_column_width; + struct work_struct *job_fault_work; #endif /* CONFIG_DEBUG_FS */ struct kbase_va_region *jit_alloc[1 + 
BASE_JIT_ALLOC_COUNT]; u8 jit_max_allocations; @@ -2000,6 +2069,16 @@ struct kbase_context { #endif struct task_struct *task; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_ctx: Pointer to the GPU metrics context corresponding to the + * application that created the Kbase context. + */ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; +#endif + + char comm[TASK_COMM_LEN]; }; #ifdef CONFIG_MALI_CINSTR_GWT @@ -2048,15 +2127,12 @@ struct kbase_ctx_ext_res_meta { u32 ref; }; -enum kbase_reg_access_type { - REG_READ, - REG_WRITE -}; +enum kbase_reg_access_type { REG_READ, REG_WRITE }; enum kbase_share_attr_bits { /* (1ULL << 8) bit is reserved */ - SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ - SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ + SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ + SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ }; /** @@ -2068,7 +2144,7 @@ enum kbase_share_attr_bits { static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) { if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || - (kbdev->system_coherency == COHERENCY_ACE)) + (kbdev->system_coherency == COHERENCY_ACE)) return true; return false; @@ -2085,21 +2161,20 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) */ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props) { - if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >= - GPU_ID2_MODEL_MAKE(12, 0)) + if (gpu_props->gpu_id.product_model >= GPU_ID_MODEL_MAKE(12, 0)) return 12; /* 4 kB */ return 15; /* 32 kB */ } /* Conversion helpers for setting up high resolution timers */ -#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x)) * 1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) /* Maximum number of loops polling the GPU for a cache flush 
before we assume it must have completed */ -#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 +#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ -#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 +#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 /* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */ -#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 +#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 #endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c index 7d6e475585ae..dc2df465f3b2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,8 +29,7 @@ void kbase_disjoint_init(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev != NULL); - atomic_set(&kbdev->disjoint_event.count, 0); - atomic_set(&kbdev->disjoint_event.state, 0); + /* disjoint_event is already zero-initialized by kzalloc. */ } /* increment the disjoint event count */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c index 0e0dab912145..4b322e62f2de 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,7 +55,7 @@ struct wa_blob { static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz) { - return !(end - base - off < sz); + return !((size_t)(end - base - off) < sz); } static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) @@ -65,7 +65,7 @@ static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) u32 val; for (loop = 0; loop < timeout; loop++) { - val = kbase_reg_read(kbdev, offset); + val = kbase_reg_read32(kbdev, offset); if (val & bits) break; udelay(10); @@ -74,25 +74,28 @@ static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) if (loop == timeout) { dev_err(kbdev->dev, "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", - (unsigned long)offset, (unsigned long)bits, - (unsigned long)val); + (unsigned long)offset, (unsigned long)bits, (unsigned long)val); } return (val & bits); } -static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) +static int wait(struct kbase_device *kbdev, off_t offset, u64 bits, bool set) { int loop; const int timeout = 100; - u32 val; - u32 target = 0; + u64 val; + u64 target = 0; if (set) target = bits; for (loop = 0; loop < timeout; loop++) { - val = kbase_reg_read(kbdev, (offset)); + if (kbase_reg_is_size64(kbdev, offset)) + val = kbase_reg_read64(kbdev, offset); + else + val = kbase_reg_read32(kbdev, offset); + if ((val & bits) == target) break; @@ -101,48 +104,37 @@ static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) if (loop == timeout) { dev_err(kbdev->dev, - "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", - (unsigned long)offset, (unsigned long)bits, - (unsigned long)val); + "Timeout reading register 0x%lx, bits 0x%llx, last read was 0x%llx\n", + (unsigned long)offset, bits, val); return -ETIMEDOUT; } return 0; } -static 
inline int run_job(struct kbase_device *kbdev, int as, int slot, - u64 cores, u64 jc) +static inline int run_job(struct kbase_device *kbdev, int as, int slot, u64 cores, u64 jc) { u32 done; /* setup job */ - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO), - jc & U32_MAX); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI), - jc >> 32); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO), - cores & U32_MAX); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI), - cores >> 32); - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT), - JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(slot, HEAD_NEXT), jc); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(slot, AFFINITY_NEXT), cores); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(slot, CONFIG_NEXT), + JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); /* go */ - kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT), - JS_COMMAND_START); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(slot, COMMAND_NEXT), JS_COMMAND_START); /* wait for the slot to finish (done, error) */ - done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), - (1ul << (16+slot)) | (1ul << slot)); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done); + done = wait_any(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT), + (1ul << (16 + slot)) | (1ul << slot)); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), done); if (done != (1ul << slot)) { - dev_err(kbdev->dev, - "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", - slot, (unsigned long long)cores, - (unsigned long)done); + dev_err(kbdev->dev, "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", + slot, (unsigned long long)cores, (unsigned long)done); dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n", - kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS))); + kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(slot, STATUS))); return -EFAULT; } else { @@ -164,32 +156,29 @@ int kbase_dummy_job_wa_execute(struct 
kbase_device *kbdev, u64 cores) if (!kbdev) return -EFAULT; - if (!kbdev->dummy_job_wa.ctx) + if (!kbdev->dummy_job_wa.kctx) return -EFAULT; - as = kbdev->dummy_job_wa.ctx->as_nr; + as = kbdev->dummy_job_wa.kctx->as_nr; slot = kbdev->dummy_job_wa.slot; jc = kbdev->dummy_job_wa.jc; /* mask off all but MMU IRQs */ - old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + old_gpu_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + old_job_mask = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), 0); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0); /* power up requested cores */ - kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX)); - kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32)); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRON), cores); if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { /* wait for power-ups */ - wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); - if (cores >> 32) - wait(kbdev, SHADER_READY_HI, (cores >> 32), true); + wait(kbdev, GPU_CONTROL_ENUM(SHADER_READY), cores, true); } if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { - int i; + size_t i; /* do for each requested core */ for (i = 0; i < sizeof(cores) * 8; i++) { @@ -211,45 +200,40 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) runs++; } - if (kbdev->dummy_job_wa.flags & - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { /* power off shader cores (to reduce any dynamic leakage) */ - kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX)); - kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); + kbase_reg_write64(kbdev, 
GPU_CONTROL_ENUM(SHADER_PWROFF), cores); /* wait for power off complete */ - wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); - wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); - if (cores >> 32) { - wait(kbdev, SHADER_READY_HI, (cores >> 32), false); - wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); - } - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); + wait(kbdev, GPU_CONTROL_ENUM(SHADER_READY), cores, false); + wait(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS), cores, false); + + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), U32_MAX); } /* restore IRQ masks */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), old_gpu_mask); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), old_job_mask); if (failed) - dev_err(kbdev->dev, - "WA complete with %d failures out of %d runs\n", failed, - runs); + dev_err(kbdev->dev, "WA complete with %d failures out of %d runs\n", failed, runs); return failed ? -EFAULT : 0; } -static ssize_t dummy_job_wa_info_show(struct device * const dev, - struct device_attribute * const attr, char * const buf) +static ssize_t dummy_job_wa_info_show(struct device *const dev, struct device_attribute *const attr, + char *const buf) { struct kbase_device *const kbdev = dev_get_drvdata(dev); int err; - if (!kbdev || !kbdev->dummy_job_wa.ctx) + CSTD_UNUSED(attr); + + if (!kbdev || !kbdev->dummy_job_wa.kctx) return -ENODEV; - err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", - kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags); + err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", kbdev->dummy_job_wa.slot, + kbdev->dummy_job_wa.flags); return err; } @@ -295,14 +279,13 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) err = request_firmware(&firmware, wa_name, kbdev->dev); if (err) { - dev_err(kbdev->dev, "WA blob missing. 
Please refer to the Arm Mali DDK Valhall Release Notes, " - "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); + dev_err(kbdev->dev, + "WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, " + "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); return -ENODEV; } - kctx = kbase_create_context(kbdev, true, - BASE_CONTEXT_CREATE_FLAG_NONE, 0, - NULL); + kctx = kbase_create_context(kbdev, true, BASE_CONTEXT_CREATE_FLAG_NONE, 0, NULL); if (!kctx) { dev_err(kbdev->dev, "Failed to create WA context\n"); @@ -312,8 +295,7 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) fw = firmware->data; fw_end = fw + firmware->size; - dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", - firmware->size); + dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", firmware->size); if (!in_range(fw, fw_end, 0, sizeof(*header))) { dev_err(kbdev->dev, "WA too small\n"); @@ -380,8 +362,8 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) nr_pages = PFN_UP(blob->size); flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; - va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, - &gpu_va, mmu_sync_info); + va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &gpu_va, + mmu_sync_info); if (!va_region) { dev_err(kbdev->dev, "Failed to allocate for blob\n"); @@ -393,18 +375,15 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) /* copy the payload, */ payload = fw + blob->payload_offset; - dst = kbase_vmap(kctx, - va_region->start_pfn << PAGE_SHIFT, + dst = kbase_vmap(kctx, va_region->start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT, &vmap); if (dst) { memcpy(dst, payload, blob->size); kbase_vunmap(kctx, &vmap); } else { - dev_err(kbdev->dev, - "Failed to copy payload\n"); + dev_err(kbdev->dev, "Failed to copy payload\n"); } - } blob_offset = blob->blob_offset; /* follow chain */ } @@ -413,10 +392,9 @@ int kbase_dummy_job_wa_load(struct kbase_device 
*kbdev) kbasep_js_schedule_privileged_ctx(kbdev, kctx); - kbdev->dummy_job_wa.ctx = kctx; + kbdev->dummy_job_wa.kctx = kctx; - err = sysfs_create_file(&kbdev->dev->kobj, - &dev_attr_dummy_job_wa_info.attr); + err = sysfs_create_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); if (err) dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); @@ -431,7 +409,7 @@ no_ctx: void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) { - struct kbase_context *wa_ctx; + struct kbase_context *wa_kctx; /* return if the dummy job has not been loaded */ if (kbdev->dummy_job_wa_loaded == false) @@ -440,13 +418,13 @@ void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) /* Can be safely called even if the file wasn't created on probe */ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); - wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); - WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); + wa_kctx = READ_ONCE(kbdev->dummy_job_wa.kctx); + WRITE_ONCE(kbdev->dummy_job_wa.kctx, NULL); /* make this write visible before we tear down the ctx */ smp_mb(); - if (wa_ctx) { - kbasep_js_release_privileged_ctx(kbdev, wa_ctx); - kbase_destroy_context(wa_ctx); + if (wa_kctx) { + kbasep_js_release_privileged_ctx(kbdev, wa_kctx); + kbase_destroy_context(wa_kctx); } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h index 8713ba1eaa6f..b46197aa6df3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,9 +26,9 @@ #define KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP (1ull << 1) #define KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER (1ull << 2) -#define KBASE_DUMMY_JOB_WA_FLAGS (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | \ - KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) +#define KBASE_DUMMY_JOB_WA_FLAGS \ + (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) #if MALI_USE_CSF @@ -43,8 +43,7 @@ static inline void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) CSTD_UNUSED(kbdev); } -static inline int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, - u64 cores) +static inline int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) { CSTD_UNUSED(kbdev); CSTD_UNUSED(cores); @@ -65,7 +64,7 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) { - return (kbdev->dummy_job_wa.ctx != NULL); + return (kbdev->dummy_job_wa.kctx != NULL); } #endif /* MALI_USE_CSF */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c index e4cb71632aee..e912178b3324 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,6 +37,7 @@ static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *dat { struct kbase_device *kbdev = file->private; + CSTD_UNUSED(data); #if MALI_USE_CSF seq_printf(file, "busy_time: %u idle_time: %u protm_time: %u\n", kbdev->pm.backend.metrics.values.time_busy, @@ -51,11 +52,9 @@ static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *dat return 0; } -static int kbasep_dvfs_utilization_debugfs_open(struct inode *in, - struct file *file) +static int kbasep_dvfs_utilization_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_dvfs_utilization_debugfs_show, - in->i_private); + return single_open(file, kbasep_dvfs_utilization_debugfs_show, in->i_private); } static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = { @@ -73,13 +72,11 @@ void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) return; - file = debugfs_create_file("dvfs_utilization", mode, - kbdev->mali_debugfs_directory, kbdev, + file = debugfs_create_file("dvfs_utilization", mode, kbdev->mali_debugfs_directory, kbdev, &kbasep_dvfs_utilization_debugfs_fops); if (IS_ERR_OR_NULL(file)) { - dev_warn(kbdev->dev, - "Unable to create dvfs debugfs entry"); + dev_warn(kbdev->dev, "Unable to create dvfs debugfs entry"); } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_event.c b/drivers/gpu/arm/bifrost/mali_kbase_event.c index 910c51170ae8..3c59b20913f2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_event.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_event.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,8 @@ #include #include -static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) +static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, + struct kbase_jd_atom *katom) { struct base_jd_udata data; struct kbase_device *kbdev; @@ -48,48 +49,48 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru return data; } -int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) +int kbase_event_dequeue(struct kbase_context *kctx, struct base_jd_event_v2 *uevent) { struct kbase_jd_atom *atom; - KBASE_DEBUG_ASSERT(ctx); + KBASE_DEBUG_ASSERT(kctx); - mutex_lock(&ctx->event_mutex); + mutex_lock(&kctx->event_mutex); - if (list_empty(&ctx->event_list)) { - if (!atomic_read(&ctx->event_closed)) { - mutex_unlock(&ctx->event_mutex); + if (list_empty(&kctx->event_list)) { + if (!atomic_read(&kctx->event_closed)) { + mutex_unlock(&kctx->event_mutex); return -1; } /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ - mutex_unlock(&ctx->event_mutex); + mutex_unlock(&kctx->event_mutex); uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; memset(&uevent->udata, 0, sizeof(uevent->udata)); - dev_dbg(ctx->kbdev->dev, - "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", - BASE_JD_EVENT_DRV_TERMINATED); + dev_dbg(kctx->kbdev->dev, + "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", + BASE_JD_EVENT_DRV_TERMINATED); return 0; } /* normal event processing */ - atomic_dec(&ctx->event_count); - atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); - list_del(ctx->event_list.next); + atomic_dec(&kctx->event_count); + atom = list_entry(kctx->event_list.next, struct kbase_jd_atom, dep_item[0]); + list_del(kctx->event_list.next); - 
mutex_unlock(&ctx->event_mutex); + mutex_unlock(&kctx->event_mutex); - dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); + dev_dbg(kctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); uevent->event_code = atom->event_code; - uevent->atom_number = (atom - ctx->jctx.atoms); + uevent->atom_number = (atom - kctx->jctx.atoms); if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_free_external_resources(atom); - mutex_lock(&ctx->jctx.lock); - uevent->udata = kbase_event_process(ctx, atom); - mutex_unlock(&ctx->jctx.lock); + mutex_lock(&kctx->jctx.lock); + uevent->udata = kbase_event_process(kctx, atom); + mutex_unlock(&kctx->jctx.lock); return 0; } @@ -104,8 +105,7 @@ KBASE_EXPORT_TEST_API(kbase_event_dequeue); */ static void kbase_event_process_noreport_worker(struct work_struct *data) { - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); struct kbase_context *kctx = katom->kctx; if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) @@ -125,8 +125,7 @@ static void kbase_event_process_noreport_worker(struct work_struct *data) * Atoms that do have external resources will be processed on a workqueue, in * order to avoid locking issues. 
*/ -static void kbase_event_process_noreport(struct kbase_context *kctx, - struct kbase_jd_atom *katom) +static void kbase_event_process_noreport(struct kbase_context *kctx, struct kbase_jd_atom *katom) { if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { INIT_WORK(&katom->work, kbase_event_process_noreport_worker); @@ -160,59 +159,54 @@ static int kbase_event_coalesce(struct kbase_context *kctx) return event_count; } -void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) +void kbase_event_post(struct kbase_context *kctx, struct kbase_jd_atom *atom) { - struct kbase_device *kbdev = ctx->kbdev; + struct kbase_device *kbdev = kctx->kbdev; dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { - dev_warn(kbdev->dev, - "%s: Atom %d (%pK) not completed (status %d)\n", - __func__, - kbase_jd_atom_id(atom->kctx, atom), - atom->kctx, - atom->status); + dev_warn(kbdev->dev, "%s: Atom %d (%pK) not completed (status %d)\n", __func__, + kbase_jd_atom_id(atom->kctx, atom), atom->kctx, atom->status); return; } if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { if (atom->event_code == BASE_JD_EVENT_DONE) { dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); - kbase_event_process_noreport(ctx, atom); + kbase_event_process_noreport(kctx, atom); return; } } if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { dev_dbg(kbdev->dev, "Suppressing event (never)\n"); - kbase_event_process_noreport(ctx, atom); + kbase_event_process_noreport(kctx, atom); return; } KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { /* Don't report the event until other event(s) have completed */ dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); - mutex_lock(&ctx->event_mutex); - list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); - ++ctx->event_coalesce_count; - mutex_unlock(&ctx->event_mutex); + 
mutex_lock(&kctx->event_mutex); + list_add_tail(&atom->dep_item[0], &kctx->event_coalesce_list); + ++kctx->event_coalesce_count; + mutex_unlock(&kctx->event_mutex); } else { /* Report the event and any pending events now */ int event_count = 1; - mutex_lock(&ctx->event_mutex); - event_count += kbase_event_coalesce(ctx); - list_add_tail(&atom->dep_item[0], &ctx->event_list); - atomic_add(event_count, &ctx->event_count); - mutex_unlock(&ctx->event_mutex); + mutex_lock(&kctx->event_mutex); + event_count += kbase_event_coalesce(kctx); + list_add_tail(&atom->dep_item[0], &kctx->event_list); + atomic_add(event_count, &kctx->event_count); + mutex_unlock(&kctx->event_mutex); dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); - kbase_event_wakeup(ctx); + kbase_event_wakeup(kctx); /* Post-completion latency */ - trace_sysgraph(SGR_POST, ctx->id, - kbase_jd_atom_id(ctx, atom)); + trace_sysgraph(SGR_POST, kctx->id, kbase_jd_atom_id(kctx, atom)); } } KBASE_EXPORT_TEST_API(kbase_event_post); @@ -232,7 +226,6 @@ int kbase_event_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->event_list); INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); - kctx->event_coalesce_count = 0; kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); if (kctx->event_workq == NULL) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_fence.c index b16b27659e61..023bc6715224 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,11 +29,9 @@ static DEFINE_SPINLOCK(kbase_fence_lock); #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -struct fence * -kbase_fence_out_new(struct kbase_jd_atom *katom) +struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom) #else -struct dma_fence * -kbase_fence_out_new(struct kbase_jd_atom *katom) +struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom) #endif { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -48,14 +46,10 @@ kbase_fence_out_new(struct kbase_jd_atom *katom) if (!fence) return NULL; - dma_fence_init(fence, - &kbase_fence_ops, - &kbase_fence_lock, - katom->dma_fence.context, + dma_fence_init(fence, &kbase_fence_ops, &kbase_fence_lock, katom->dma_fence.context, atomic_inc_return(&katom->dma_fence.seqno)); katom->dma_fence.fence = fence; return fence; } - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index f4507ac4309b..f170e95b39d9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -30,9 +30,9 @@ #if IS_ENABLED(CONFIG_SYNC_FILE) #include -#include "mali_kbase_fence_defs.h" #include "mali_kbase.h" #include "mali_kbase_refcount_defs.h" +#include #if MALI_USE_CSF /* Maximum number of characters in DMA fence timeline name. */ @@ -99,14 +99,13 @@ struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); * * This function will take ownership of one fence reference! 
*/ -#define kbase_fence_fence_in_set(katom, fence) \ - do { \ +#define kbase_fence_fence_in_set(katom, fence) \ + do { \ WARN_ON((katom)->dma_fence.fence_in); \ - (katom)->dma_fence.fence_in = fence; \ + (katom)->dma_fence.fence_in = fence; \ } while (0) #endif - #if !MALI_USE_CSF /** * kbase_fence_out_remove() - Removes the output fence from atom @@ -146,8 +145,7 @@ static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) */ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) { - return katom->dma_fence.fence && - katom->dma_fence.fence->ops == &kbase_fence_ops; + return katom->dma_fence.fence && katom->dma_fence.fence->ops == &kbase_fence_ops; } /** @@ -157,19 +155,10 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) * * Return: 0 on success, < 0 on error */ -static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, - int status) +static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, int status) { - if (status) { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) - fence_set_error(katom->dma_fence.fence, status); -#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) - dma_fence_set_error(katom->dma_fence.fence, status); -#else - katom->dma_fence.fence->status = status; -#endif - } + if (status) + dma_fence_set_error_helper(katom->dma_fence.fence, status); return dma_fence_signal(katom->dma_fence.fence); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h deleted file mode 100644 index 7a150bdf2947..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_FENCE_DEFS_H_ -#define _KBASE_FENCE_DEFS_H_ - -/* - * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) - * This file hides the compatibility issues with this for the rest the driver - */ - -#include - -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - -#include - -#define dma_fence_context_alloc(a) fence_context_alloc(a) -#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) -#define dma_fence_get(a) fence_get(a) -#define dma_fence_put(a) fence_put(a) -#define dma_fence_signal(a) fence_signal(a) -#define dma_fence_is_signaled(a) fence_is_signaled(a) -#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) -#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) - -#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) -#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) -#else -#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) -#endif - -#else - -#include - -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? 
\ - (a)->status ?: 1 \ - : 0) -#endif - -#endif /* < 4.10.0 */ - -#endif /* _KBASE_FENCE_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c index 25b4c9c03b53..7315da8b89f9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,88 +24,72 @@ #include #include -static const char * -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -kbase_fence_get_driver_name(struct fence *fence) -#else -kbase_fence_get_driver_name(struct dma_fence *fence) -#endif +static const char *kbase_fence_get_driver_name(struct dma_fence *fence) { - return kbase_drv_name; + CSTD_UNUSED(fence); + + return KBASE_DRV_NAME; } -static const char * -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -kbase_fence_get_timeline_name(struct fence *fence) -#else -kbase_fence_get_timeline_name(struct dma_fence *fence) -#endif -{ #if MALI_USE_CSF +static const char *kbase_fence_get_timeline_name(struct dma_fence *fence) +{ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; return kcpu_fence->metadata->timeline_name; -#else - return kbase_timeline_name; -#endif /* MALI_USE_CSF */ } - -static bool -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -kbase_fence_enable_signaling(struct fence *fence) #else -kbase_fence_enable_signaling(struct dma_fence *fence) -#endif +static const char *kbase_fence_get_timeline_name(struct dma_fence *fence) { + CSTD_UNUSED(fence); + + return KBASE_TIMELINE_NAME; +} +#endif /* MALI_USE_CSF */ + +static bool kbase_fence_enable_signaling(struct dma_fence *fence) 
+{ + CSTD_UNUSED(fence); + return true; } -static void -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -kbase_fence_fence_value_str(struct fence *fence, char *str, int size) -#else -kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) -#endif +static void kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) { -#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - const char *format = "%u"; -#else - const char *format = "%llu"; -#endif + char *format; + + if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + format = "%u"; + else + format = "%llu"; + if (unlikely(!scnprintf(str, size, format, fence->seqno))) pr_err("Fail to encode fence seqno to string"); } #if MALI_USE_CSF -static void -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -kbase_fence_release(struct fence *fence) -#else -kbase_fence_release(struct dma_fence *fence) -#endif +static void kbase_fence_release(struct dma_fence *fence) { struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); kfree(kcpu_fence); } -#endif -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, +extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ +const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, + .fence_value_str = kbase_fence_fence_value_str, + .release = kbase_fence_release }; #else extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, -#endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = 
kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, -#if MALI_USE_CSF - .fence_value_str = kbase_fence_fence_value_str, - .release = kbase_fence_release -#else - .fence_value_str = kbase_fence_fence_value_str -#endif -}; + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, + .fence_value_str = kbase_fence_fence_value_str }; +#endif /* MALI_USE_CSF */ + KBASE_EXPORT_TEST_API(kbase_fence_ops); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator.h b/drivers/gpu/arm/bifrost/mali_kbase_gator.h index dd7df874640a..cbbfd536642d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,8 +33,8 @@ #include #define GATOR_JOB_SLOT_START 1 -#define GATOR_JOB_SLOT_STOP 2 -#define GATOR_JOB_SLOT_SOFT_STOPPED 3 +#define GATOR_JOB_SLOT_STOP 2 +#define GATOR_JOB_SLOT_SOFT_STOPPED 3 #ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT @@ -42,11 +42,12 @@ struct kbase_context; -void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id); +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, + u8 atom_id); void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value); void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value); void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event); #endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ -#endif /* _KBASE_GATOR_H_ */ +#endif /* _KBASE_GATOR_H_ */ diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c index bf5f259a0bb6..65c60198bcd4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,6 +41,8 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) struct list_head *entry; const struct list_head *kbdev_list; + CSTD_UNUSED(data); + kbdev_list = kbase_device_get_list(); list_for_each(entry, kbdev_list) { struct kbase_device *kbdev = NULL; @@ -49,9 +51,8 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) kbdev = list_entry(entry, struct kbase_device, entry); /* output the total memory usage and cap for this device */ seq_printf(sfile, " \n"); - seq_printf(sfile, "%-16s %10u\n", - kbdev->devname, - atomic_read(&(kbdev->memdev.used_pages))); + seq_printf(sfile, "%-16s %10u\n", kbdev->devname, + atomic_read(&(kbdev->memdev.used_pages))); mutex_lock(&kbdev->kctx_list_lock); seq_printf(sfile, " \n"); list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { @@ -84,6 +85,8 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) */ static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) { + CSTD_UNUSED(in); + return single_open(file, kbasep_gpu_memory_seq_show, NULL); } @@ -100,13 +103,14 @@ static const struct file_operations kbasep_gpu_memory_debugfs_fops = { */ void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_file("gpu_memory", 0444, - kbdev->mali_debugfs_directory, NULL, - 
&kbasep_gpu_memory_debugfs_fops); + debugfs_create_file("gpu_memory", 0444, kbdev->mali_debugfs_directory, NULL, + &kbasep_gpu_memory_debugfs_fops); } #else /* * Stub functions for when debugfs is disabled */ -void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) {} +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ +} #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h index 6d5423f379d4..6ad773658554 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2014, 2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,8 +37,8 @@ * @value: The value that is either read from or written to the register * @write: 1 if it's a register write, 0 if it's a read */ -void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, - u32 value, u8 write); +void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, u32 value, + u8 write); /** * kbasep_gpu_memory_debugfs_init - Initialize gpu_memory debugfs entry @@ -47,4 +47,4 @@ void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, */ void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); -#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ +#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c new file mode 100644 index 000000000000..7b33f86644ea --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include "mali_power_gpu_work_period_trace.h" +#include +#include + +/** + * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context + * + * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in + * the current work period. + * + * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. 
+ */ +enum gpu_metrics_ctx_flags { + ACTIVE_INTERVAL_IN_WP = 1 << 0, + INSIDE_ACTIVE_LIST = 1 << 1, +}; + +static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; + +module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444); +MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns, + "Time interval in nano seconds at which GPU metrics tracepoints are emitted"); + +static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + return (gpu_metrics_ctx->flags & flag); +} + +static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + gpu_metrics_ctx->flags |= flag; +} + +static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + gpu_metrics_ctx->flags &= ~flag; +} + +static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + u64 start_time, u64 end_time, u64 total_active) +{ +#ifdef CONFIG_MALI_BIFROST_DEBUG + WARN(total_active > NSEC_PER_SEC, "total_active %llu > 1 second for aid %u active_cnt %u", + total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(start_time >= end_time, "start_time %llu >= end_time %llu for aid %u active_cnt %u", + start_time, end_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(total_active > (end_time - start_time), + "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u", + total_active, end_time, start_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time, + "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u", + gpu_metrics_ctx->prev_wp_active_end_time, start_time, gpu_metrics_ctx->aid, + gpu_metrics_ctx->active_cnt); +#endif +} + +static void emit_tracepoint_for_active_gpu_metrics_ctx( + struct kbase_device *kbdev, 
struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time) +{ + const u64 start_time = gpu_metrics_ctx->first_active_start_time; + u64 total_active = gpu_metrics_ctx->total_active; + u64 end_time; + + /* Check if the GPU activity is currently ongoing */ + if (gpu_metrics_ctx->active_cnt) { + /* The following check is to handle the race on CSF GPUs that can happen between + * the draining of trace buffer and FW emitting the ACT=1 event. + */ + if (unlikely(current_time == gpu_metrics_ctx->last_active_start_time)) + current_time++; + end_time = current_time; + total_active += end_time - gpu_metrics_ctx->last_active_start_time; + + gpu_metrics_ctx->first_active_start_time = current_time; + gpu_metrics_ctx->last_active_start_time = current_time; + } else { + end_time = gpu_metrics_ctx->last_active_end_time; + gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + } + + trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, start_time, end_time, total_active); + + validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active); + gpu_metrics_ctx->prev_wp_active_end_time = end_time; + gpu_metrics_ctx->total_active = 0; +} + +void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx) +{ + WARN_ON(list_empty(&gpu_metrics_ctx->link)); + WARN_ON(!gpu_metrics_ctx->kctx_count); + + gpu_metrics_ctx->kctx_count--; + if (gpu_metrics_ctx->kctx_count) + return; + + if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, + ktime_get_raw_ns()); + + list_del_init(&gpu_metrics_ctx->link); + kfree(gpu_metrics_ctx); +} + +struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid) +{ + struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics; + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + + list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->active_list, link) { + if (gpu_metrics_ctx->aid
== aid) { + WARN_ON(!gpu_metrics_ctx->kctx_count); + gpu_metrics_ctx->kctx_count++; + return gpu_metrics_ctx; + } + } + + list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->inactive_list, link) { + if (gpu_metrics_ctx->aid == aid) { + WARN_ON(!gpu_metrics_ctx->kctx_count); + gpu_metrics_ctx->kctx_count++; + return gpu_metrics_ctx; + } + } + + return NULL; +} + +void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid) +{ + gpu_metrics_ctx->aid = aid; + gpu_metrics_ctx->total_active = 0; + gpu_metrics_ctx->kctx_count = 1; + gpu_metrics_ctx->active_cnt = 0; + gpu_metrics_ctx->prev_wp_active_end_time = 0; + gpu_metrics_ctx->flags = 0; + list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list); +} + +void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; + + gpu_metrics_ctx->active_cnt++; + if (gpu_metrics_ctx->active_cnt == 1) + gpu_metrics_ctx->last_active_start_time = timestamp_ns; + + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { + gpu_metrics_ctx->first_active_start_time = timestamp_ns; + gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + } + + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) { + list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list); + gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + } +} + +void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; + + if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt)) + return; + + if (--gpu_metrics_ctx->active_cnt) + return; + + if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) { + gpu_metrics_ctx->last_active_end_time = timestamp_ns; + gpu_metrics_ctx->total_active += + timestamp_ns - 
gpu_metrics_ctx->last_active_start_time; + return; + } + + /* Due to conversion from system timestamp to CPU timestamp (which involves rounding) + * the values of the start and end timestamps can come out the same on CSF GPUs. + */ + if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) { + gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1; + gpu_metrics_ctx->total_active += 1; + return; + } + + /* The following check is to detect the situation on CSF GPUs, where 'ACT=0' event was not + * visible to the Kbase even though the system timestamp value sampled by FW was less than + * the system timestamp value sampled by Kbase just before the draining of trace buffer. + */ + if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time && + gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) { + WARN_ON_ONCE(gpu_metrics_ctx->total_active); + gpu_metrics_ctx->last_active_end_time = + gpu_metrics_ctx->prev_wp_active_end_time + 1; + gpu_metrics_ctx->total_active = 1; + return; + } + + WARN_ON_ONCE(1); +} + +void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts) +{ + struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics; + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp; + + list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) { + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { + WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)); + WARN_ON(gpu_metrics_ctx->active_cnt); + list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); + gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + continue; + } + + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); + } +} + +int kbase_gpu_metrics_init(struct kbase_device *kbdev) +{ + INIT_LIST_HEAD(&kbdev->gpu_metrics.active_list); + INIT_LIST_HEAD(&kbdev->gpu_metrics.inactive_list); + + if (!gpu_metrics_tp_emit_interval_ns ||
(gpu_metrics_tp_emit_interval_ns >= NSEC_PER_SEC)) { + dev_warn( + kbdev->dev, + "Invalid value (%lu ns) for module param gpu_metrics_tp_emit_interval_ns. Using default value: %u ns", + gpu_metrics_tp_emit_interval_ns, DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS); + gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; + } + + dev_info(kbdev->dev, "GPU metrics tracepoint support enabled"); + return 0; +} + +void kbase_gpu_metrics_term(struct kbase_device *kbdev) +{ + WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.active_list)); + WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.inactive_list)); +} + +unsigned long kbase_gpu_metrics_get_tp_emit_interval(void) +{ + return gpu_metrics_tp_emit_interval_ns; +} + +#endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h new file mode 100644 index 000000000000..c89e25996f52 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/** + * DOC: GPU metrics frontend APIs + */ + +#ifndef _KBASE_GPU_METRICS_H_ +#define _KBASE_GPU_METRICS_H_ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include + +/** + * kbase_gpu_metrics_get_tp_emit_interval() - Return the trace point emission interval. + * + * Return: The time interval in nanoseconds for GPU metrics trace point emission. + */ +unsigned long kbase_gpu_metrics_get_tp_emit_interval(void); + +/** + * kbase_gpu_metrics_ctx_put() - Decrement the Kbase context count for the GPU metrics + * context and free it if the count becomes 0. + * + * @kbdev: Pointer to the GPU device. + * @gpu_metrics_ctx: Pointer to the GPU metrics context. + * + * This function must be called when a Kbase context is destroyed. + * The function would decrement the Kbase context count for the GPU metrics context and + * free the memory if the count becomes 0. + * The function would emit a power/gpu_work_period tracepoint for the GPU metrics context + * if there was some GPU activity done for it since the last tracepoint was emitted. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx); + +/** + * kbase_gpu_metrics_ctx_get() - Increment the Kbase context count for the GPU metrics + * context if it exists. + * + * @kbdev: Pointer to the GPU device. + * @aid: Unique identifier of the Application that is creating the Kbase context. + * + * This function must be called when a Kbase context is created. + * The function would increment the Kbase context count for the GPU metrics context, + * corresponding to the @aid, if it exists. + * + * Return: Pointer to the GPU metrics context corresponding to the @aid if it already + * exists otherwise NULL.
+ * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + * The caller shall allocate memory for GPU metrics context structure if the + * function returns NULL. + */ +struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid); + +/** + * kbase_gpu_metrics_ctx_init() - Initialise the GPU metrics context + * + * @kbdev: Pointer to the GPU device. + * @gpu_metrics_ctx: Pointer to the GPU metrics context. + * @aid: Unique identifier of the Application for which GPU metrics + * context needs to be initialized. + * + * This function must be called when a Kbase context is created, after the call to + * kbase_gpu_metrics_ctx_get() returned NULL and memory for the GPU metrics context + * structure was allocated. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u32 aid); + +/** + * kbase_gpu_metrics_ctx_start_activity() - Report the start of some GPU activity + * for GPU metrics context. + * + * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. + * @timestamp_ns: CPU timestamp at which the GPU activity started. + * + * The provided timestamp would be later used as the "start_time_ns" for the + * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU + * metrics context in the current work period. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns); + +/** + * kbase_gpu_metrics_ctx_end_activity() - Report the end of some GPU activity + * for GPU metrics context. 
+ * + * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. + * @timestamp_ns: CPU timestamp at which the GPU activity ended. + * + * The provided timestamp would be later used as the "end_time_ns" for the + * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU + * metrics context in the current work period. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns); + +/** + * kbase_gpu_metrics_emit_tracepoint() - Emit power/gpu_work_period tracepoint + * for active GPU metrics contexts. + * + * @kbdev: Pointer to the GPU device. + * @ts: Timestamp at which the tracepoint is being emitted. + * + * This function would loop through all the active GPU metrics contexts and emit a + * power/gpu_work_period tracepoint for them. + * The GPU metrics context that is found to be inactive since the last tracepoint + * was emitted would be moved to the inactive list. + * The current work period would be considered as over and a new work period would + * begin whenever any application does the GPU activity. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts); + +/** + * kbase_gpu_metrics_init() - Initialise a gpu_metrics instance for a GPU + * + * @kbdev: Pointer to the GPU device. + * + * This function is called once for each @kbdev. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_gpu_metrics_init(struct kbase_device *kbdev); + +/** + * kbase_gpu_metrics_term() - Terminate a gpu_metrics instance + * + * @kbdev: Pointer to the GPU device. 
+ */ +void kbase_gpu_metrics_term(struct kbase_device *kbdev); + +#endif +#endif /* _KBASE_GPU_METRICS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 7a7d17ea5f26..190800394292 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,9 +24,12 @@ */ #include -#include +#include + +#include #include #include + #include #include #include @@ -34,87 +37,19 @@ #include #include +#define PRIV_DATA_REGDUMP(kbdev) \ + (((struct kbasep_gpuprops_priv_data *)((kbdev)->gpu_props.priv_data))->regdump) -static void kbase_gpuprops_construct_coherent_groups( - struct base_gpu_props * const props) -{ - struct mali_base_gpu_coherent_group *current_group; - u64 group_present; - u64 group_mask; - u64 first_set, first_set_prev; - u32 num_groups = 0; +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 - KBASE_DEBUG_ASSERT(props != NULL); - - props->coherency_info.coherency = props->raw_props.mem_features; - props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); - - if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { - /* Group is l2 coherent */ - group_present = props->raw_props.l2_present; - } else { - /* Group is l1 coherent */ - group_present = props->raw_props.shader_present; - } - - /* - * The coherent group mask can be computed from the l2 present - * 
register. - * - * For the coherent group n: - * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) - * where first_set is group_present with only its nth set-bit kept - * (i.e. the position from where a new group starts). - * - * For instance if the groups are l2 coherent and l2_present=0x0..01111: - * The first mask is: - * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) - * = (0x0..010 - 1) & ~(0x0..01 - 1) - * = 0x0..00f - * The second mask is: - * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) - * = (0x0..100 - 1) & ~(0x0..010 - 1) - * = 0x0..0f0 - * And so on until all the bits from group_present have been cleared - * (i.e. there is no group left). - */ - - current_group = props->coherency_info.group; - first_set = group_present & ~(group_present - 1); - - while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { - group_present -= first_set; /* Clear the current group bit */ - first_set_prev = first_set; - - first_set = group_present & ~(group_present - 1); - group_mask = (first_set - 1) & ~(first_set_prev - 1); - - /* Populate the coherent_group structure for each group */ - current_group->core_mask = group_mask & props->raw_props.shader_present; - current_group->num_cores = hweight64(current_group->core_mask); - - num_groups++; - current_group++; - } - - if (group_present != 0) - pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); - - props->coherency_info.num_groups = num_groups; -} - -/** - * kbase_gpuprops_get_curr_config_props - Get the current allocated resources - * @kbdev: The &struct kbase_device structure for the device - * @curr_config: The &struct curr_config_props structure to receive the result - * - * Fill the &struct curr_config_props structure with values from the GPU - * configuration registers. 
- * - * Return: Zero on success, Linux error code on failure - */ int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, - struct curr_config_props * const curr_config) + struct curr_config_props *const curr_config) { struct kbase_current_config_regdump curr_config_regdump; int err; @@ -127,21 +62,17 @@ int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, return 0; /* Dump relevant registers */ - err = kbase_backend_gpuprops_get_curr_config(kbdev, - &curr_config_regdump); + err = kbase_backend_gpuprops_get_curr_config(kbdev, &curr_config_regdump); if (err) return err; - curr_config->l2_slices = - KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; + { + curr_config->l2_slices = KBASE_UBFX64(curr_config_regdump.mem_features, 8U, 4) + 1; + } - curr_config->l2_present = - ((u64) curr_config_regdump.l2_present_hi << 32) + - curr_config_regdump.l2_present_lo; + curr_config->l2_present = curr_config_regdump.l2_present; - curr_config->shader_present = - ((u64) curr_config_regdump.shader_present_hi << 32) + - curr_config_regdump.shader_present_lo; + curr_config->shader_present = curr_config_regdump.shader_present; curr_config->num_cores = hweight64(curr_config->shader_present); @@ -150,15 +81,6 @@ int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, return 0; } -/** - * kbase_gpuprops_req_curr_config_update - Request Current Config Update - * @kbdev: The &struct kbase_device structure for the device - * - * Requests the current configuration to be updated next time the - * kbase_gpuprops_get_curr_config_props() is called. 
- * - * Return: Zero on success, Linux error code on failure - */ int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) { if (WARN_ON(!kbdev)) @@ -168,120 +90,41 @@ int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) return 0; } -/** - * kbase_gpuprops_get_props - Get the GPU configuration - * @gpu_props: The &struct base_gpu_props structure - * @kbdev: The &struct kbase_device structure for the device - * - * Fill the &struct base_gpu_props structure with values from the GPU - * configuration registers. Only the raw properties are filled in this function. - * - * Return: Zero on success, Linux error code on failure - */ -static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, - struct kbase_device *kbdev) -{ - struct kbase_gpuprops_regdump regdump; - int i; - int err; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(gpu_props != NULL); - - /* Dump relevant registers */ - err = kbase_backend_gpuprops_get(kbdev, &regdump); - if (err) - return err; - - gpu_props->raw_props.gpu_id = regdump.gpu_id; - gpu_props->raw_props.tiler_features = regdump.tiler_features; - gpu_props->raw_props.mem_features = regdump.mem_features; - gpu_props->raw_props.mmu_features = regdump.mmu_features; - gpu_props->raw_props.l2_features = regdump.l2_features; - - gpu_props->raw_props.as_present = regdump.as_present; - gpu_props->raw_props.js_present = regdump.js_present; - gpu_props->raw_props.shader_present = - ((u64) regdump.shader_present_hi << 32) + - regdump.shader_present_lo; - gpu_props->raw_props.tiler_present = - ((u64) regdump.tiler_present_hi << 32) + - regdump.tiler_present_lo; - gpu_props->raw_props.l2_present = - ((u64) regdump.l2_present_hi << 32) + - regdump.l2_present_lo; - gpu_props->raw_props.stack_present = - ((u64) regdump.stack_present_hi << 32) + - regdump.stack_present_lo; - - for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) - gpu_props->raw_props.js_features[i] = regdump.js_features[i]; - - for (i = 0; i <
BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; - - gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; - gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; - gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; - gpu_props->raw_props.thread_features = regdump.thread_features; - gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; - - gpu_props->raw_props.gpu_features = - ((u64) regdump.gpu_features_hi << 32) + - regdump.gpu_features_lo; - - return 0; -} - -void kbase_gpuprops_update_core_props_gpu_id( - struct base_gpu_props * const gpu_props) -{ - gpu_props->core_props.version_status = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); - gpu_props->core_props.minor_revision = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); - gpu_props->core_props.major_revision = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); - gpu_props->core_props.product_id = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); -} - /** * kbase_gpuprops_update_max_config_props - Updates the max config properties in - * the base_gpu_props. - * @base_props: The &struct base_gpu_props structure + * the kbase_gpu_props. * @kbdev: The &struct kbase_device structure for the device * - * Updates the &struct base_gpu_props structure with the max config properties. + * Updates the &struct kbase_gpu_props structure with the max config properties. 
*/ -static void kbase_gpuprops_update_max_config_props( - struct base_gpu_props * const base_props, struct kbase_device *kbdev) +static void kbase_gpuprops_update_max_config_props(struct kbase_device *kbdev) { + struct kbase_gpu_props *gpu_props; int l2_n = 0; - if (WARN_ON(!kbdev) || WARN_ON(!base_props)) + if (WARN_ON(!kbdev)) return; /* return if the max_config is not set during arbif initialization */ if (kbdev->gpu_props.max_config.core_mask == 0) return; + gpu_props = &kbdev->gpu_props; + /* * Set the base_props with the maximum config values to ensure that the * user space will always be based on the maximum resources available. */ - base_props->l2_props.num_l2_slices = - kbdev->gpu_props.max_config.l2_slices; - base_props->raw_props.shader_present = - kbdev->gpu_props.max_config.core_mask; + gpu_props->num_l2_slices = gpu_props->max_config.l2_slices; + gpu_props->shader_present = gpu_props->max_config.core_mask; /* * Update l2_present in the raw data to be consistent with the * max_config.l2_slices number. */ - base_props->raw_props.l2_present = 0; - for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { - base_props->raw_props.l2_present <<= 1; - base_props->raw_props.l2_present |= 0x1; + gpu_props->l2_present = 0; + for (l2_n = 0; l2_n < gpu_props->num_l2_slices; l2_n++) { + gpu_props->l2_present <<= 1; + gpu_props->l2_present |= 0x1; } /* * Update the coherency_info data using just one core group. For @@ -289,120 +132,13 @@ static void kbase_gpuprops_update_max_config_props( * not necessary to split the shader core groups in different coherent * groups. 
*/ - base_props->coherency_info.coherency = - base_props->raw_props.mem_features; - base_props->coherency_info.num_core_groups = 1; - base_props->coherency_info.num_groups = 1; - base_props->coherency_info.group[0].core_mask = - kbdev->gpu_props.max_config.core_mask; - base_props->coherency_info.group[0].num_cores = - hweight32(kbdev->gpu_props.max_config.core_mask); -} - -/** - * kbase_gpuprops_calculate_props - Calculate the derived properties - * @gpu_props: The &struct base_gpu_props structure - * @kbdev: The &struct kbase_device structure for the device - * - * Fill the &struct base_gpu_props structure with values derived from the GPU - * configuration registers - */ -static void kbase_gpuprops_calculate_props( - struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) -{ - int i; - - /* Populate the base_gpu_props structure */ - kbase_gpuprops_update_core_props_gpu_id(gpu_props); - gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; -#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE - gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; -#else - gpu_props->core_props.gpu_available_memory_size = - totalram_pages() << PAGE_SHIFT; -#endif - - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; - - gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); - gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); - - /* Field with number of l2 slices is added to MEM_FEATURES register - * since t76x. Below code assumes that for older GPU reserved bits will - * be read as zero. 
- */ - gpu_props->l2_props.num_l2_slices = - KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; - - gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); - gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); - - if (gpu_props->raw_props.thread_max_threads == 0) - gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; - else - gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; - - if (gpu_props->raw_props.thread_max_workgroup_size == 0) - gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; - else - gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; - - if (gpu_props->raw_props.thread_max_barrier_size == 0) - gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; - else - gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; - - if (gpu_props->raw_props.thread_tls_alloc == 0) - gpu_props->thread_props.tls_alloc = - gpu_props->thread_props.max_threads; - else - gpu_props->thread_props.tls_alloc = - gpu_props->raw_props.thread_tls_alloc; - -#if MALI_USE_CSF - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8); - gpu_props->thread_props.max_thread_group_split = 0; -#else - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); - gpu_props->thread_props.max_thread_group_split = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); 
-#endif - - /* If values are not specified, then use defaults */ - if (gpu_props->thread_props.max_registers == 0) { - gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; - gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; - gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; - } - - /* - * If the maximum resources allocated information is available it is - * necessary to update the base_gpu_props with the max_config info to - * the userspace. This is applicable to systems that receive this - * information from the arbiter. - */ - if (kbdev->gpu_props.max_config.core_mask) - /* Update the max config properties in the base_gpu_props */ - kbase_gpuprops_update_max_config_props(gpu_props, - kbdev); - else - /* Initialize the coherent_group structure for each group */ - kbase_gpuprops_construct_coherent_groups(gpu_props); + gpu_props->num_core_groups = 1; + gpu_props->coherency_info.group.core_mask = gpu_props->max_config.core_mask; + gpu_props->coherency_info.group.num_cores = hweight32(gpu_props->max_config.core_mask); } void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, - const struct max_config_props *max_config) + const struct max_config_props *max_config) { if (WARN_ON(!kbdev) || WARN_ON(!max_config)) return; @@ -411,36 +147,157 @@ void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, kbdev->gpu_props.max_config.core_mask = max_config->core_mask; } -void kbase_gpuprops_set(struct kbase_device *kbdev) +void kbase_gpuprops_update_composite_ids(struct kbase_gpu_id_props *props) +{ + props->product_id = GPU_ID_PRODUCT_ID_MAKE(props->arch_major, props->arch_minor, + props->arch_rev, props->product_major); + props->product_model = GPU_ID_MODEL_MAKE(props->arch_major, props->product_major); + props->version_id = GPU_ID_VERSION_MAKE(props->version_major, props->version_minor, + props->version_status); + props->arch_id = GPU_ID_ARCH_MAKE(props->arch_major, props->arch_minor, props->arch_rev); +} + +void 
kbase_gpuprops_parse_gpu_id(struct kbase_gpu_id_props *props, u64 gpu_id) +{ + props->arch_major = GPU_ID2_ARCH_MAJOR_GET(gpu_id); + props->version_status = gpu_id & GPU_ID2_VERSION_STATUS; + props->version_minor = GPU_ID2_VERSION_MINOR_GET(gpu_id); + props->version_major = GPU_ID2_VERSION_MAJOR_GET(gpu_id); + props->product_major = GPU_ID2_PRODUCT_MAJOR_GET(gpu_id); + props->arch_rev = GPU_ID2_ARCH_REV_GET(gpu_id); + props->arch_minor = GPU_ID2_ARCH_MINOR_GET(gpu_id); + + kbase_gpuprops_update_composite_ids(props); +} +KBASE_EXPORT_TEST_API(kbase_gpuprops_parse_gpu_id); + +static void kbase_gpuprops_parse_gpu_features(struct kbase_gpu_features_props *props, + u64 gpu_features) +{ + props->ray_intersection = KBASE_UBFX64(gpu_features, 2U, 1); + props->cross_stream_sync = KBASE_UBFX64(gpu_features, 3U, 1); +} + +static void kbase_gpuprops_parse_js_features(struct kbase_js_features_props *props, u32 js_features) +{ + props->null = KBASE_UBFX32(js_features, 1U, 1); + props->write_value = KBASE_UBFX32(js_features, 2U, 1); + props->cache_flush = KBASE_UBFX32(js_features, 3U, 1); + props->compute_shader = KBASE_UBFX32(js_features, 4U, 1); + props->tiler = KBASE_UBFX32(js_features, 7U, 1); + props->fragment_shader = KBASE_UBFX32(js_features, 9U, 1); +} + +/** + * kbase_gpuprops_get_props - Get the GPU configuration + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &struct base_gpu_props structure with values from the GPU + * configuration registers. Only the raw properties are filled in this function. 
+ * + * Return: Zero on success, Linux error code on failure + */ +static int kbase_gpuprops_get_props(struct kbase_device *kbdev) { struct kbase_gpu_props *gpu_props; - struct gpu_raw_gpu_props *raw; + struct kbasep_gpuprops_regdump *regdump; + + int i, err; + + if (WARN_ON(kbdev == NULL) || WARN_ON(kbdev->gpu_props.priv_data == NULL)) + return -EINVAL; + + gpu_props = &kbdev->gpu_props; + regdump = &PRIV_DATA_REGDUMP(kbdev); + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get(kbdev, regdump); + if (err) + return err; + + gpu_props->shader_present = regdump->shader_present; + gpu_props->tiler_present = regdump->tiler_present; + gpu_props->stack_present = regdump->stack_present; + gpu_props->l2_present = regdump->l2_present; + + gpu_props->num_cores = hweight64(regdump->shader_present); + gpu_props->num_core_groups = hweight64(regdump->l2_present); + + { + gpu_props->num_address_spaces = hweight32(regdump->as_present); + } + + gpu_props->num_job_slots = hweight32(regdump->js_present); + + gpu_props->log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; + + if (regdump->thread_max_threads == 0) + gpu_props->max_threads = THREAD_MT_DEFAULT; + else + gpu_props->max_threads = regdump->thread_max_threads; + +#if MALI_USE_CSF + gpu_props->impl_tech = KBASE_UBFX32(regdump->thread_features, 22U, 2); +#else /* MALI_USE_CSF */ + gpu_props->impl_tech = KBASE_UBFX32(regdump->thread_features, 30U, 2); +#endif /* MALI_USE_CSF */ + + /* Features */ + kbase_gpuprops_parse_gpu_features(&gpu_props->gpu_features, regdump->gpu_features); + + gpu_props->coherency_info.coherent_core_group = KBASE_UBFX64(regdump->mem_features, 0U, 1); + gpu_props->coherency_info.coherent_super_group = KBASE_UBFX64(regdump->mem_features, 1U, 1); + gpu_props->coherency_info.group.core_mask = gpu_props->shader_present; + gpu_props->coherency_info.group.num_cores = gpu_props->num_cores; + + gpu_props->mmu.va_bits = KBASE_UBFX64(regdump->mmu_features, 0U, 8); + gpu_props->mmu.pa_bits = 
KBASE_UBFX64(regdump->mmu_features, 8U, 8); + + /* + * this will get turned into the selected coherency mode. + * Additionally, add non-coherent mode, as this is always supported. + */ + + gpu_props->coherency_mode = regdump->coherency_features | + COHERENCY_FEATURE_BIT(COHERENCY_NONE); + + gpu_props->log2_line_size = KBASE_UBFX64(regdump->l2_features, 0U, 8); + { + gpu_props->num_l2_slices = KBASE_UBFX64(regdump->mem_features, 8U, 4) + 1; + } + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + kbase_gpuprops_parse_js_features(&gpu_props->js_features[i], + regdump->js_features[i]); + + if (gpu_props->max_config.core_mask) + kbase_gpuprops_update_max_config_props(kbdev); + + return 0; +} + +int kbase_gpuprops_init(struct kbase_device *kbdev) +{ + struct kbase_gpu_props *gpu_props; + int err = 0; if (WARN_ON(!kbdev)) - return; + return -EINVAL; + gpu_props = &kbdev->gpu_props; - raw = &gpu_props->props.raw_props; - /* Initialize the base_gpu_props structure from the hardware */ - kbase_gpuprops_get_props(&gpu_props->props, kbdev); + /* Allocate private data for gpuprop backend */ + kbdev->gpu_props.priv_data = kzalloc(sizeof(struct kbasep_gpuprops_priv_data), GFP_KERNEL); - /* Populate the derived properties */ - kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); + if (!gpu_props->priv_data) + return -ENOMEM; - /* Populate kbase-only fields */ - gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); - gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); - - gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); - - gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); - gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); - - gpu_props->num_cores = hweight64(raw->shader_present); - gpu_props->num_core_groups = - gpu_props->props.coherency_info.num_core_groups; - gpu_props->num_address_spaces = hweight32(raw->as_present); - gpu_props->num_job_slots = hweight32(raw->js_present); + /* 
Get and populate kbase gpu properties */ + err = kbase_gpuprops_get_props(kbdev); + if (err) { + kbase_gpuprops_term(kbdev); + return err; + } /* * Current configuration is used on HW interactions so that the maximum @@ -450,48 +307,13 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) */ kbase_gpuprops_req_curr_config_update(kbdev); kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config); + return 0; } -int kbase_gpuprops_set_features(struct kbase_device *kbdev) +void kbase_gpuprops_term(struct kbase_device *kbdev) { - struct base_gpu_props *gpu_props; - struct kbase_gpuprops_regdump regdump; - int err; - - gpu_props = &kbdev->gpu_props.props; - - /* Dump relevant registers */ - err = kbase_backend_gpuprops_get_features(kbdev, &regdump); - if (err) - return err; - - /* - * Copy the raw value from the register, later this will get turned - * into the selected coherency mode. - * Additionally, add non-coherent mode, as this is always supported. - */ - gpu_props->raw_props.coherency_mode = regdump.coherency_features | - COHERENCY_FEATURE_BIT(COHERENCY_NONE); - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) - gpu_props->thread_props.max_thread_group_split = 0; - - /* - * The CORE_FEATURES register has different meanings depending on GPU. - * On tGOx, bits[3:0] encode num_exec_engines. - * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed, - * instead. - * GPUs like tTIx have additional fields like LSC_SIZE that are - * otherwise reserved/RAZ on older GPUs. 
- */ - gpu_props->raw_props.core_features = regdump.core_features; - -#if !MALI_USE_CSF - gpu_props->core_props.num_exec_engines = - KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); -#endif - - return err; + kfree(kbdev->gpu_props.priv_data); + kbdev->gpu_props.priv_data = NULL; } /* @@ -509,7 +331,7 @@ static u8 override_l2_hash; module_param(override_l2_hash, byte, 0000); MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); -static u32 l2_hash_values[ASN_HASH_COUNT] = { +static u32 l2_hash_values[GPU_L2_SLICE_HASH_COUNT] = { 0, }; static unsigned int num_override_l2_hash_values; @@ -517,9 +339,9 @@ module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); /* Definitions for range of supported user defined hash functions for GPUs - * that support L2_CONFIG and not ASN_HASH features. Supported hash function - * range from 0b1000-0b1111 inclusive. Selection of any other values will - * lead to undefined behavior. + * that support L2_CONFIG and not L2 cache slice hash features. Supported + * hash function range from 0b1000-0b1111 inclusive. Selection of any other + * values will lead to undefined behavior. */ #define USER_DEFINED_HASH_LO ((u8)0x08) #define USER_DEFINED_HASH_HI ((u8)0x0F) @@ -542,8 +364,7 @@ enum l2_config_override_result { * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. * L2_CONFIG_OVERRIDE_FAIL otherwise. */ -static enum l2_config_override_result -kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) +static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) { struct device_node *np = kbdev->dev->of_node; @@ -558,8 +379,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) /* Check overriding value is supported, if not will result in * undefined behavior. 
*/ - if (override_l2_hash >= USER_DEFINED_HASH_LO && - override_l2_hash <= USER_DEFINED_HASH_HI) + if (override_l2_hash >= USER_DEFINED_HASH_LO && override_l2_hash <= USER_DEFINED_HASH_HI) kbdev->l2_hash_override = override_l2_hash; else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) kbdev->l2_hash_override = 0; @@ -571,31 +391,26 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) kbdev->l2_hash_values_override = true; for (i = 0; i < num_override_l2_hash_values; i++) kbdev->l2_hash_values[i] = l2_hash_values[i]; - } else if (!of_property_read_u32_array(np, "l2-hash-values", - kbdev->l2_hash_values, - ASN_HASH_COUNT)) + } else if (!of_property_read_u32_array(np, "l2-hash-values", kbdev->l2_hash_values, + GPU_L2_SLICE_HASH_COUNT)) kbdev->l2_hash_values_override = true; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && - (kbdev->l2_hash_override)) { + if (kbase_hw_has_l2_slice_hash_feature(kbdev) && (kbdev->l2_hash_override)) { dev_err(kbdev->dev, "l2-hash not supported\n"); return L2_CONFIG_OVERRIDE_FAIL; } - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && - (kbdev->l2_hash_values_override)) { + if (!kbase_hw_has_l2_slice_hash_feature(kbdev) && (kbdev->l2_hash_values_override)) { dev_err(kbdev->dev, "l2-hash-values not supported\n"); return L2_CONFIG_OVERRIDE_FAIL; } if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) { - dev_err(kbdev->dev, - "both l2-hash & l2-hash-values not supported\n"); + dev_err(kbdev->dev, "both l2-hash & l2-hash-values not supported\n"); return L2_CONFIG_OVERRIDE_FAIL; } - if (kbdev->l2_size_override || kbdev->l2_hash_override || - kbdev->l2_hash_values_override) + if (kbdev->l2_size_override || kbdev->l2_hash_override || kbdev->l2_hash_values_override) return L2_CONFIG_OVERRIDE_OK; return L2_CONFIG_OVERRIDE_NONE; @@ -606,8 +421,7 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) int err = 0; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) 
{ - struct kbase_gpuprops_regdump regdump; - struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; + struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev); /* Check for L2 cache size & hash overrides */ switch (kbase_read_l2_config_from_dt(kbdev)) { @@ -637,35 +451,29 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) kbase_pm_wait_for_l2_powered(kbdev); /* Dump L2_FEATURES register */ - err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump); + err = kbase_backend_gpuprops_get_l2_features(kbdev, regdump); if (err) goto exit; - dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", - regdump.l2_features); - dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", - regdump.l2_config); + dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%llx\n", regdump->l2_features); + dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", regdump->l2_config); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { + if (kbase_hw_has_l2_slice_hash_feature(kbdev)) { int idx; - const bool asn_he = regdump.l2_config & - L2_CONFIG_ASN_HASH_ENABLE_MASK; -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - if (!asn_he && kbdev->l2_hash_values_override) - dev_err(kbdev->dev, - "Failed to use requested ASN_HASH, fallback to default"); -#endif - for (idx = 0; idx < ASN_HASH_COUNT; idx++) - dev_info(kbdev->dev, - "%s ASN_HASH[%d] is [0x%08x]\n", - asn_he ? 
"Overridden" : "Default", idx, - regdump.l2_asn_hash[idx]); - } + const bool enable = regdump->l2_config & + L2_CONFIG_L2_SLICE_HASH_ENABLE_MASK; - /* Update gpuprops with reflected L2_FEATURES */ - gpu_props->raw_props.l2_features = regdump.l2_features; - gpu_props->l2_props.log2_cache_size = - KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); +#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + if (!enable && kbdev->l2_hash_values_override) { + dev_err(kbdev->dev, + "Failed to use requested ASN_HASH, fallback to default"); + } +#endif + for (idx = 0; idx < GPU_L2_SLICE_HASH_COUNT; idx++) + dev_info(kbdev->dev, "%s ASN_HASH[%d] is [0x%08x]\n", + enable ? "Overridden" : "Default", idx, + regdump->l2_slice_hash[idx]); + } } exit: @@ -677,9 +485,11 @@ static struct { size_t offset; int size; } gpu_property_mapping[] = { -#define PROP(name, member) \ - {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ - sizeof(((struct base_gpu_props *)0)->member)} +#define PROP(name, member) \ + { \ + KBASE_GPUPROP_##name, offsetof(struct gpu_props_user_data, member), \ + sizeof(((struct gpu_props_user_data *)0)->member) \ + } PROP(PRODUCT_ID, core_props.product_id), PROP(VERSION_STATUS, core_props.version_status), PROP(MINOR_REVISION, core_props.minor_revision), @@ -691,16 +501,7 @@ static struct { PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), - -#if MALI_USE_CSF -#define BACKWARDS_COMPAT_PROP(name, type) \ - { \ - KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \ - } - BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8), -#else PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), -#endif PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), @@ -780,15 +581,151 @@ static struct { #undef PROP }; +/** + * kbase_populate_user_data - Populate user data properties from kbase props and + * raw 
register values + * @kbdev: The kbase device pointer + * @data: The user properties data struct pointer + */ +static void kbase_populate_user_data(struct kbase_device *kbdev, struct gpu_props_user_data *data) +{ + struct kbase_gpu_props *kprops = &kbdev->gpu_props; + struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev); + int i = 0; + + if (WARN_ON(!kbdev) || WARN_ON(!data) || WARN_ON(!regdump)) + return; + + /* Properties from kbase_gpu_props */ + data->core_props.version_status = kprops->gpu_id.version_status; + data->core_props.minor_revision = kprops->gpu_id.version_minor; + data->core_props.major_revision = kprops->gpu_id.version_major; + data->core_props.gpu_freq_khz_max = kprops->gpu_freq_khz_max; + data->core_props.log2_program_counter_size = kprops->log2_program_counter_size; + data->l2_props.log2_line_size = kprops->log2_line_size; + data->l2_props.num_l2_slices = kprops->num_l2_slices; + data->raw_props.shader_present = kprops->shader_present; + data->raw_props.l2_present = kprops->l2_present; + data->raw_props.tiler_present = kprops->tiler_present; + data->raw_props.stack_present = kprops->stack_present; + + /* On Bifrost+ GPUs, there is only 1 coherent group */ + data->coherency_info.num_groups = 1; + data->coherency_info.num_core_groups = kprops->num_core_groups; + data->coherency_info.group[0].core_mask = kprops->coherency_info.group.core_mask; + data->coherency_info.group[0].num_cores = kprops->coherency_info.group.num_cores; + + data->thread_props.max_threads = kprops->max_threads; + data->thread_props.impl_tech = kprops->impl_tech; + data->raw_props.coherency_mode = kprops->coherency_mode; + + /* Properties (mostly) from raw register values */ + /* For compatibility, we are passing the lower 32-bits of the gpu_id */ + data->raw_props.gpu_id = regdump->gpu_id; + + { + data->core_props.product_id = KBASE_UBFX64(regdump->gpu_id, 16U, 16); + } + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) { + 
data->core_props.texture_features[i] = regdump->texture_features[i]; + data->raw_props.texture_features[i] = regdump->texture_features[i]; + } + + data->core_props.gpu_available_memory_size = kbase_totalram_pages() << PAGE_SHIFT; + + /* + * The CORE_FEATURES register has different meanings depending on GPU. + * On tGOx, bits[3:0] encode num_exec_engines. + * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed, + * instead. + * GPUs like tTIx have additional fields like LSC_SIZE that are + * otherwise reserved/RAZ on older GPUs. + */ +#if !MALI_USE_CSF + data->core_props.num_exec_engines = KBASE_UBFX64(regdump->core_features, 0, 4); +#endif + + data->l2_props.log2_cache_size = KBASE_UBFX64(regdump->l2_features, 16U, 8); + data->coherency_info.coherency = regdump->mem_features; + + data->tiler_props.bin_size_bytes = 1 << KBASE_UBFX64(regdump->tiler_features, 0U, 6); + data->tiler_props.max_active_levels = KBASE_UBFX32(regdump->tiler_features, 8U, 4); + + if (regdump->thread_max_workgroup_size == 0) + data->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; + else + data->thread_props.max_workgroup_size = regdump->thread_max_workgroup_size; + + if (regdump->thread_max_barrier_size == 0) + data->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; + else + data->thread_props.max_barrier_size = regdump->thread_max_barrier_size; + + if (regdump->thread_tls_alloc == 0) + data->thread_props.tls_alloc = kprops->max_threads; + else + data->thread_props.tls_alloc = regdump->thread_tls_alloc; + +#if MALI_USE_CSF + data->thread_props.max_registers = KBASE_UBFX32(regdump->thread_features, 0U, 22); + data->thread_props.max_task_queue = KBASE_UBFX32(regdump->thread_features, 24U, 8); + data->thread_props.max_thread_group_split = 0; +#else + data->thread_props.max_registers = KBASE_UBFX32(regdump->thread_features, 0U, 16); + data->thread_props.max_task_queue = KBASE_UBFX32(regdump->thread_features, 16U, 8); + data->thread_props.max_thread_group_split = 
KBASE_UBFX32(regdump->thread_features, 24U, 6); +#endif + + if (data->thread_props.max_registers == 0) { + data->thread_props.max_registers = THREAD_MR_DEFAULT; + data->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; + data->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) + data->thread_props.max_thread_group_split = 0; + + /* Raw Register Values */ + data->raw_props.l2_features = regdump->l2_features; + data->raw_props.core_features = regdump->core_features; + data->raw_props.mem_features = regdump->mem_features; + data->raw_props.mmu_features = regdump->mmu_features; + data->raw_props.as_present = regdump->as_present; + data->raw_props.js_present = regdump->js_present; + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + data->raw_props.js_features[i] = regdump->js_features[i]; + + data->raw_props.tiler_features = regdump->tiler_features; + + data->raw_props.thread_max_threads = regdump->thread_max_threads; + data->raw_props.thread_max_workgroup_size = regdump->thread_max_workgroup_size; + data->raw_props.thread_max_barrier_size = regdump->thread_max_barrier_size; + data->raw_props.thread_features = regdump->thread_features; + data->raw_props.thread_tls_alloc = regdump->thread_tls_alloc; + data->raw_props.gpu_features = regdump->gpu_features; + +} + int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) { struct kbase_gpu_props *kprops = &kbdev->gpu_props; - struct base_gpu_props *props = &kprops->props; + struct gpu_props_user_data props; u32 count = ARRAY_SIZE(gpu_property_mapping); u32 i; u32 size = 0; u8 *p; + memset(&props, 0, sizeof(props)); + + /* Populate user data structure from kbase props and raw register values */ + kbase_populate_user_data(kbdev, &props); + + /* Free private data after used to populate user data structure */ + kfree(kprops->priv_data); + kprops->priv_data = NULL; + for (i = 0; i < count; i++) { /* 4 bytes for the ID, and the size of the 
property */ size += 4 + gpu_property_mapping[i].size; @@ -804,10 +741,22 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) p = kprops->prop_buffer; -#define WRITE_U8(v) (*p++ = (v) & 0xFF) -#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) -#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) -#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) +#define WRITE_U8(v) (*p++ = (v)&0xFF) +#define WRITE_U16(v) \ + do { \ + WRITE_U8(v); \ + WRITE_U8((v) >> 8); \ + } while (0) +#define WRITE_U32(v) \ + do { \ + WRITE_U16(v); \ + WRITE_U16((v) >> 16); \ + } while (0) +#define WRITE_U64(v) \ + do { \ + WRITE_U32(v); \ + WRITE_U32((v) >> 32); \ + } while (0) for (i = 0; i < count; i++) { u32 type = gpu_property_mapping[i].type; @@ -816,8 +765,8 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) const u64 dummy_backwards_compat_value = (u64)0; const void *field; - if (likely(offset < sizeof(struct base_gpu_props))) - field = ((const u8 *)props) + offset; + if (likely(offset < sizeof(struct gpu_props_user_data))) + field = ((const u8 *)&props) + offset; else field = &dummy_backwards_compat_value; @@ -835,13 +784,13 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) type_size = KBASE_GPUPROP_VALUE_SIZE_U64; break; default: - dev_err(kbdev->dev, - "Invalid gpu_property_mapping type=%d size=%d", - type, gpu_property_mapping[i].size); + dev_err(kbdev->dev, "Invalid gpu_property_mapping type=%d size=%d", type, + gpu_property_mapping[i].size); + kbase_gpuprops_free_user_buffer(kbdev); return -EINVAL; } - WRITE_U32((type<<2) | type_size); + WRITE_U32((type << 2) | type_size); switch (type_size) { case KBASE_GPUPROP_VALUE_SIZE_U8: @@ -872,17 +821,9 @@ void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev) int kbase_device_populate_max_freq(struct kbase_device *kbdev) { - struct mali_base_gpu_core_props *core_props; - /* obtain max configured gpu 
frequency, if devfreq is enabled then * this will be overridden by the highest operating point found */ - core_props = &(kbdev->gpu_props.props.core_props); -#ifdef GPU_FREQ_KHZ_MAX - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#else - core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; -#endif - + kbdev->gpu_props.gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h index f0a97312c0dd..093f9680ece6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015, 2017, 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,24 +46,47 @@ struct kbase_device; (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) /** - * kbase_gpuprops_set - Set up Kbase GPU properties. + * KBASE_UBFX64 - Extracts bits from a 64-bit bitfield. + * @value: The value from which to extract bits. + * @offset: The first bit to extract (0 being the LSB). + * @size: The number of bits to extract. + * + * Context: @offset + @size <= 64. + * + * Return: Bits [@offset, @offset + @size) from @value. 
+ */ +/* from mali_cdsb.h */ +#define KBASE_UBFX64(value, offset, size) \ + (((u64)(value) >> (u32)(offset)) & (u64)((1ULL << (u32)(size)) - 1)) + +/** + * kbase_gpuprops_update_composite_ids - update composite ids with new gpu id + * @props: pointer to GPU_ID property structure + */ +void kbase_gpuprops_update_composite_ids(struct kbase_gpu_id_props *props); + +/** + * kbase_gpuprops_parse_gpu_id - parse fields of GPU_ID + * @props: pointer to GPU_ID property structure + * @gpu_id: gpu id register value + */ +void kbase_gpuprops_parse_gpu_id(struct kbase_gpu_id_props *props, u64 gpu_id); + +/** + * kbase_gpuprops_init - Set up Kbase GPU properties. + * @kbdev: The struct kbase_device structure for the device * * Set up Kbase GPU properties with information from the GPU registers + * + * Return: Zero on success, Linux error code on failure */ -void kbase_gpuprops_set(struct kbase_device *kbdev); +int kbase_gpuprops_init(struct kbase_device *kbdev); /** - * kbase_gpuprops_set_features - Set up Kbase GPU properties - * @kbdev: Device pointer - * - * This function sets up GPU properties that are dependent on the hardware - * features bitmask. This function must be preceeded by a call to - * kbase_hw_set_features_mask(). - * - * Return: Zero on success, Linux error code on failure + * kbase_gpuprops_term - Terminate Kbase GPU properties. 
+ * @kbdev: The struct kbase_device structure for the device */ -int kbase_gpuprops_set_features(struct kbase_device *kbdev); +void kbase_gpuprops_term(struct kbase_device *kbdev); /** * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES @@ -106,17 +129,6 @@ void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev); */ int kbase_device_populate_max_freq(struct kbase_device *kbdev); -/** - * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value - * @gpu_props: the &base_gpu_props structure - * - * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into - * separate fields (version_status, minor_revision, major_revision, product_id) - * stored in base_gpu_props::core_props. - */ -void kbase_gpuprops_update_core_props_gpu_id( - struct base_gpu_props * const gpu_props); - /** * kbase_gpuprops_set_max_config - Set the max config information * @kbdev: Device pointer @@ -125,7 +137,7 @@ void kbase_gpuprops_update_core_props_gpu_id( * This function sets max_config in the kbase_gpu_props. 
*/ void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, - const struct max_config_props *max_config); + const struct max_config_props *max_config); /** * kbase_gpuprops_get_curr_config_props - Get the current allocated resources @@ -138,7 +150,7 @@ void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, * Return: Zero on success, Linux error code on failure */ int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, - struct curr_config_props * const curr_config); + struct curr_config_props *const curr_config); /** * kbase_gpuprops_req_curr_config_update - Request Current Config Update @@ -151,4 +163,4 @@ int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, */ int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev); -#endif /* _KBASE_GPUPROPS_H_ */ +#endif /* _KBASE_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_private_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_private_types.h new file mode 100644 index 000000000000..5ff790880f45 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_private_types.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _MALI_KBASE_GPUPROPS_PRIVATE_TYPES_H_ +#define _MALI_KBASE_GPUPROPS_PRIVATE_TYPES_H_ + +#include + +/** + * struct kbasep_gpuprops_regdump - structure containing raw GPU register values. + * + * @shader_present: Shader core present bitmap + * @tiler_present: Tiler core present bitmap + * @l2_present: L2 cache present bitmap + * @stack_present: Core stack present bitmap + * @core_features: Shader core features + * @tiler_features: Tiler features + * @l2_features: Level 2 cache features + * @mem_features: Memory system features + * @mmu_features: GPU Memory Management Unit configuration + * @gpu_features: Supported GPU features + * @as_present: Address spaces present + * @js_present: Job slots present + * @js_features: Job slot features + * @texture_features: Support flags for texture formats + * @gpu_id: GPU ID + * @thread_max_threads: Maximum number of threads per core + * @thread_max_workgroup_size: Maximum number of threads per workgroup + * @thread_max_barrier_size: Maximum number of threads per barrier + * @thread_features: Thread features + * @coherency_features: Coherency/AMBA features + * @thread_tls_alloc: Number of threads per core to allocate TLS storage for + * @l2_config: Level 2 cache configuration + * @l2_slice_hash: ASN Hash function arguments + * + * This structure is used to store raw GPU register values that will be used as-is + * or parsed into respective properties. 
+ */ +struct kbasep_gpuprops_regdump { + u64 shader_present; + u64 tiler_present; + u64 l2_present; + u64 stack_present; + u64 core_features; + u64 tiler_features; + u64 l2_features; + u64 mem_features; + u64 mmu_features; + u64 gpu_features; + u32 as_present; + u32 js_present; + u32 js_features[GPU_MAX_JOB_SLOTS]; + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + u64 gpu_id; + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + u32 coherency_features; + u32 thread_tls_alloc; + u32 l2_config; + u32 l2_slice_hash[GPU_ASN_HASH_COUNT]; +}; + +struct kbasep_gpuprops_priv_data { + struct kbasep_gpuprops_regdump regdump; +}; + +#endif /* _MALI_KBASE_GPUPROPS_PRIVATE_TYPES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h index 45cb603fab82..6c193b7024f8 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,71 +28,34 @@ #include -#define KBASE_GPU_SPEED_MHZ 123 +#define KBASE_GPU_SPEED_MHZ 123 #define KBASE_GPU_PC_SIZE_LOG2 24U -struct kbase_gpuprops_regdump { - u32 gpu_id; - u32 l2_features; - u32 l2_config; - u32 l2_asn_hash[ASN_HASH_COUNT]; - u32 core_features; - u32 tiler_features; - u32 mem_features; - u32 mmu_features; - u32 as_present; - u32 js_present; - u32 thread_max_threads; - u32 thread_max_workgroup_size; - u32 thread_max_barrier_size; - u32 thread_features; - u32 thread_tls_alloc; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - u32 js_features[GPU_MAX_JOB_SLOTS]; - u32 shader_present_lo; - u32 shader_present_hi; - u32 tiler_present_lo; - u32 tiler_present_hi; - u32 l2_present_lo; - u32 l2_present_hi; - u32 stack_present_lo; - u32 stack_present_hi; - u32 coherency_features; - u32 gpu_features_lo; - u32 gpu_features_hi; -}; - /** * struct kbase_current_config_regdump - Register dump for current resources * allocated to the GPU. * @mem_features: Memory system features. Contains information about the * features of the memory system. Used here to get the L2 slice * count. - * @shader_present_lo: Shader core present bitmap. Low word. - * @shader_present_hi: Shader core present bitmap. High word. - * @l2_present_lo: L2 cache present bitmap. Low word. - * @l2_present_hi: L2 cache present bitmap. High word. + * @l2_features: L2 cache features + * @shader_present: Shader core present bitmap. + * @l2_present: L2 cache present bitmap. * * Register dump structure used to store the resgisters data realated to the * current resources allocated to the GPU. 
*/ struct kbase_current_config_regdump { - u32 mem_features; - u32 shader_present_lo; - u32 shader_present_hi; - u32 l2_present_lo; - u32 l2_present_hi; -}; - -struct kbase_gpu_cache_props { - u8 associativity; - u8 external_bus_width; -}; - -struct kbase_gpu_mem_props { - u8 core_group; + u64 mem_features; + u64 l2_features; + u64 shader_present; + u64 l2_present; }; +/** + * struct kbase_gpu_mmu_props - MMU properties + * @va_bits: Number of bits supported in virtual addresses + * @pa_bits: Number of bits supported in physical addresses + */ struct kbase_gpu_mmu_props { u8 va_bits; u8 pa_bits; @@ -142,16 +105,156 @@ struct curr_config_props { u8 padding[4]; }; +/** + * struct kbase_gpu_id_props - Properties based on GPU_ID register. + * @version_status: field indicating the status of the GPU release + * @version_minor: minor release version number (p1 in r0p1) + * @version_major: major release version number (r0 in r0p1) + * @product_major: product identifier + * @arch_rev: architecture patch version + * @arch_minor: architecture minor revision + * @arch_major: architecture major revision + * @product_id: arch_major << 24 | arch_minor << 16 | arch_rev << 8 | product_major + * @product_model: arch_major << 24 | product_major + * @version_id: version_major << 16 | version_minor << 8 | version_status + * @arch_id: id composed of arch_major << 16 | arch_minor << 8 | arch_rev + * + * Use GPU_ID_PRODUCT_ID_MAKE, GPU_ID_VERSION_MAKE or GPU_ID_ARCH_MAKE to perform + * comparisons between product_id, version_id or arch_id respectively + */ +struct kbase_gpu_id_props { + u16 version_status; + u16 version_minor; + u16 version_major; + u16 product_major; + u16 arch_rev; + u16 arch_minor; + u16 arch_major; + /* Composite ids */ + u32 product_id; + u32 product_model; + u32 version_id; + u32 arch_id; +}; + +/** + * struct kbase_gpu_features_props - boolean struct indicating feature support + * from GPU_FEATURES register. 
+ * @ray_intersection: Ray tracing intersection instructions supported + * @cross_stream_sync: Cross stream sync supported + * + * This register is only present on certain CSF GPUs. + */ +struct kbase_gpu_features_props { + bool ray_intersection; + bool cross_stream_sync; +}; + +/** + * struct kbase_coherent_group_props - Coherency group properties + * @core_mask: Coherent group core mask + * @num_cores: Number of cores in coherent group + */ +struct kbase_coherent_group_props { + u64 core_mask; + u16 num_cores; +}; + +/** + * struct kbase_coherency_props - Coherency group information + * @coherent_core_group: Core group is coherent (MEM_FEATURES register) + * @coherent_super_group: Core supergroup is coherent (MEM_FEATURES register) + * @group: Descriptors of coherent groups + * + * The groups are sorted by core mask. The core masks are non-repeating and do + * not intersect. + */ +struct kbase_coherency_props { + bool coherent_core_group; + bool coherent_super_group; + struct kbase_coherent_group_props group; +}; + +/** + * struct kbase_js_features_props - Boolean struct of fields in JSn_FEATURES register + * @null: Supports null jobs + * @write_value: Supports write value jobs + * @cache_flush: Supports cache flush jobs + * @compute_shader: Supports compute shader jobs + * @tiler: Supports tiler jobs + * @fragment_shader: Supports fragment shader jobs + */ +struct kbase_js_features_props { + bool null; + bool write_value; + bool cache_flush; + bool compute_shader; + bool tiler; + bool fragment_shader; +}; + +/** + * struct kbase_gpu_props - parsed gpu properties used by kbase. 
+ * @shader_present: Shader core present bitmap + * @stack_present: Core stack present bitmap + * @tiler_present: Tiler present bitmap + * @l2_present: L2 cache present bitmap + * @num_cores: Number of shader cores present + * @num_core_groups: Number of L2 cache present + * @num_address_spaces: Number of address spaces + * @num_job_slots: Number of job slots + * @coherency_mode: Coherency mode bitmask + * @gpu_freq_khz_max: Max configured gpu frequency + * @log2_program_counter_size: Program counter size in log2 + * @log2_line_size: L2 cache line size in log2 + * @num_l2_slices: Number of l2 slices + * @max_threads: Total number of registers per core + * @impl_tech: Implementation technology type + * @js_features: Job slot features + * @gpu_id: struct kbase_gpu_id_props + * @gpu_features: struct kbase_gpu_features_props + * @coherency_info: struct kbase_coherency_props + * @mmu: MMU props + * @curr_config: struct curr_config_props current resource available + * @max_config: struct max_config_props maximum resource available + * @prop_buffer_size: prop_buffer size + * @prop_buffer: buffer containing encoded gpu props for userspace + * @priv_data: private data structure freed after kbase_gpuprops_populate_user_buffer() + * + * @note Structure should be kbase specific, it should not contain userspace (e.g. base) + * structures nor should it ever contain raw register values unless it is + * a bitmask (e.g. shader_present, stack_present). 
+ */ struct kbase_gpu_props { /* kernel-only properties */ + u64 shader_present; + u64 stack_present; + u64 tiler_present; + u64 l2_present; + u8 num_cores; u8 num_core_groups; u8 num_address_spaces; u8 num_job_slots; - struct kbase_gpu_cache_props l2_props; + u32 coherency_mode; + u32 gpu_freq_khz_max; + u32 log2_program_counter_size; + + u8 log2_line_size; + u8 num_l2_slices; + + u32 max_threads; + u8 impl_tech; + + struct kbase_js_features_props js_features[GPU_MAX_JOB_SLOTS]; + + struct kbase_gpu_id_props gpu_id; + + struct kbase_gpu_features_props gpu_features; + + struct kbase_coherency_props coherency_info; - struct kbase_gpu_mem_props mem; struct kbase_gpu_mmu_props mmu; /* Properties based on the current resource available */ @@ -160,11 +263,9 @@ struct kbase_gpu_props { /* Properties based on the maximum resource available */ struct max_config_props max_config; - /* Properties shared with userspace */ - struct base_gpu_props props; - u32 prop_buffer_size; void *prop_buffer; + void *priv_data; }; -#endif /* _KBASE_GPUPROPS_TYPES_H_ */ +#endif /* _KBASE_GPUPROPS_TYPES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c index 0eba889e5b19..79c3d5129324 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,10 +22,8 @@ #include "mali_kbase_gwt.h" #include -static inline void kbase_gpu_gwt_setup_page_permission( - struct kbase_context *kctx, - unsigned long flag, - struct rb_node *node) +static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kctx, + unsigned long flag, struct rb_node *node) { struct rb_node *rbnode = node; @@ -35,12 +33,11 @@ static inline void kbase_gpu_gwt_setup_page_permission( reg = rb_entry(rbnode, struct kbase_va_region, rblink); if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) && - (reg->flags & KBASE_REG_GPU_WR)) { + (reg->flags & KBASE_REG_GPU_WR)) { err = kbase_mmu_update_pages(kctx, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - reg->gpu_alloc->nents, - reg->flags & flag, - reg->gpu_alloc->group_id); + kbase_get_gpu_phy_pages(reg), + reg->gpu_alloc->nents, reg->flags & flag, + reg->gpu_alloc->group_id); if (err) dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n"); } @@ -49,16 +46,14 @@ static inline void kbase_gpu_gwt_setup_page_permission( } } -static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, - unsigned long flag) +static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag) { kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_same))); + rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree)); kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_custom))); + rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree)); } - int kbase_gpu_gwt_start(struct kbase_context *kctx) { kbase_gpu_vm_lock(kctx); @@ -77,8 +72,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) * Status will be restored on end of dumping in gwt_stop. 
*/ kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs; - kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | - KBASE_SERIALIZE_INTER_SLOT; + kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT; #endif /* Mark gwt enabled before making pages read only in case a @@ -128,8 +122,7 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx) #if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b) #else -static int list_cmp_function(void *priv, struct list_head *a, - struct list_head *b) +static int list_cmp_function(void *priv, struct list_head *a, struct list_head *b) #endif { const struct kbasep_gwt_list_element *elementA = @@ -144,22 +137,20 @@ static int list_cmp_function(void *priv, struct list_head *a, return -1; } -static void kbase_gpu_gwt_collate(struct kbase_context *kctx, - struct list_head *snapshot_list) +static void kbase_gpu_gwt_collate(struct kbase_context *kctx, struct list_head *snapshot_list) { struct kbasep_gwt_list_element *pos, *n; struct kbasep_gwt_list_element *collated = NULL; + CSTD_UNUSED(kctx); + /* Sort the list */ list_sort(NULL, snapshot_list, list_cmp_function); /* Combine contiguous areas. 
*/ list_for_each_entry_safe(pos, n, snapshot_list, link) { - if (collated == NULL || collated->region != - pos->region || - (collated->page_addr + - (collated->num_pages * PAGE_SIZE)) != - pos->page_addr) { + if (collated == NULL || collated->region != pos->region || + (collated->page_addr + (collated->num_pages * PAGE_SIZE)) != pos->page_addr) { /* This is the first time through, a new region or * is not contiguous - start collating to this element */ @@ -174,15 +165,12 @@ static void kbase_gpu_gwt_collate(struct kbase_context *kctx, } } -int kbase_gpu_gwt_dump(struct kbase_context *kctx, - union kbase_ioctl_cinstr_gwt_dump *gwt_dump) +int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_dump *gwt_dump) { const u32 ubuf_size = gwt_dump->in.len; u32 ubuf_count = 0; - __user void *user_addr = (__user void *) - (uintptr_t)gwt_dump->in.addr_buffer; - __user void *user_sizes = (__user void *) - (uintptr_t)gwt_dump->in.size_buffer; + __user void *user_addr = (__user void *)(uintptr_t)gwt_dump->in.addr_buffer; + __user void *user_sizes = (__user void *)(uintptr_t)gwt_dump->in.size_buffer; kbase_gpu_vm_lock(kctx); @@ -192,8 +180,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, return -EPERM; } - if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer - || !gwt_dump->in.size_buffer) { + if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer || !gwt_dump->in.size_buffer) { kbase_gpu_vm_unlock(kctx); /* We don't have any valid user space buffer to copy the * write modified addresses. 
@@ -201,11 +188,8 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, return -EINVAL; } - if (list_empty(&kctx->gwt_snapshot_list) && - !list_empty(&kctx->gwt_current_list)) { - - list_replace_init(&kctx->gwt_current_list, - &kctx->gwt_snapshot_list); + if (list_empty(&kctx->gwt_snapshot_list) && !list_empty(&kctx->gwt_current_list)) { + list_replace_init(&kctx->gwt_current_list, &kctx->gwt_snapshot_list); /* We have collected all write faults so far * and they will be passed on to user space. @@ -225,32 +209,26 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, int err; struct kbasep_gwt_list_element *dump_info, *n; - list_for_each_entry_safe(dump_info, n, - &kctx->gwt_snapshot_list, link) { + list_for_each_entry_safe(dump_info, n, &kctx->gwt_snapshot_list, link) { addr_buffer[count] = dump_info->page_addr; num_page_buffer[count] = dump_info->num_pages; count++; list_del(&dump_info->link); kfree(dump_info); - if (ARRAY_SIZE(addr_buffer) == count || - ubuf_size == (ubuf_count + count)) + if (ARRAY_SIZE(addr_buffer) == count || ubuf_size == (ubuf_count + count)) break; } if (count) { - err = copy_to_user((user_addr + - (ubuf_count * sizeof(u64))), - (void *)addr_buffer, - count * sizeof(u64)); + err = copy_to_user((user_addr + (ubuf_count * sizeof(u64))), + (void *)addr_buffer, count * sizeof(u64)); if (err) { dev_err(kctx->kbdev->dev, "Copy to user failure\n"); kbase_gpu_vm_unlock(kctx); return err; } - err = copy_to_user((user_sizes + - (ubuf_count * sizeof(u64))), - (void *)num_page_buffer, - count * sizeof(u64)); + err = copy_to_user((user_sizes + (ubuf_count * sizeof(u64))), + (void *)num_page_buffer, count * sizeof(u64)); if (err) { dev_err(kctx->kbdev->dev, "Copy to user failure\n"); kbase_gpu_vm_unlock(kctx); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.h b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h index 9fdd68d62fb8..e184375d0589 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h @@ -1,7 +1,7 @@ 
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,6 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx); * * Return: 0 on success, error on failure. */ -int kbase_gpu_gwt_dump(struct kbase_context *kctx, - union kbase_ioctl_cinstr_gwt_dump *gwt_dump); +int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_dump *gwt_dump); #endif /* _KBASE_GWT_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index b07327a55c0a..dd0873f03125 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -25,67 +25,68 @@ #include #include -#include "gpu/mali_kbase_gpu_regmap.h" +#include #include "mali_kbase.h" #include "mali_kbase_hw.h" void kbase_hw_set_features_mask(struct kbase_device *kbdev) { const enum base_hw_feature *features; - u32 gpu_id; - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: + switch (kbdev->gpu_props.gpu_id.product_model) { + case GPU_ID_PRODUCT_TMIX: features = base_hw_features_tMIx; break; - case GPU_ID2_PRODUCT_THEX: + case GPU_ID_PRODUCT_THEX: features = base_hw_features_tHEx; break; - case GPU_ID2_PRODUCT_TSIX: + case GPU_ID_PRODUCT_TSIX: features = base_hw_features_tSIx; break; - case GPU_ID2_PRODUCT_TDVX: + case GPU_ID_PRODUCT_TDVX: features = base_hw_features_tDVx; break; - case GPU_ID2_PRODUCT_TNOX: + case GPU_ID_PRODUCT_TNOX: features = base_hw_features_tNOx; break; - case GPU_ID2_PRODUCT_TGOX: + case GPU_ID_PRODUCT_TGOX: features = base_hw_features_tGOx; break; - case GPU_ID2_PRODUCT_TTRX: + case GPU_ID_PRODUCT_TTRX: features = 
base_hw_features_tTRx; break; - case GPU_ID2_PRODUCT_TNAX: + case GPU_ID_PRODUCT_TNAX: features = base_hw_features_tNAx; break; - case GPU_ID2_PRODUCT_LBEX: - case GPU_ID2_PRODUCT_TBEX: + case GPU_ID_PRODUCT_LBEX: + case GPU_ID_PRODUCT_TBEX: features = base_hw_features_tBEx; break; - case GPU_ID2_PRODUCT_TBAX: + case GPU_ID_PRODUCT_TBAX: features = base_hw_features_tBAx; break; - case GPU_ID2_PRODUCT_TODX: - case GPU_ID2_PRODUCT_LODX: + case GPU_ID_PRODUCT_TODX: + case GPU_ID_PRODUCT_LODX: features = base_hw_features_tODx; break; - case GPU_ID2_PRODUCT_TGRX: + case GPU_ID_PRODUCT_TGRX: features = base_hw_features_tGRx; break; - case GPU_ID2_PRODUCT_TVAX: + case GPU_ID_PRODUCT_TVAX: features = base_hw_features_tVAx; break; - case GPU_ID2_PRODUCT_TTUX: - case GPU_ID2_PRODUCT_LTUX: + case GPU_ID_PRODUCT_TTUX: + case GPU_ID_PRODUCT_LTUX: features = base_hw_features_tTUx; break; - case GPU_ID2_PRODUCT_TTIX: - case GPU_ID2_PRODUCT_LTIX: + case GPU_ID_PRODUCT_TTIX: + case GPU_ID_PRODUCT_LTIX: features = base_hw_features_tTIx; break; + case GPU_ID_PRODUCT_TKRX: + case GPU_ID_PRODUCT_LKRX: + features = base_hw_features_tKRx; + break; default: features = base_hw_features_generic; break; @@ -103,8 +104,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) * unclear or ambiguous ARCH spec. */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) - clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, - &kbdev->hw_features_mask[0]); + clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, &kbdev->hw_features_mask[0]); #endif } @@ -123,8 +123,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() * before calling this function. 
*/ -static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( - struct kbase_device *kbdev) +static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_device *kbdev) { const enum base_hw_issue *issues = NULL; @@ -137,130 +136,135 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( }; static const struct base_hw_product base_hw_products[] = { - { GPU_ID2_PRODUCT_TMIX, - { { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tMIx_r0p0_05dev0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, + { GPU_ID_PRODUCT_TMIX, + { { GPU_ID_VERSION_MAKE(0, 0, 1), base_hw_issues_tMIx_r0p0_05dev0 }, + { GPU_ID_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, { U32_MAX /* sentinel value */, NULL } } }, - { GPU_ID2_PRODUCT_THEX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, - { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, - { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, - { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, + { GPU_ID_PRODUCT_THEX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, + { GPU_ID_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, + { GPU_ID_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, + { GPU_ID_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TSIX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, - { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, - { 
GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, + { GPU_ID_PRODUCT_TSIX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, + { GPU_ID_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, + { GPU_ID_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TDVX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, + { GPU_ID_PRODUCT_TDVX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TNOX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, + { GPU_ID_PRODUCT_TNOX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TGOX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, - { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, + { GPU_ID_PRODUCT_TGOX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, + { GPU_ID_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TTRX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, - { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, - { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, + { GPU_ID_PRODUCT_TTRX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, + { GPU_ID_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, + { GPU_ID_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TNAX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 3), 
base_hw_issues_tNAx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, - { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, + { GPU_ID_PRODUCT_TNAX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, + { GPU_ID_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_LBEX, - { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, - { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, + { GPU_ID_PRODUCT_LBEX, + { { GPU_ID_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, + { GPU_ID_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TBEX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, - { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, + { GPU_ID_PRODUCT_TBEX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, + { GPU_ID_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TBAX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, + { GPU_ID_PRODUCT_TBAX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, + { 
GPU_ID_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TODX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, + { GPU_ID_PRODUCT_TODX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_LODX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, + { GPU_ID_PRODUCT_LODX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TGRX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0 }, + { GPU_ID_PRODUCT_TGRX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TVAX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_PRODUCT_TVAX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TTUX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, - { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, - { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, - { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, - { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, - { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, + { GPU_ID_PRODUCT_TTUX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, + { GPU_ID_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, + { GPU_ID_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, + { GPU_ID_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, + { GPU_ID_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_LTUX, - 
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, - { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, - { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, - { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, - { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, + { GPU_ID_PRODUCT_LTUX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, + { GPU_ID_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, + { GPU_ID_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, + { GPU_ID_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TTIX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, + { GPU_ID_PRODUCT_TTIX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_LTIX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, + { GPU_ID_PRODUCT_LTIX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, { U32_MAX, NULL } } }, + { GPU_ID_PRODUCT_TKRX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tKRx_r0p0 }, + { U32_MAX, NULL } } }, + { GPU_ID_PRODUCT_LKRX, + { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tKRx_r0p0 }, + { U32_MAX, NULL } } }, }; - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + struct kbase_gpu_id_props *gpu_id = &kbdev->gpu_props.gpu_id; const struct base_hw_product *product = NULL; size_t p; /* Stop when we reach the end of the products array. */ for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { - if (product_model == base_hw_products[p].product_model) { + if (gpu_id->product_model == base_hw_products[p].product_model) { product = &base_hw_products[p]; break; } @@ -268,14 +272,13 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( if (product != NULL) { /* Found a matching product. 
*/ - const u32 version = gpu_id & GPU_ID2_VERSION; + const u32 version = gpu_id->version_id; u32 fallback_version = 0; const enum base_hw_issue *fallback_issues = NULL; size_t v; /* Stop when we reach the end of the map. */ for (v = 0; product->map[v].version != U32_MAX; ++v) { - if (version == product->map[v].version) { /* Exact match so stop. */ issues = product->map[v].issues; @@ -286,11 +289,11 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( * known version not later than the actual version. */ if ((version > product->map[v].version) && - (product->map[v].version >= fallback_version)) { + (product->map[v].version >= fallback_version)) { #if MALI_CUSTOMER_RELEASE /* Match on version's major and minor fields */ - if (((version ^ product->map[v].version) >> - GPU_ID2_VERSION_MINOR_SHIFT) == 0) + if (GPU_ID_VERSION_ID_MAJOR_MINOR_GET(version ^ + product->map[v].version) == 0) #endif { fallback_version = product->map[v].version; @@ -300,32 +303,29 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( } if ((issues == NULL) && (fallback_issues != NULL)) { + u16 fallback_version_major = GPU_ID_VERSION_ID_MAJOR_GET(fallback_version); + u16 fallback_version_minor = GPU_ID_VERSION_ID_MINOR_GET(fallback_version); + u16 fallback_version_status = + GPU_ID_VERSION_ID_STATUS_GET(fallback_version); + /* Fall back to the issue set of the most recent known * version not later than the actual version. 
*/ issues = fallback_issues; dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n", - (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT); + gpu_id->version_major, gpu_id->version_minor, + gpu_id->version_status); dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n", - (fallback_version & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (fallback_version & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (fallback_version & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT); + fallback_version_major, fallback_version_minor, + fallback_version_status); dev_notice(kbdev->dev, "Execution proceeding normally with fallback match\n"); - gpu_id &= ~GPU_ID2_VERSION; - gpu_id |= fallback_version; - kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; - - kbase_gpuprops_update_core_props_gpu_id( - &kbdev->gpu_props.props); + gpu_id->version_major = fallback_version_major; + gpu_id->version_minor = fallback_version_minor; + gpu_id->version_status = fallback_version_status; + gpu_id->version_id = fallback_version; } } return issues; @@ -334,101 +334,85 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( int kbase_hw_set_issues_mask(struct kbase_device *kbdev) { const enum base_hw_issue *issues; - u32 gpu_id; + struct kbase_gpu_id_props *gpu_id; u32 impl_tech; - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; + gpu_id = &kbdev->gpu_props.gpu_id; + impl_tech = kbdev->gpu_props.impl_tech; - if (impl_tech != IMPLEMENTATION_MODEL) { + if (impl_tech != THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE) { issues = kbase_hw_get_issues_for_new_id(kbdev); if (issues == NULL) { - dev_err(kbdev->dev, - "HW product - Unknown GPU ID %x", gpu_id); + dev_err(kbdev->dev, "HW product - Unknown GPU 
Product ID %x", + gpu_id->product_id); return -EINVAL; } - -#if !MALI_CUSTOMER_RELEASE - /* The GPU ID might have been replaced with the last - * known version of the same GPU. - */ - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -#endif } else { /* Software model */ - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: + switch (gpu_id->product_model) { + case GPU_ID_PRODUCT_TMIX: issues = base_hw_issues_model_tMIx; break; - case GPU_ID2_PRODUCT_THEX: + case GPU_ID_PRODUCT_THEX: issues = base_hw_issues_model_tHEx; break; - case GPU_ID2_PRODUCT_TSIX: + case GPU_ID_PRODUCT_TSIX: issues = base_hw_issues_model_tSIx; break; - case GPU_ID2_PRODUCT_TDVX: + case GPU_ID_PRODUCT_TDVX: issues = base_hw_issues_model_tDVx; break; - case GPU_ID2_PRODUCT_TNOX: + case GPU_ID_PRODUCT_TNOX: issues = base_hw_issues_model_tNOx; break; - case GPU_ID2_PRODUCT_TGOX: + case GPU_ID_PRODUCT_TGOX: issues = base_hw_issues_model_tGOx; break; - case GPU_ID2_PRODUCT_TTRX: + case GPU_ID_PRODUCT_TTRX: issues = base_hw_issues_model_tTRx; break; - case GPU_ID2_PRODUCT_TNAX: + case GPU_ID_PRODUCT_TNAX: issues = base_hw_issues_model_tNAx; break; - case GPU_ID2_PRODUCT_LBEX: - case GPU_ID2_PRODUCT_TBEX: + case GPU_ID_PRODUCT_LBEX: + case GPU_ID_PRODUCT_TBEX: issues = base_hw_issues_model_tBEx; break; - case GPU_ID2_PRODUCT_TBAX: + case GPU_ID_PRODUCT_TBAX: issues = base_hw_issues_model_tBAx; break; - case GPU_ID2_PRODUCT_TODX: - case GPU_ID2_PRODUCT_LODX: + case GPU_ID_PRODUCT_TODX: + case GPU_ID_PRODUCT_LODX: issues = base_hw_issues_model_tODx; break; - case GPU_ID2_PRODUCT_TGRX: + case GPU_ID_PRODUCT_TGRX: issues = base_hw_issues_model_tGRx; break; - case GPU_ID2_PRODUCT_TVAX: + case GPU_ID_PRODUCT_TVAX: issues = base_hw_issues_model_tVAx; break; - case GPU_ID2_PRODUCT_TTUX: - case GPU_ID2_PRODUCT_LTUX: + case GPU_ID_PRODUCT_TTUX: + case GPU_ID_PRODUCT_LTUX: issues = base_hw_issues_model_tTUx; break; - case GPU_ID2_PRODUCT_TTIX: - case GPU_ID2_PRODUCT_LTIX: + case 
GPU_ID_PRODUCT_TTIX: + case GPU_ID_PRODUCT_LTIX: issues = base_hw_issues_model_tTIx; break; + case GPU_ID_PRODUCT_TKRX: + case GPU_ID_PRODUCT_LKRX: + issues = base_hw_issues_model_tKRx; + break; default: - dev_err(kbdev->dev, - "HW issues - Unknown GPU ID %x", gpu_id); + dev_err(kbdev->dev, "HW issues - Unknown Product ID %x", + gpu_id->product_id); return -EINVAL; } } - dev_info(kbdev->dev, - "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", - (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> - GPU_ID2_PRODUCT_MAJOR_SHIFT, - (gpu_id & GPU_ID2_ARCH_MAJOR) >> - GPU_ID2_ARCH_MAJOR_SHIFT, - (gpu_id & GPU_ID2_ARCH_MINOR) >> - GPU_ID2_ARCH_MINOR_SHIFT, - (gpu_id & GPU_ID2_ARCH_REV) >> - GPU_ID2_ARCH_REV_SHIFT, - (gpu_id & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT); + dev_info(kbdev->dev, "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", + gpu_id->product_major, gpu_id->arch_major, gpu_id->arch_minor, gpu_id->arch_rev, + gpu_id->version_major, gpu_id->version_minor, gpu_id->version_status); for (; *issues != BASE_HW_ISSUE_END; issues++) set_bit(*issues, &kbdev->hw_issues_mask[0]); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.h b/drivers/gpu/arm/bifrost/mali_kbase_hw.h index ddcddaaa429d..44e1ee4a4a50 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,16 +33,21 @@ * @kbdev: Device pointer * @issue: issue to be checked */ -#define kbase_hw_has_issue(kbdev, issue)\ - test_bit(issue, &(kbdev)->hw_issues_mask[0]) +#define kbase_hw_has_issue(kbdev, issue) test_bit(issue, &(kbdev)->hw_issues_mask[0]) /** * kbase_hw_has_feature - Tell whether a feature is supported * @kbdev: Device pointer * @feature: feature to be checked */ -#define kbase_hw_has_feature(kbdev, feature)\ - test_bit(feature, &(kbdev)->hw_features_mask[0]) +#define kbase_hw_has_feature(kbdev, feature) test_bit(feature, &(kbdev)->hw_features_mask[0]) + +/** + * kbase_hw_has_l2_slice_hash_feature - Tell if the feature is supported + * @kbdev: Device pointer + */ +#define kbase_hw_has_l2_slice_hash_feature(kbdev) \ + test_bit(BASE_HW_FEATURE_L2_SLICE_HASH, &(kbdev)->hw_features_mask[0]) /** * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID @@ -68,4 +73,4 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev); */ void kbase_hw_set_features_mask(struct kbase_device *kbdev); -#endif /* _KBASE_HW_H_ */ +#endif /* _KBASE_HW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h index f537b7f0ac90..3bda54d23c83 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,18 +26,19 @@ #ifndef _KBASE_HWACCESS_GPUPROPS_H_ #define _KBASE_HWACCESS_GPUPROPS_H_ +#include + /** * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from * GPU - * @kbdev: Device pointer - * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * @kbdev: Device pointer + * @regdump: Pointer to a zero initialised kbasep_gpuprops_regdump structure * * The caller should ensure that GPU remains powered-on during this function. * * Return: Zero for succeess or a Linux error code */ -int kbase_backend_gpuprops_get(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump); +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprops_regdump *regdump); /** * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with @@ -53,22 +54,8 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, * * Return: Zero for succeess or a Linux error code */ -int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, - struct kbase_current_config_regdump *curr_config_regdump); - -/** - * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read - * from GPU - * @kbdev: Device pointer - * @regdump: Pointer to struct kbase_gpuprops_regdump structure - * - * This function reads GPU properties that are dependent on the hardware - * features bitmask. It will power-on the GPU if required. 
- * - * Return: Zero for succeess or a Linux error code - */ -int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump); +int kbase_backend_gpuprops_get_curr_config( + struct kbase_device *kbdev, struct kbase_current_config_regdump *curr_config_regdump); /** * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read @@ -82,7 +69,6 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, * Return: Zero on success, Linux error code on failure */ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump); - + struct kbasep_gpuprops_regdump *regdump); #endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h index 37663101a346..198a79e28fb2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,9 +58,8 @@ struct kbase_instr_hwcnt_enable { * * Return: 0 on success */ -int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_instr_hwcnt_enable *enable); +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_instr_hwcnt_enable *enable); /** * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection @@ -104,8 +103,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); * * Return: true if the dump is complete */ -bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, - bool * const success); +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, bool *const success); /** * kbase_instr_hwcnt_clear() - Clear HW counters diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h index ca77c192deea..93003754820d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,8 +33,7 @@ * * Caller must hold the HW access lock */ -void kbase_backend_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom); /** * kbase_backend_slot_update - Update state based on slot ringbuffers @@ -62,8 +61,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev); * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none * available */ -int kbase_backend_find_and_release_free_address_space( - struct kbase_device *kbdev, struct kbase_context *kctx); +int kbase_backend_find_and_release_free_address_space(struct kbase_device *kbdev, + struct kbase_context *kctx); /** * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the @@ -76,9 +75,7 @@ int kbase_backend_find_and_release_free_address_space( * * Return: true if successful, false if ASID not assigned. */ -bool kbase_backend_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int as_nr); +bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr); /** * kbase_backend_use_ctx_sched() - Activate a context. @@ -108,8 +105,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_contex * * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock */ -void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will @@ -122,8 +118,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, * This function must perform any operations that could not be performed in IRQ * context by kbase_backend_release_ctx_irq(). 
*/ -void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, - struct kbase_context *kctx); +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, struct kbase_context *kctx); /** * kbase_backend_cache_clean - Perform a cache clean if the given atom requires @@ -134,9 +129,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, * On some GPUs, the GPU cache must be cleaned following a failed atom. This * function performs a clean if it is required by @katom. */ -void kbase_backend_cache_clean(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom); /** * kbase_backend_complete_wq() - Perform backend-specific actions required on @@ -149,8 +142,7 @@ void kbase_backend_cache_clean(struct kbase_device *kbdev, * * Return: true if atom has completed, false if atom should be re-submitted */ -void kbase_backend_complete_wq(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +void kbase_backend_complete_wq(struct kbase_device *kbdev, struct kbase_jd_atom *katom); #if !MALI_USE_CSF /** @@ -163,8 +155,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, * This function should only be called from kbase_jd_done_worker() or * js_return_worker(). */ -void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req); +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req); #endif /* !MALI_USE_CSF */ /** @@ -242,8 +233,7 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js); * Work out whether to leave disjoint state when finishing an atom that was * originated by kbase_job_check_enter_disjoint(). 
*/ -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom); +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); /** * kbase_backend_jm_kill_running_jobs_from_kctx - Kill all jobs that are diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h index effb2ffeb349..fc43a3fcd69f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,7 @@ #ifndef _KBASE_HWACCESS_PM_H_ #define _KBASE_HWACCESS_PM_H_ -#include +#include #include #include @@ -41,7 +41,8 @@ struct kbase_device; * * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * Must be called before any other power management function + * This function must be called only when a kbase device is initialized and + * must be called before any other power management function. * * Return: 0 if the power management framework was successfully initialized. */ @@ -66,8 +67,7 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev); * * Return: 0 if powerup was successful. */ -int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, - unsigned int flags); +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, unsigned int flags); /** * kbase_hwaccess_pm_halt - Halt the power management framework. @@ -123,8 +123,7 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); * * This determines which cores the power manager is allowed to use. 
*/ -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask); +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask); #else /** * kbase_pm_set_debug_core_mask - Set the debug core mask. @@ -136,9 +135,8 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, * * This determines which cores the power manager is allowed to use. */ -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask_js0, u64 new_core_mask_js1, - u64 new_core_mask_js2); +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, + u64 new_core_mask_js1, u64 new_core_mask_js2); #endif /* MALI_USE_CSF */ /** @@ -150,8 +148,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, * * Return: The current policy */ -const struct kbase_pm_ca_policy -*kbase_pm_ca_get_policy(struct kbase_device *kbdev); +const struct kbase_pm_ca_policy *kbase_pm_ca_get_policy(struct kbase_device *kbdev); /** * kbase_pm_ca_set_policy - Change the policy to the one specified. @@ -160,8 +157,7 @@ const struct kbase_pm_ca_policy * @policy: The policy to change to (valid pointer returned from * @ref kbase_pm_ca_list_policies) */ -void kbase_pm_ca_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_ca_policy *policy); +void kbase_pm_ca_set_policy(struct kbase_device *kbdev, const struct kbase_pm_ca_policy *policy); /** * kbase_pm_ca_list_policies - Retrieve a static list of the available policies. @@ -171,8 +167,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, * * Return: The number of policies */ -int -kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); +int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy *const **policies); /** * kbase_pm_get_policy - Get the current policy. 
@@ -193,8 +188,7 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); * @policy: The policy to change to (valid pointer returned from * @ref kbase_pm_list_policies) */ -void kbase_pm_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_policy *policy); +void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *policy); /** * kbase_pm_list_policies - Retrieve a static list of the available policies. @@ -205,8 +199,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, * * Return: The number of policies */ -int kbase_pm_list_policies(struct kbase_device *kbdev, - const struct kbase_pm_policy * const **list); +int kbase_pm_list_policies(struct kbase_device *kbdev, const struct kbase_pm_policy *const **list); /** * kbase_pm_protected_mode_enable() - Enable protected mode diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h index ac2a26d28d89..8e5a8137be45 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h @@ -22,13 +22,16 @@ #ifndef _KBASE_BACKEND_TIME_H_ #define _KBASE_BACKEND_TIME_H_ -#if MALI_USE_CSF /** * struct kbase_backend_time - System timestamp attributes. * * @multiplier: Numerator of the converter's fraction. * @divisor: Denominator of the converter's fraction. * @offset: Converter's offset term. + * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be + * consistent with the minimum MCU frequency. This + * array caches the results of all of the conversions + * for ease of use later on. 
* * According to Generic timer spec, system timer: * - Increments at a fixed frequency @@ -49,11 +52,15 @@ * */ struct kbase_backend_time { +#if MALI_USE_CSF u64 multiplier; u64 divisor; s64 offset; +#endif + unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT]; }; +#if MALI_USE_CSF /** * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp. * @@ -73,8 +80,8 @@ u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kb * @ts: Pointer to struct timespec to store current monotonic * time in */ -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec64 *ts); +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, + struct timespec64 *ts); /** * kbase_backend_get_gpu_time_norequest() - Get current GPU time without @@ -85,10 +92,42 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, * @ts: Pointer to struct timespec to store current monotonic * time in */ -void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, - u64 *cycle_counter, - u64 *system_time, - struct timespec64 *ts); +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts); + +/** + * kbase_device_set_timeout_ms - Set an unscaled device timeout in milliseconds, + * subject to the maximum timeout constraint. + * + * @kbdev: KBase device pointer. + * @selector: The specific timeout that should be scaled. + * @timeout_ms: The absolute timeout in milliseconds. + * + * This function writes the absolute timeout in milliseconds to the table of + * precomputed device timeouts, while establishing an upper bound on the individual + * timeout of UINT_MAX milliseconds.
+ */ +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms); + +/** + * kbase_device_set_timeout - Calculate the given timeout using the provided + * timeout cycles and multiplier. + * + * @kbdev: KBase device pointer. + * @selector: The specific timeout that should be scaled. + * @timeout_cycles: The timeout in cycles which should be scaled. + * @cycle_multiplier: A multiplier applied to the number of cycles, allowing + * the callsite to scale the minimum timeout based on the + * host device. + * + * This function writes the scaled timeout to the per-device table to avoid + * having to recompute the timeouts every single time that the related methods + * are called. + */ +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier); + /** * kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled * GPU frequency, using a choice from @@ -99,8 +138,7 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, * * Return: Timeout in milliseconds, as an unsigned integer. */ -unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, - enum kbase_timeout_selector selector); +unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector); /** * kbase_backend_get_cycle_cnt - Reads the GPU cycle counter diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index f44426a736ca..f66529485975 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,9 +47,9 @@ /* Return whether katom will run on the GPU or not. Currently only soft jobs and * dependency-only atoms do not run on the GPU */ -#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ - ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ - BASE_JD_REQ_DEP))) +#define IS_GPU_ATOM(katom) \ + (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ + ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP))) /* * This is the kernel side of the API. Only entry points are: @@ -61,8 +61,7 @@ * - to the event subsystem (signals the completion/failure of bag/job-chains). */ -static void __user * -get_compat_pointer(struct kbase_context *kctx, const u64 p) +static void __user *get_compat_pointer(struct kbase_context *kctx, const u64 p) { #if IS_ENABLED(CONFIG_COMPAT) if (kbase_ctx_flag(kctx, KCTX_COMPAT)) @@ -76,8 +75,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom) { katom->status = KBASE_JD_ATOM_STATE_COMPLETED; kbase_kinstr_jm_atom_complete(katom); - dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", - (void *)katom); + dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", (void *)katom); KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE(katom->kctx->kbdev, katom); } @@ -92,15 +90,13 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", - (void *)katom, (void *)kctx); + dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { /* Dependency only atom */ - trace_sysgraph(SGR_SUBMIT, kctx->id, - kbase_jd_atom_id(katom->kctx, katom)); + trace_sysgraph(SGR_SUBMIT, kctx->id, 
kbase_jd_atom_id(katom->kctx, katom)); jd_mark_atom_complete(katom); return false; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { @@ -136,19 +132,16 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) * the dependencies, hence we may attempt to submit it before they are * met. Other atoms must have had both dependencies resolved. */ - if (IS_GPU_ATOM(katom) || - (!kbase_jd_katom_dep_atom(&katom->dep[0]) && - !kbase_jd_katom_dep_atom(&katom->dep[1]))) { + if (IS_GPU_ATOM(katom) || (!kbase_jd_katom_dep_atom(&katom->dep[0]) && + !kbase_jd_katom_dep_atom(&katom->dep[1]))) { /* katom dep complete, attempt to run it */ bool resched = false; - KBASE_TLSTREAM_TL_RUN_ATOM_START( - katom->kctx->kbdev, katom, - kbase_jd_atom_id(katom->kctx, katom)); + KBASE_TLSTREAM_TL_RUN_ATOM_START(katom->kctx->kbdev, katom, + kbase_jd_atom_id(katom->kctx, katom)); resched = jd_run_atom(katom); KBASE_TLSTREAM_TL_RUN_ATOM_END(katom->kctx->kbdev, katom, - kbase_jd_atom_id(katom->kctx, - katom)); + kbase_jd_atom_id(katom->kctx, katom)); if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { /* The atom has already finished */ @@ -162,6 +155,7 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { + CSTD_UNUSED(katom); } static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) @@ -190,7 +184,8 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) * jctx.lock must be held when this is called. 
*/ -static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) +static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, + const struct base_jd_atom *user_atom) { int err = -EINVAL; u32 res_no; @@ -213,9 +208,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st goto failed_input_alloc; } - if (copy_from_user(input_extres, - get_compat_pointer(katom->kctx, user_atom->extres_list), - sizeof(*input_extres) * katom->nr_extres) != 0) { + if (copy_from_user(input_extres, get_compat_pointer(katom->kctx, user_atom->extres_list), + sizeof(*input_extres) * katom->nr_extres) != 0) { err = -EINVAL; goto failed_input_copy; } @@ -238,7 +232,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && - (reg->flags & KBASE_REG_PROTECTED)) { + (reg->flags & KBASE_REG_PROTECTED)) { katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } @@ -284,9 +278,8 @@ failed_input_alloc: return err; } -static inline void jd_resolve_dep(struct list_head *out_list, - struct kbase_jd_atom *katom, - u8 d, bool ctx_is_dying) +static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, u8 d, + bool ctx_is_dying) { u8 other_d = !d; @@ -295,28 +288,26 @@ static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *other_dep_atom; u8 dep_type; - dep_atom = list_entry(katom->dep_head[d].next, - struct kbase_jd_atom, dep_item[d]); + dep_atom = list_entry(katom->dep_head[d].next, struct kbase_jd_atom, dep_item[d]); list_del(katom->dep_head[d].next); dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); kbase_jd_katom_dep_clear(&dep_atom->dep[d]); if (katom->event_code != BASE_JD_EVENT_DONE && - (dep_type != BASE_JD_DEP_TYPE_ORDER)) { + (dep_type != BASE_JD_DEP_TYPE_ORDER)) { dep_atom->event_code = katom->event_code; - KBASE_DEBUG_ASSERT(dep_atom->status != - 
KBASE_JD_ATOM_STATE_UNUSED); + KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); dep_atom->will_fail_event_code = dep_atom->event_code; } - other_dep_atom = (struct kbase_jd_atom *) - kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); + other_dep_atom = + (struct kbase_jd_atom *)kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); - if (!dep_atom->in_jd_list && (!other_dep_atom || - (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && - !dep_atom->will_fail_event_code && - !other_dep_atom->will_fail_event_code))) { + if (!dep_atom->in_jd_list && + (!other_dep_atom || + (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && !dep_atom->will_fail_event_code && + !other_dep_atom->will_fail_event_code))) { dep_atom->in_jd_list = true; list_add_tail(&dep_atom->jd_item, out_list); } @@ -352,15 +343,13 @@ static bool is_dep_valid(struct kbase_jd_atom *katom) * not valid */ if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && - (katom->event_code != BASE_JD_EVENT_DONE || - katom->will_fail_event_code)) + (katom->event_code != BASE_JD_EVENT_DONE || katom->will_fail_event_code)) return false; return true; } -static void jd_try_submitting_deps(struct list_head *out_list, - struct kbase_jd_atom *node) +static void jd_try_submitting_deps(struct list_head *out_list, struct kbase_jd_atom *node) { int i; @@ -368,15 +357,13 @@ static void jd_try_submitting_deps(struct list_head *out_list, struct list_head *pos; list_for_each(pos, &node->dep_head[i]) { - struct kbase_jd_atom *dep_atom = list_entry(pos, - struct kbase_jd_atom, dep_item[i]); + struct kbase_jd_atom *dep_atom = + list_entry(pos, struct kbase_jd_atom, dep_item[i]); if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { /*Check if atom deps look sane*/ - bool dep0_valid = is_dep_valid( - dep_atom->dep[0].atom); - bool dep1_valid = is_dep_valid( - dep_atom->dep[1].atom); + bool dep0_valid = is_dep_valid(dep_atom->dep[0].atom); + bool dep1_valid = is_dep_valid(dep_atom->dep[1].atom); if (dep0_valid && dep1_valid) { 
dep_atom->in_jd_list = true; @@ -412,9 +399,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) /* If this atom wrote to JIT memory, find out how much it has written * and update the usage information in the region. */ - for (idx = 0; - idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; - idx++) { + for (idx = 0; idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; idx++) { enum heap_pointer { LOW = 0, HIGH, COUNT }; size_t size_to_read; u64 read_val; @@ -422,16 +407,15 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) reg = kctx->jit_alloc[katom->jit_ids[idx]]; if (!reg) { - dev_warn(kctx->kbdev->dev, - "%s: JIT id[%u]=%u has no region\n", - __func__, idx, katom->jit_ids[idx]); + dev_warn(kctx->kbdev->dev, "%s: JIT id[%u]=%u has no region\n", __func__, + idx, katom->jit_ids[idx]); continue; } if (reg == KBASE_RESERVED_REG_JIT_ALLOC) { dev_warn(kctx->kbdev->dev, - "%s: JIT id[%u]=%u has failed to allocate a region\n", - __func__, idx, katom->jit_ids[idx]); + "%s: JIT id[%u]=%u has failed to allocate a region\n", __func__, + idx, katom->jit_ids[idx]); continue; } @@ -444,15 +428,14 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) size_to_read = sizeof(u64[COUNT]); - ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read, - KBASE_REG_CPU_RD, &mapping); + ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read, KBASE_REG_CPU_RD, + &mapping); if (!ptr) { dev_warn(kctx->kbdev->dev, - "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", - __func__, idx, katom->jit_ids[idx], - reg->start_pfn << PAGE_SHIFT, - reg->heap_info_gpu_addr); + "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", + __func__, idx, katom->jit_ids[idx], reg->start_pfn << PAGE_SHIFT, + reg->heap_info_gpu_addr); continue; } @@ -463,8 +446,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) u64 addr_end; if (reg->flags & 
KBASE_REG_TILER_ALIGN_TOP) { - const unsigned long extension_bytes = - reg->extension << PAGE_SHIFT; + const unsigned long extension_bytes = reg->extension << PAGE_SHIFT; const u64 low_ptr = ptr[LOW]; const u64 high_ptr = ptr[HIGH]; @@ -504,15 +486,13 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) * region */ if (used_pages > reg->nr_pages) { - dev_warn(kctx->kbdev->dev, + dev_warn( + kctx->kbdev->dev, "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n", - __func__, idx, katom->jit_ids[idx], - reg->start_pfn << PAGE_SHIFT, + __func__, idx, katom->jit_ids[idx], reg->start_pfn << PAGE_SHIFT, used_pages, reg->nr_pages, read_val, - (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? - "size" : "addr", - (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? - " with align" : ""); + (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? "size" : "addr", + (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? " with align" : ""); used_pages = reg->nr_pages; } /* Note: one real use case has an atom correctly reporting 0 @@ -571,8 +551,7 @@ bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); for (i = 0; i < 2; i++) - jd_resolve_dep(&runnable_jobs, katom, i, - kbase_ctx_flag(kctx, KCTX_DYING)); + jd_resolve_dep(&runnable_jobs, katom, i, kbase_ctx_flag(kctx, KCTX_DYING)); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -580,32 +559,28 @@ bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) while (!list_empty(&runnable_jobs)) { struct kbase_jd_atom *node; - node = list_entry(runnable_jobs.next, - struct kbase_jd_atom, jd_item); + node = list_entry(runnable_jobs.next, struct kbase_jd_atom, jd_item); list_del(runnable_jobs.next); node->in_jd_list = false; - dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", - node, node->status); + dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", node, + 
node->status); KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); if (node->status == KBASE_JD_ATOM_STATE_IN_JS) continue; if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && - !kbase_ctx_flag(kctx, KCTX_DYING)) { - KBASE_TLSTREAM_TL_RUN_ATOM_START( - kctx->kbdev, node, - kbase_jd_atom_id(kctx, node)); + !kbase_ctx_flag(kctx, KCTX_DYING)) { + KBASE_TLSTREAM_TL_RUN_ATOM_START(kctx->kbdev, node, + kbase_jd_atom_id(kctx, node)); need_to_try_schedule_context |= jd_run_atom(node); - KBASE_TLSTREAM_TL_RUN_ATOM_END( - kctx->kbdev, node, - kbase_jd_atom_id(kctx, node)); + KBASE_TLSTREAM_TL_RUN_ATOM_END(kctx->kbdev, node, + kbase_jd_atom_id(kctx, node)); } else { node->event_code = katom->event_code; - if (node->core_req & - BASE_JD_REQ_SOFT_JOB) { + if (node->core_req & BASE_JD_REQ_SOFT_JOB) { WARN_ON(!list_empty(&node->queue)); kbase_finish_soft_job(node); } @@ -615,7 +590,7 @@ bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { list_add_tail(&node->jd_item, &completed_jobs); } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && - !node->will_fail_event_code) { + !node->will_fail_event_code) { /* Node successfully submitted, try submitting * dependencies as they may now be representable * in JS @@ -664,7 +639,7 @@ enum { CORE_REQ_VERTEX_TILER, CORE_REQ_UNKNOWN }; -static const char * const core_req_strings[] = { +static const char *const core_req_strings[] = { "Dependency Only Job", "Soft Job", "Compute Shader Job", @@ -707,8 +682,7 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) /* Trace an atom submission. 
*/ static void jd_trace_atom_submit(struct kbase_context *const kctx, - struct kbase_jd_atom *const katom, - int *priority) + struct kbase_jd_atom *const katom, int *priority) { struct kbase_device *const kbdev = kctx->kbdev; @@ -721,9 +695,9 @@ static void jd_trace_atom_submit(struct kbase_context *const kctx, } static bool jd_submit_atom(struct kbase_context *const kctx, - const struct base_jd_atom *const user_atom, - const struct base_jd_fragment *const user_jc_incr, - struct kbase_jd_atom *const katom) + const struct base_jd_atom *const user_atom, + const struct base_jd_fragment *const user_jc_incr, + struct kbase_jd_atom *const katom) { struct kbase_device *kbdev = kctx->kbdev; struct kbase_jd_context *jctx = &kctx->jctx; @@ -801,11 +775,10 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if (dep_atom_number) { if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && - dep_atom_type != BASE_JD_DEP_TYPE_DATA) { + dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kbdev->dev, - "Atom %pK status to completed\n", + dev_dbg(kbdev->dev, "Atom %pK status to completed\n", (void *)katom); /* Wrong dependency setup. 
Atom will be sent @@ -831,8 +804,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, continue; if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || - dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { - + dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { if (dep_atom->event_code == BASE_JD_EVENT_DONE) continue; /* don't stop this atom if it has an order dependency @@ -840,15 +812,14 @@ static bool jd_submit_atom(struct kbase_context *const kctx, * the normal path */ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && - dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { + dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { continue; } /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %pK status to queued\n", - (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* This atom will be sent back to user space. * Do not record any dependencies. @@ -897,18 +868,16 @@ static bool jd_submit_atom(struct kbase_context *const kctx, sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; /* Cap the priority to jctx.max_priority */ - katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ? - kctx->jctx.max_priority : sched_prio; + katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ? kctx->jctx.max_priority : + sched_prio; /* Create a new atom. 
*/ jd_trace_atom_submit(kctx, katom, &katom->sched_priority); #if !MALI_INCREMENTAL_RENDERING_JM /* Reject atoms for incremental rendering if not supported */ - if (katom->core_req & - (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { - dev_err(kctx->kbdev->dev, - "Rejecting atom with unsupported core_req 0x%x\n", + if (katom->core_req & (BASE_JD_REQ_START_RENDERPASS | BASE_JD_REQ_END_RENDERPASS)) { + dev_err(kctx->kbdev->dev, "Rejecting atom with unsupported core_req 0x%x\n", katom->core_req); katom->event_code = BASE_JD_EVENT_JOB_INVALID; return kbase_jd_done_nolock(katom, true); @@ -918,8 +887,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { WARN_ON(katom->jc != 0); katom->jc_fragment = *user_jc_incr; - } else if (!katom->jc && - (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + } else if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { /* Reject atoms with job chain = NULL, as these cause issues * with soft-stop */ @@ -931,18 +899,16 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Reject atoms with an invalid device_nr */ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { - dev_err(kctx->kbdev->dev, - "Rejecting atom with invalid device_nr %d\n", - katom->device_nr); + dev_err(kctx->kbdev->dev, "Rejecting atom with invalid device_nr %d\n", + katom->device_nr); katom->event_code = BASE_JD_EVENT_JOB_INVALID; return kbase_jd_done_nolock(katom, true); } /* Reject atoms with invalid core requirements */ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && - (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { - dev_err(kctx->kbdev->dev, - "Rejecting atom with invalid core requirements\n"); + (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { + dev_err(kctx->kbdev->dev, "Rejecting atom with invalid core requirements\n"); katom->event_code = 
BASE_JD_EVENT_JOB_INVALID; katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; return kbase_jd_done_nolock(katom, true); @@ -950,11 +916,10 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Reject soft-job atom of certain types from accessing external resources */ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && - (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || - ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || - ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { - dev_err(kctx->kbdev->dev, - "Rejecting soft-job atom accessing external resources\n"); + (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { + dev_err(kctx->kbdev->dev, "Rejecting soft-job atom accessing external resources\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; return kbase_jd_done_nolock(katom, true); } @@ -970,7 +935,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, #if !MALI_JIT_PRESSURE_LIMIT_BASE if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && - (user_atom->jit_id[0] || user_atom->jit_id[1])) { + (user_atom->jit_id[0] || user_atom->jit_id[1])) { /* JIT pressure limit is disabled, but we are receiving non-0 * JIT IDs - atom is invalid. 
*/ @@ -1002,7 +967,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) katom->work_id = atomic_inc_return(&jctx->work_id); trace_gpu_job_enqueue(kctx->id, katom->work_id, - kbasep_map_core_reqs_to_string(katom->core_req)); + kbasep_map_core_reqs_to_string(katom->core_req)); #endif if (queued && !IS_GPU_ATOM(katom)) @@ -1020,8 +985,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, bool need_to_try_schedule_context; katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", - (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ @@ -1033,9 +997,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx, status = katom->status; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (status == KBASE_JD_ATOM_STATE_HW_COMPLETED) { - dev_dbg(kctx->kbdev->dev, - "Atom %d cancelled on HW\n", - kbase_jd_atom_id(katom->kctx, katom)); + dev_dbg(kctx->kbdev->dev, "Atom %d cancelled on HW\n", + kbase_jd_atom_id(katom->kctx, katom)); return need_to_try_schedule_context; } } @@ -1044,19 +1007,20 @@ static bool jd_submit_atom(struct kbase_context *const kctx, return kbase_jd_done_nolock(katom, true); } -int kbase_jd_submit(struct kbase_context *kctx, - void __user *user_addr, u32 nr_atoms, u32 stride, - bool uk6_atom) +int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_atoms, u32 stride, + bool uk6_atom) { struct kbase_jd_context *jctx = &kctx->jctx; int err = 0; - int i; + u32 i; bool need_to_try_schedule_context = false; struct kbase_device *kbdev; u32 latest_flush; bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || - stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + + CSTD_UNUSED(uk6_atom); /* * 
kbase_jd_submit isn't expected to fail and so all errors with the @@ -1065,17 +1029,17 @@ int kbase_jd_submit(struct kbase_context *kctx, kbdev = kctx->kbdev; if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); + dev_err(kbdev->dev, + "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); return -EINVAL; } if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && - stride != sizeof(struct base_jd_atom_v2) && - stride != offsetof(struct base_jd_atom, renderpass_id) && - stride != sizeof(struct base_jd_atom)) { + stride != sizeof(struct base_jd_atom_v2) && + stride != offsetof(struct base_jd_atom, renderpass_id) && + stride != sizeof(struct base_jd_atom)) { dev_err(kbdev->dev, - "Stride %u passed to job_submit isn't supported by the kernel\n", - stride); + "Stride %u passed to job_submit isn't supported by the kernel\n", stride); return -EINVAL; } @@ -1096,7 +1060,8 @@ int kbase_jd_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom; if (unlikely(jd_atom_is_v2)) { - if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { + if (copy_from_user(&user_atom.jc, user_addr, + sizeof(struct base_jd_atom_v2)) != 0) { dev_dbg(kbdev->dev, "Invalid atom address %pK passed to job_submit\n", user_addr); @@ -1125,13 +1090,11 @@ int kbase_jd_submit(struct kbase_context *kctx, */ size_t j; - dev_dbg(kbdev->dev, "Renderpass ID is %d\n", - user_atom.renderpass_id); + dev_dbg(kbdev->dev, "Renderpass ID is %d\n", user_atom.renderpass_id); for (j = 0; j < sizeof(user_atom.padding); j++) { if (user_atom.padding[j]) { - dev_err(kbdev->dev, - "Bad padding byte %zu: %d\n", - j, user_atom.padding[j]); + dev_err(kbdev->dev, "Bad padding byte %zu: %d\n", j, + user_atom.padding[j]); err = -EINVAL; break; } @@ -1144,9 +1107,8 @@ int kbase_jd_submit(struct kbase_context *kctx, * instead of a GPU address of a job chain. 
*/ if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { - if (copy_from_user(&user_jc_incr, - u64_to_user_ptr(user_atom.jc), - sizeof(user_jc_incr))) { + if (copy_from_user(&user_jc_incr, u64_to_user_ptr(user_atom.jc), + sizeof(user_jc_incr))) { dev_err(kbdev->dev, "Invalid jc address 0x%llx passed to job_submit\n", user_atom.jc); @@ -1157,20 +1119,24 @@ int kbase_jd_submit(struct kbase_context *kctx, user_atom.jc = 0; } - user_addr = (void __user *)((uintptr_t) user_addr + stride); + user_addr = (void __user *)((uintptr_t)user_addr + stride); mutex_lock(&jctx->lock); #ifndef compiletime_assert #define compiletime_assert_defined -#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ -while (false) +#define compiletime_assert(x, msg) \ + do { \ + switch (0) { \ + case 0: \ + case (x):; \ + } \ + } while (false) #endif - compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) == - BASE_JD_ATOM_COUNT, - "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); + compiletime_assert((1 << (8 * sizeof(user_atom.atom_number))) == BASE_JD_ATOM_COUNT, + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == - sizeof(user_atom.atom_number), - "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); + sizeof(user_atom.atom_number), + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); #ifdef compiletime_assert_defined #undef compiletime_assert #undef compiletime_assert_defined @@ -1196,8 +1162,7 @@ while (false) kbase_js_sched_all(kbdev); if (wait_event_killable(katom->completed, - katom->status == - KBASE_JD_ATOM_STATE_UNUSED) != 0) { + katom->status == KBASE_JD_ATOM_STATE_UNUSED) != 0) { /* We're being killed so the result code * doesn't really matter */ @@ -1206,8 +1171,8 @@ while (false) mutex_lock(&jctx->lock); } KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, katom); - need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, - &user_jc_incr, katom); + 
need_to_try_schedule_context |= + jd_submit_atom(kctx, &user_atom, &user_jc_incr, katom); KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, katom); /* Register a completed job as a disjoint event when the GPU is in a disjoint state * (ie. being reset). @@ -1216,8 +1181,8 @@ while (false) mutex_unlock(&jctx->lock); if (fatal_signal_pending(current)) { - dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", - kctx->tgid, kctx->id); + dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", kctx->tgid, + kctx->id); /* We're being killed so the result code doesn't really matter */ return 0; } @@ -1253,8 +1218,8 @@ void kbase_jd_done_worker(struct work_struct *data) js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", - (void *)katom, (void *)kctx); + dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", (void *)katom, + (void *)kctx); KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1277,16 +1242,14 @@ void kbase_jd_done_worker(struct work_struct *data) if (katom->event_code == BASE_JD_EVENT_STOPPED) { unsigned long flags; - dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", - (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", (void *)katom); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", - (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1295,14 +1258,10 @@ void kbase_jd_done_worker(struct work_struct *data) return; } - if ((katom->event_code != BASE_JD_EVENT_DONE) && - (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) { + if ((katom->event_code != BASE_JD_EVENT_DONE) && (!kbase_ctx_flag(katom->kctx, 
KCTX_DYING))) if (!kbase_is_quick_reset_enabled(kbdev)) - dev_err(kbdev->dev, - "t6xx: GPU fault 0x%02lx from job slot %d\n", - (unsigned long)katom->event_code, - katom->slot_nr); - } + dev_err(kbdev->dev, "t6xx: GPU fault 0x%02lx from job slot %d\n", + (unsigned long)katom->event_code, katom->slot_nr); /* Retain state before the katom disappears */ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); @@ -1347,8 +1306,7 @@ void kbase_jd_done_worker(struct work_struct *data) * hwaccess_lock is should be impossible for this to race * with the scheduler code. */ - if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || - !kbase_jsctx_atoms_pulled(kctx)) { + if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || !kbase_jsctx_atoms_pulled(kctx)) { /* Calling kbase_jm_idle_ctx() here will ensure that * atoms are not fast-started when we drop the * hwaccess_lock. This is not performed if @@ -1386,9 +1344,8 @@ void kbase_jd_done_worker(struct work_struct *data) */ mutex_lock(&jctx->lock); while (!list_empty(&kctx->completed_jobs)) { - struct kbase_jd_atom *atom = list_entry( - kctx->completed_jobs.next, - struct kbase_jd_atom, jd_item); + struct kbase_jd_atom *atom = list_entry(kctx->completed_jobs.next, + struct kbase_jd_atom, jd_item); list_del(kctx->completed_jobs.next); kbase_event_post(kctx, atom); @@ -1403,8 +1360,8 @@ void kbase_jd_done_worker(struct work_struct *data) KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); - dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", - (void *)katom, (void *)kctx); + dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", (void *)katom, + (void *)kctx); } /** @@ -1488,12 +1445,14 @@ static void jd_cancel_worker(struct work_struct *data) * This can be called safely from atomic context. 
* The caller must hold kbdev->hwaccess_lock */ -void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, - ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) +void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, + kbasep_js_atom_done_code done_code) { struct kbase_context *kctx; struct kbase_device *kbdev; + CSTD_UNUSED(end_timestamp); + KBASE_DEBUG_ASSERT(katom); kctx = katom->kctx; KBASE_DEBUG_ASSERT(kctx); @@ -1515,8 +1474,7 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, #if IS_ENABLED(CONFIG_DEBUG_FS) /* a failed job happened and is waiting for dumping*/ - if (!katom->will_fail_event_code && - kbase_debug_job_fault_process(katom, katom->event_code)) + if (!katom->will_fail_event_code && kbase_debug_job_fault_process(katom, katom->event_code)) return; #endif @@ -1550,7 +1508,6 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } - void kbase_jd_zap_context(struct kbase_context *kctx) { struct kbase_jd_atom *katom; @@ -1599,8 +1556,7 @@ int kbase_jd_init(struct kbase_context *kctx) pcm_device = kctx->kbdev->pcm_dev; kctx->jctx.max_priority = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; - kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", - WQ_HIGHPRI | WQ_UNBOUND, 1); + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", WQ_HIGHPRI | WQ_UNBOUND, 1); if (kctx->jctx.job_done_wq == NULL) { mali_err = -ENOMEM; goto out1; @@ -1617,9 +1573,7 @@ int kbase_jd_init(struct kbase_context *kctx) kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; #if IS_ENABLED(CONFIG_SYNC_FILE) - kctx->jctx.atoms[i].dma_fence.context = - dma_fence_context_alloc(1); - atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); + kctx->jctx.atoms[i].dma_fence.context = dma_fence_context_alloc(1); #endif } @@ -1632,18 +1586,16 @@ int kbase_jd_init(struct kbase_context *kctx) spin_lock_init(&kctx->jctx.tb_lock); - kctx->jctx.job_nr = 0; 
INIT_LIST_HEAD(&kctx->completed_jobs); - atomic_set(&kctx->work_count, 0); /* Check if there are platform rules for maximum priority */ if (pcm_device) kctx->jctx.max_priority = pcm_device->ops.pcm_scheduler_priority_check( - pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME); + pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME); return 0; - out1: +out1: return mali_err; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c index 6196c0985c7e..8a30f35fb188 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,8 +34,7 @@ struct kbase_jd_debugfs_depinfo { char type; }; -static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, - struct seq_file *sfile) +static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, struct seq_file *sfile) { #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_sync_fence_info info; @@ -45,14 +44,12 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, case BASE_JD_REQ_SOFT_FENCE_TRIGGER: res = kbase_sync_fence_out_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Sa([%pK]%d) ", - info.fence, info.status); + seq_printf(sfile, "Sa([%pK]%d) ", info.fence, info.status); break; case BASE_JD_REQ_SOFT_FENCE_WAIT: res = kbase_sync_fence_in_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Wa([%pK]%d) ", - info.fence, info.status); + seq_printf(sfile, "Wa([%pK]%d) ", info.fence, info.status); break; default: break; @@ -60,16 +57,16 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, #endif /* 
CONFIG_SYNC_FILE */ } -static void kbasep_jd_debugfs_atom_deps( - struct kbase_jd_debugfs_depinfo *deps, - struct kbase_jd_atom *atom) +static void kbasep_jd_debugfs_atom_deps(struct kbase_jd_debugfs_depinfo *deps, + struct kbase_jd_atom *atom) { struct kbase_context *kctx = atom->kctx; int i; - for (i = 0; i < 2; i++) { + for (i = 0; i < 2; i++) { deps[i].id = (unsigned int)(atom->dep[i].atom ? - kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); + kbase_jd_atom_id(kctx, atom->dep[i].atom) : + 0); switch (atom->dep[i].dep_type) { case BASE_JD_DEP_TYPE_INVALID: @@ -105,14 +102,15 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) unsigned long irq_flags; int i; + CSTD_UNUSED(data); + KBASE_DEBUG_ASSERT(kctx != NULL); /* Print version */ seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); /* Print U/K API version */ - seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, - BASE_UK_VERSION_MINOR); + seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, BASE_UK_VERSION_MINOR); /* Print table heading */ seq_puts(sfile, " ID, Core req, St, Predeps, Start time, Additional info...\n"); @@ -140,13 +138,9 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) kbasep_jd_debugfs_atom_deps(deps, atom); - seq_printf(sfile, - "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", - i, atom->core_req, atom->status, - deps[0].type, deps[0].id, - deps[1].type, deps[1].id, - start_timestamp); - + seq_printf(sfile, "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", i, atom->core_req, + atom->status, deps[0].type, deps[0].id, deps[1].type, deps[1].id, + start_timestamp); kbase_jd_debugfs_fence_info(atom, sfile); @@ -158,7 +152,6 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) return 0; } - /** * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file * @in: &struct inode pointer @@ -186,14 +179,11 @@ void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) /* Caller already ensures this, but we keep the pattern 
for * maintenance safety. */ - if (WARN_ON(!kctx) || - WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) return; /* Expose all atoms */ - debugfs_create_file("atoms", mode, kctx->kctx_dentry, kctx, - &kbasep_jd_debugfs_atoms_fops); - + debugfs_create_file("atoms", mode, kctx->kctx_dentry, kctx, &kbasep_jd_debugfs_atoms_fops); } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h index 8e6140c43538..8ea18b36d0d8 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,4 +40,4 @@ struct kbase_context; */ void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); -#endif /*_KBASE_JD_DEBUGFS_H*/ +#endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c index 1ac5cd3eafff..15b0706e82d0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -123,18 +123,18 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) } struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) + struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", - (void *)katom, katom->event_code); + dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", (void *)katom, + katom->event_code); - KBASE_KTRACE_ADD_JM(kbdev, JM_RETURN_ATOM_TO_JS, katom->kctx, katom, - katom->jc, katom->event_code); + KBASE_KTRACE_ADD_JM(kbdev, JM_RETURN_ATOM_TO_JS, katom->kctx, katom, katom->jc, + katom->event_code); if (katom->event_code != BASE_JD_EVENT_STOPPED && - katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { + katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { return kbase_js_complete_atom(katom, NULL); } @@ -143,8 +143,8 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, return NULL; } -struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, ktime_t *end_timestamp) +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp) { lockdep_assert_held(&kbdev->hwaccess_lock); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_jm.h index eeafcb6b1a77..977bcc8dcb92 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2014, 2016, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -100,7 +100,7 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); * Return: Atom that has now been unblocked and can now be run, or NULL if none */ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); + struct kbase_jd_atom *katom); /** * kbase_jm_complete() - Complete an atom @@ -110,8 +110,8 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, * * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, ktime_t *end_timestamp); +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp); #endif /* !MALI_USE_CSF */ #endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index 8ce09212a57e..1dca014c82d4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -36,6 +36,9 @@ #include "mali_kbase_hwaccess_jm.h" #include #include +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include +#endif /* * Private types @@ -56,32 +59,29 @@ enum { typedef u32 kbasep_js_release_result; const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { - KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ - KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ - KBASE_JS_ATOM_SCHED_PRIO_LOW, /* BASE_JD_PRIO_LOW */ - KBASE_JS_ATOM_SCHED_PRIO_REALTIME /* BASE_JD_PRIO_REALTIME */ + KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ + KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ + KBASE_JS_ATOM_SCHED_PRIO_LOW, /* BASE_JD_PRIO_LOW */ + KBASE_JS_ATOM_SCHED_PRIO_REALTIME /* 
BASE_JD_PRIO_REALTIME */ }; -const base_jd_prio -kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { - BASE_JD_PRIO_REALTIME, /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */ - BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ - BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ - BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ +const base_jd_prio kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { + BASE_JD_PRIO_REALTIME, /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */ + BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ + BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ + BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ }; - /* * Private function prototypes */ -static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( - struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state); +static kbasep_js_release_result +kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, - kbasep_js_ctx_job_cb *callback); +static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_ctx_job_cb *callback); /* Helper for ktrace */ #if KBASE_KTRACE_ENABLE @@ -101,31 +101,142 @@ static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) * Private functions */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_timer_callback() - Callback function for the GPU metrics hrtimer + * + * @timer: Pointer to the GPU metrics hrtimer + * + * This function will emit power/gpu_work_period tracepoint for all the active + * GPU metrics contexts. The timer will be restarted if needed. + * + * Return: enum value to indicate that timer should not be restarted. 
+ */ +static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer) +{ + struct kbasep_js_device_data *js_devdata = + container_of(timer, struct kbasep_js_device_data, gpu_metrics_timer); + struct kbase_device *kbdev = container_of(js_devdata, struct kbase_device, js_data); + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + WARN_ON_ONCE(!js_devdata->gpu_metrics_timer_running); + if (js_devdata->gpu_metrics_timer_needed) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), + HRTIMER_MODE_REL); + } else + js_devdata->gpu_metrics_timer_running = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return HRTIMER_NORESTART; +} + +/** + * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. + */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. 
*/ + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!gpu_metrics_ctx) { + gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL); + + if (gpu_metrics_ctx) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed"); + ret = -ENOMEM; + } + } + + kctx->gpu_metrics_ctx = gpu_metrics_ctx; + mutex_unlock(&kbdev->kctx_list_lock); + + return ret; +} + +/** + * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it + * if the refcount becomes 0. + * + * @kctx: Pointer to the Kbase context. + */ +static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) +{ + unsigned long flags; + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. 
*/ + mutex_lock(&kctx->kbdev->kctx_list_lock); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} +#endif + /** * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements - * @features: JSn_FEATURE register value + * @features: parsed JSn_FEATURE register value * * Given a JSn_FEATURE register value returns the core requirements that match * * Return: Core requirement bit mask */ -static base_jd_core_req core_reqs_from_jsn_features(u16 features) +static base_jd_core_req core_reqs_from_jsn_features(struct kbase_js_features_props *features) { base_jd_core_req core_req = 0u; - if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) + if (features->write_value) core_req |= BASE_JD_REQ_V; - if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) + if (features->cache_flush) core_req |= BASE_JD_REQ_CF; - if ((features & JS_FEATURE_COMPUTE_JOB) != 0) + if (features->compute_shader) core_req |= BASE_JD_REQ_CS; - if ((features & JS_FEATURE_TILER_JOB) != 0) + if (features->tiler) core_req |= BASE_JD_REQ_T; - if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) + if (features->fragment_shader) core_req |= BASE_JD_REQ_FS; return core_req; @@ -183,8 +294,7 @@ static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned in lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; - prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) return false; } @@ -218,8 +328,8 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { struct rb_node *node = rb_first(&queue->runnable_tree); - struct kbase_jd_atom *entry = rb_entry(node, - struct 
kbase_jd_atom, runnable_tree_node); + struct kbase_jd_atom *entry = + rb_entry(node, struct kbase_jd_atom, runnable_tree_node); rb_erase(node, &queue->runnable_tree); callback(kctx->kbdev, entry); @@ -229,31 +339,24 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js * to avoid calling the callback twice. */ if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { - WARN_ON(!(entry->core_req & - BASE_JD_REQ_END_RENDERPASS)); - dev_dbg(kctx->kbdev->dev, - "Del runnable atom %pK from X_DEP list\n", + WARN_ON(!(entry->core_req & BASE_JD_REQ_END_RENDERPASS)); + dev_dbg(kctx->kbdev->dev, "Del runnable atom %pK from X_DEP list\n", (void *)entry); list_del(&entry->queue); - entry->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + entry->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; } } while (!list_empty(&queue->x_dep_head)) { - struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, - struct kbase_jd_atom, queue); + struct kbase_jd_atom *entry = + list_entry(queue->x_dep_head.next, struct kbase_jd_atom, queue); - WARN_ON(!(entry->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); - dev_dbg(kctx->kbdev->dev, - "Del blocked atom %pK from X_DEP list\n", - (void *)entry); + WARN_ON(!(entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + dev_dbg(kctx->kbdev->dev, "Del blocked atom %pK from X_DEP list\n", (void *)entry); list_del(queue->x_dep_head.next); - entry->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + entry->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; callback(kctx->kbdev, entry); } @@ -274,8 +377,7 @@ static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int { int prio; - for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; - prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) jsctx_queue_foreach_prio(kctx, js, prio, callback); } @@ -328,8 +430,7 @@ static inline struct 
kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, un lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; - prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; katom = jsctx_rb_peek_prio(kctx, js, prio); @@ -349,8 +450,7 @@ static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, un * * @katom must currently be at the head of the ring buffer. */ -static inline void -jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +static inline void jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; unsigned int js = katom->slot_nr; @@ -367,8 +467,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); } -static void -jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) +static void jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbase_device *kbdev = kctx->kbdev; int prio = katom->sched_priority; @@ -382,8 +481,8 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) (void *)kctx, js); while (*new) { - struct kbase_jd_atom *entry = container_of(*new, - struct kbase_jd_atom, runnable_tree_node); + struct kbase_jd_atom *entry = + container_of(*new, struct kbase_jd_atom, runnable_tree_node); parent = *new; if (kbase_jd_atom_is_younger(katom, entry)) @@ -409,13 +508,11 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) * jsctx_rb_unpull() must be called on atoms in the same order the atoms were * pulled. 
*/ -static inline void -jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +static inline void jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - KBASE_KTRACE_ADD_JM(kctx->kbdev, JS_UNPULL_JOB, kctx, katom, katom->jc, - 0u); + KBASE_KTRACE_ADD_JM(kctx->kbdev, JS_UNPULL_JOB, kctx, katom, katom->jc, 0u); jsctx_tree_add(kctx, katom); } @@ -426,18 +523,15 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, unsigned int js); -typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, - const struct kbase_jd_atom *); +typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, const struct kbase_jd_atom *); -bool kbase_js_atom_runs_before(struct kbase_device *kbdev, - const struct kbase_jd_atom *katom_a, +bool kbase_js_atom_runs_before(struct kbase_device *kbdev, const struct kbase_jd_atom *katom_a, const struct kbase_jd_atom *katom_b, const kbase_atom_ordering_flag_t order_flags) { struct kbase_context *kctx_a = katom_a->kctx; struct kbase_context *kctx_b = katom_b->kctx; - katom_ordering_func *samectxatomprio_ordering_func = - kbase_jd_atom_is_younger; + katom_ordering_func *samectxatomprio_ordering_func = kbase_jd_atom_is_younger; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -447,8 +541,7 @@ bool kbase_js_atom_runs_before(struct kbase_device *kbdev, /* It only makes sense to make this test for atoms on the same slot */ WARN_ON(katom_a->slot_nr != katom_b->slot_nr); - if (kbdev->js_ctx_scheduling_mode == - KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) { + if (kbdev->js_ctx_scheduling_mode == KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) { /* In local priority mode, querying either way around for "a * should run before b" and "b should run before a" should * always be false when they're from different contexts @@ -490,7 +583,7 @@ bool 
kbase_js_atom_runs_before(struct kbase_device *kbdev, /* * Functions private to KBase ('Protected' functions) */ -int kbasep_js_devdata_init(struct kbase_device * const kbdev) +int kbasep_js_devdata_init(struct kbase_device *const kbdev) { struct kbasep_js_device_data *jsdd; int i, j; @@ -499,24 +592,6 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd = &kbdev->js_data; -#ifdef CONFIG_MALI_BIFROST_DEBUG - /* Soft-stop will be disabled on a single context by default unless - * softstop_always is set - */ - jsdd->softstop_always = false; -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - jsdd->nr_all_contexts_running = 0; - jsdd->nr_user_contexts_running = 0; - jsdd->nr_contexts_pullable = 0; - atomic_set(&jsdd->nr_contexts_runnable, 0); - /* No ctx allowed to submit */ - jsdd->runpool_irq.submit_allowed = 0u; - memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, - sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); - memset(jsdd->runpool_irq.slot_affinities, 0, - sizeof(jsdd->runpool_irq.slot_affinities)); - memset(jsdd->runpool_irq.slot_affinity_refcount, 0, - sizeof(jsdd->runpool_irq.slot_affinity_refcount)); INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); /* Config attributes */ @@ -535,57 +610,46 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); - dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", - jsdd->scheduling_period_ns); - dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", - jsdd->soft_stop_ticks); - dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", - jsdd->soft_stop_ticks_cl); - dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", - jsdd->hard_stop_ticks_ss); - dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", - jsdd->hard_stop_ticks_cl); - dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", - jsdd->hard_stop_ticks_dumping); - dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", - jsdd->gpu_reset_ticks_ss); - dev_dbg(kbdev->dev, 
"\tgpu_reset_ticks_cl:%u", - jsdd->gpu_reset_ticks_cl); - dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", - jsdd->gpu_reset_ticks_dumping); - dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", - jsdd->ctx_timeslice_ns); - dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", - atomic_read(&jsdd->soft_job_timeout_ms)); + dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", jsdd->scheduling_period_ns); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", jsdd->soft_stop_ticks); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", jsdd->soft_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", jsdd->hard_stop_ticks_ss); + dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", jsdd->hard_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", jsdd->hard_stop_ticks_dumping); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", jsdd->gpu_reset_ticks_ss); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", jsdd->gpu_reset_ticks_cl); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", jsdd->gpu_reset_ticks_dumping); + dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", jsdd->ctx_timeslice_ns); + dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", atomic_read(&jsdd->soft_job_timeout_ms)); dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && - jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && - jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && - jsdd->hard_stop_ticks_dumping < - jsdd->gpu_reset_ticks_dumping)) { - dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); + jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && + jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && + jsdd->hard_stop_ticks_dumping < jsdd->gpu_reset_ticks_dumping)) { + dev_err(kbdev->dev, + "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); return -EINVAL; } #if KBASE_DISABLE_SCHEDULING_SOFT_STOPS - dev_dbg(kbdev->dev, 
"Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", - jsdd->soft_stop_ticks, - jsdd->scheduling_period_ns); + dev_dbg(kbdev->dev, + "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", + jsdd->soft_stop_ticks, jsdd->scheduling_period_ns); #endif #if KBASE_DISABLE_SCHEDULING_HARD_STOPS - dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", - jsdd->hard_stop_ticks_ss, - jsdd->hard_stop_ticks_dumping, - jsdd->scheduling_period_ns); + dev_dbg(kbdev->dev, + "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", + jsdd->hard_stop_ticks_ss, jsdd->hard_stop_ticks_dumping, + jsdd->scheduling_period_ns); #endif #if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS - dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); + dev_dbg(kbdev->dev, + "Note: The JS tick timer (if coded) will still be run, but do nothing."); #endif for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) - jsdd->js_reqs[i] = core_reqs_from_jsn_features( - kbdev->gpu_props.props.raw_props.js_features[i]); + jsdd->js_reqs[i] = core_reqs_from_jsn_features(&kbdev->gpu_props.js_features[i]); /* On error, we could continue on: providing none of the below resources * rely on the ones above @@ -602,6 +666,13 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) } } +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + hrtimer_init(&jsdd->gpu_metrics_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + jsdd->gpu_metrics_timer.function = gpu_metrics_timer_callback; + jsdd->gpu_metrics_timer_needed = false; + jsdd->gpu_metrics_timer_running = false; 
+#endif + return 0; } @@ -613,7 +684,9 @@ void kbasep_js_devdata_halt(struct kbase_device *kbdev) void kbasep_js_devdata_term(struct kbase_device *kbdev) { struct kbasep_js_device_data *js_devdata; - s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { + 0, + }; CSTD_UNUSED(js_devdata); KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -623,34 +696,39 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev) /* The caller must de-register all contexts before calling this */ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); - KBASE_DEBUG_ASSERT(memcmp( - js_devdata->runpool_irq.ctx_attr_ref_count, - zero_ctx_attr_ref_count, - sizeof(zero_ctx_attr_ref_count)) == 0); + KBASE_DEBUG_ASSERT(memcmp(js_devdata->runpool_irq.ctx_attr_ref_count, + zero_ctx_attr_ref_count, sizeof(zero_ctx_attr_ref_count)) == 0); CSTD_UNUSED(zero_ctx_attr_ref_count); + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + js_devdata->gpu_metrics_timer_needed = false; + hrtimer_cancel(&js_devdata->gpu_metrics_timer); +#endif } int kbasep_js_kctx_init(struct kbase_context *const kctx) { struct kbasep_js_kctx_info *js_kctx_info; int i, j; + int ret; CSTD_UNUSED(js_kctx_info); KBASE_DEBUG_ASSERT(kctx != NULL); - kbase_ctx_sched_init_ctx(kctx); + CSTD_UNUSED(ret); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + ret = gpu_metrics_ctx_init(kctx); + if (ret) + return ret; +#endif for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); js_kctx_info = &kctx->jctx.sched_info; - kctx->slots_pullable = 0; - js_kctx_info->ctx.nr_jobs = 0; kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); kbase_ctx_flag_clear(kctx, KCTX_DYING); - memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, - sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); /* Initially, the context is disabled from submission until the create * flags are set @@ -719,6 +797,9 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) 
} kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif } /* @@ -729,14 +810,13 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js, int sched_prio) { - struct kbase_jsctx_slot_tracking *slot_tracking = - &kctx->slot_tracking[js]; + struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); slot_tracking->blocked &= ~(((kbase_js_prio_bitmap_t)1) << sched_prio); - KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_UNBLOCKED, kctx, - NULL, 0, js, (unsigned int)sched_prio); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_UNBLOCKED, kctx, NULL, 0, js, + (unsigned int)sched_prio); } static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js) @@ -752,8 +832,7 @@ static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned in static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js, int sched_prio) { - struct kbase_jsctx_slot_tracking *slot_tracking = - &kctx->slot_tracking[js]; + struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; kbase_js_prio_bitmap_t prio_bit, higher_prios_mask; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -791,8 +870,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, { unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; - struct kbase_jsctx_slot_tracking *slot_tracking = - &kctx->slot_tracking[js]; + struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; int nr_atoms_pulled; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -830,8 +908,7 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; int atoms_pulled_pri; - struct 
kbase_jsctx_slot_tracking *slot_tracking = - &kctx->slot_tracking[js]; + struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; bool slot_prio_became_unblocked = false; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -845,8 +922,7 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, * higher priority levels are still blocked: a subsequent query to * kbase_jsctx_slot_prio_is_blocked() will still return true */ - if (!atoms_pulled_pri && - kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) { + if (!atoms_pulled_pri && kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) { kbase_jsctx_slot_prio_blocked_clear(kctx, js, sched_prio); if (!kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) @@ -854,10 +930,8 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, } if (slot_prio_became_unblocked) - KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, - JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED, - kctx, katom, katom->jc, js, - (unsigned int)sched_prio); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED, kctx, + katom, katom->jc, js, (unsigned int)sched_prio); return slot_prio_became_unblocked; } @@ -887,7 +961,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); if (!kctx->slots_pullable) { kbdev->js_data.nr_contexts_pullable++; @@ -928,7 +1002,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbde list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); if (!kctx->slots_pullable) { 
kbdev->js_data.nr_contexts_pullable++; @@ -998,9 +1072,9 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); + &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); - if (kctx->slots_pullable == (1 << js)) { + if (kctx->slots_pullable == (1UL << js)) { kbdev->js_data.nr_contexts_pullable--; ret = true; if (!kbase_jsctx_atoms_pulled(kctx)) { @@ -1041,7 +1115,7 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct k list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - if (kctx->slots_pullable == (1 << js)) { + if (kctx->slots_pullable == (1UL << js)) { kbdev->js_data.nr_contexts_pullable--; ret = true; if (!kbase_jsctx_atoms_pulled(kctx)) { @@ -1080,8 +1154,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_devi continue; kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, - struct kbase_context, - jctx.sched_info.ctx.ctx_list_entry[js]); + struct kbase_context, jctx.sched_info.ctx.ctx_list_entry[js]); list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx, @@ -1136,8 +1209,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, b if (is_scheduled) { if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", - (void *)kctx); + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return false; } } @@ -1147,30 +1219,27 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, b return false; /* No pullable atoms */ } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { - KBASE_KTRACE_ADD_JM_SLOT_INFO( - kctx->kbdev, 
JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, - katom->jc, js, (unsigned int)katom->sched_priority); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, + katom->jc, js, (unsigned int)katom->sched_priority); dev_dbg(kbdev->dev, "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return false; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", - (void *)katom); + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", (void *)katom); return false; /* next atom blocked */ } if (kbase_js_atom_blocked_on_x_dep(katom)) { - if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) { + if (katom->x_pre_dep->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return false; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); @@ -1184,8 +1253,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, b return true; } -static bool kbase_js_dep_validate(struct kbase_context *kctx, - struct kbase_jd_atom *katom) +static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbase_device *kbdev = kctx->kbdev; bool ret = true; @@ -1201,15 +1269,12 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); int dep_prio = dep_atom->sched_priority; - dev_dbg(kbdev->dev, - "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", - i, (void *)katom, js, (void 
*)dep_atom, dep_js); + dev_dbg(kbdev->dev, "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, + (void *)katom, js, (void *)dep_atom, dep_js); /* Dependent atom must already have been submitted */ - if (!(dep_atom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { - dev_dbg(kbdev->dev, - "Blocker not submitted yet\n"); + if (!(dep_atom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + dev_dbg(kbdev->dev, "Blocker not submitted yet\n"); ret = false; break; } @@ -1218,8 +1283,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * be represented in the ringbuffer */ if (prio != dep_prio) { - dev_dbg(kbdev->dev, - "Different atom priorities\n"); + dev_dbg(kbdev->dev, "Different atom priorities\n"); ret = false; break; } @@ -1229,8 +1293,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * represented in the ringbuffer */ if (has_dep) { - dev_dbg(kbdev->dev, - "Too many same-slot deps\n"); + dev_dbg(kbdev->dev, "Too many same-slot deps\n"); ret = false; break; } @@ -1238,8 +1301,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * same-slot dependency */ if (dep_atom->post_dep) { - dev_dbg(kbdev->dev, - "Too many same-slot successors\n"); + dev_dbg(kbdev->dev, "Too many same-slot successors\n"); ret = false; break; } @@ -1249,8 +1311,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * represented in the ringbuffer */ if (has_x_dep) { - dev_dbg(kbdev->dev, - "Too many cross-slot deps\n"); + dev_dbg(kbdev->dev, "Too many cross-slot deps\n"); ret = false; break; } @@ -1258,16 +1319,14 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * cross-slot dependency */ if (dep_atom->x_post_dep) { - dev_dbg(kbdev->dev, - "Too many cross-slot successors\n"); + dev_dbg(kbdev->dev, "Too many cross-slot successors\n"); ret = false; break; } /* The dependee atom can not already be in the * HW access ringbuffer */ - if (dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if 
(dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { dev_dbg(kbdev->dev, "Blocker already in ringbuffer (state:%d)\n", dep_atom->gpu_rb_state); @@ -1277,8 +1336,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* The dependee atom can not already have * completed */ - if (dep_atom->status != - KBASE_JD_ATOM_STATE_IN_JS) { + if (dep_atom->status != KBASE_JD_ATOM_STATE_IN_JS) { dev_dbg(kbdev->dev, "Blocker already completed (status:%d)\n", dep_atom->status); @@ -1301,38 +1359,30 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_atom *dep_atom = katom->dep[i].atom; if (dep_atom) { - int dep_js = kbase_js_get_slot(kbdev, dep_atom); + unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); dev_dbg(kbdev->dev, - "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", - i, (void *)katom, js, (void *)dep_atom, - dep_js); + "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, + (void *)katom, js, (void *)dep_atom, dep_js); if ((js != dep_js) && - (dep_atom->status != - KBASE_JD_ATOM_STATE_COMPLETED) - && (dep_atom->status != - KBASE_JD_ATOM_STATE_HW_COMPLETED) - && (dep_atom->status != - KBASE_JD_ATOM_STATE_UNUSED)) { - - katom->atom_flags |= - KBASE_KATOM_FLAG_X_DEP_BLOCKED; + (dep_atom->status != KBASE_JD_ATOM_STATE_COMPLETED) && + (dep_atom->status != KBASE_JD_ATOM_STATE_HW_COMPLETED) && + (dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED)) { + katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED; dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", (void *)katom); katom->x_pre_dep = dep_atom; dep_atom->x_post_dep = katom; - if (kbase_jd_katom_dep_type( - &katom->dep[i]) == - BASE_JD_DEP_TYPE_DATA) - katom->atom_flags |= - KBASE_KATOM_FLAG_FAIL_BLOCKER; + if (kbase_jd_katom_dep_type(&katom->dep[i]) == + BASE_JD_DEP_TYPE_DATA) + katom->atom_flags |= KBASE_KATOM_FLAG_FAIL_BLOCKER; } - if ((kbase_jd_katom_dep_type(&katom->dep[i]) - == BASE_JD_DEP_TYPE_DATA) && - (js == dep_js)) { + if 
((kbase_jd_katom_dep_type(&katom->dep[i]) == + BASE_JD_DEP_TYPE_DATA) && + (js == dep_js)) { katom->pre_dep = dep_atom; dep_atom->post_dep = katom; } @@ -1342,8 +1392,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } } } else { - dev_dbg(kbdev->dev, - "Deps of atom %pK (s:%d) could not be represented\n", + dev_dbg(kbdev->dev, "Deps of atom %pK (s:%d) could not be represented\n", (void *)katom, js); } @@ -1362,9 +1411,10 @@ void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kctx->slots_pullable & (1 << js)) list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js][new_priority]); + &kbdev->js_data.ctx_list_pullable[js][new_priority]); else - list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + list_move_tail( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js][new_priority]); } @@ -1384,8 +1434,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) /* Determine the new priority for context, as per the priority * of currently in-use atoms. 
*/ - for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; - prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; + prio++) { if (kctx->atoms_count[prio]) { new_priority = prio; break; @@ -1418,16 +1468,16 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom) return -EINVAL; compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; if (rp->state != KBASE_JD_RP_COMPLETE) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", - (void *)start_katom, start_katom->renderpass_id); + dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", (void *)start_katom, + start_katom->renderpass_id); /* The following members are read when updating the job slot * ringbuffer/fifo therefore they require additional locking. 
@@ -1464,13 +1514,13 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom) return -EINVAL; compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", - (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", (void *)end_katom, + (int)rp->state, end_katom->renderpass_id); if (rp->state == KBASE_JD_RP_COMPLETE) return -EINVAL; @@ -1494,8 +1544,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom) return 0; } -bool kbasep_js_add_job(struct kbase_context *kctx, - struct kbase_jd_atom *atom) +bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom) { unsigned long flags; struct kbasep_js_kctx_info *js_kctx_info; @@ -1536,8 +1585,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++(js_kctx_info->ctx.nr_jobs); - dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", - (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, + js_kctx_info->ctx.nr_jobs); /* Lock for state available during IRQ */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1548,9 +1597,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (!kbase_js_dep_validate(kctx, atom)) { /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); - dev_dbg(kbdev->dev, - "Remove atom %pK from kctx %pK; now %d in ctx\n", - (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, + (void 
*)kctx, js_kctx_info->ctx.nr_jobs); /* Setting atom status back to queued as it still has unresolved * dependencies @@ -1580,24 +1628,23 @@ bool kbasep_js_add_job(struct kbase_context *kctx, enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, - kbase_ktrace_get_ctx_refcnt(kctx)); + kbase_ktrace_get_ctx_refcnt(kctx)); /* Context Attribute Refcounting */ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); if (enqueue_required) { if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) - timer_sync = kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, atom->slot_nr); + timer_sync = + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, atom->slot_nr); else - timer_sync = kbase_js_ctx_list_add_unpullable_nolock( - kbdev, kctx, atom->slot_nr); + timer_sync = + kbase_js_ctx_list_add_unpullable_nolock(kbdev, kctx, atom->slot_nr); } /* If this context is active and the atom is the first on its slot, * kick the job manager to attempt to fast-start the atom */ - if (enqueue_required && kctx == - kbdev->hwaccess.active_kctx[atom->slot_nr]) + if (enqueue_required && kctx == kbdev->hwaccess.active_kctx[atom->slot_nr]) kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1612,8 +1659,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, * was called on a non-scheduled context. Kill that job * by killing the context. */ - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, - false); + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); } else if (js_kctx_info->ctx.nr_jobs == 1) { /* Handle Refcount going from 0 to 1: schedule the * context on the Queue @@ -1628,8 +1674,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } } out_unlock: - dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", - kctx, enqueue_required ? "" : "not "); + dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", kctx, + enqueue_required ? 
"" : "not "); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -1638,8 +1684,8 @@ out_unlock: return enqueue_required; } -void kbasep_js_remove_job(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_jd_atom *atom) +void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *atom) { struct kbasep_js_kctx_info *js_kctx_info; unsigned long flags; @@ -1651,14 +1697,13 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, js_kctx_info = &kctx->jctx.sched_info; KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, - kbase_ktrace_get_ctx_refcnt(kctx)); + kbase_ktrace_get_ctx_refcnt(kctx)); /* De-refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); - dev_dbg(kbdev->dev, - "Remove atom %pK from kctx %pK; now %d in ctx\n", - (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, + (void *)kctx, js_kctx_info->ctx.nr_jobs); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (--kctx->atoms_count[atom->sched_priority] == 0) @@ -1666,8 +1711,8 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_jd_atom *katom) +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *katom) { unsigned long flags; struct kbasep_js_atom_retained_state katom_retained_state; @@ -1689,8 +1734,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * want to override that, because we're cancelling an atom regardless of * whether it was soft-stopped or not */ - attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, - &katom_retained_state); + attr_state_changed = + kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, 
&katom_retained_state); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return attr_state_changed; @@ -1716,10 +1761,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * changed. The caller should try scheduling all contexts */ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( - struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state, - bool runpool_ctx_attr_change) + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state, bool runpool_ctx_attr_change) { struct kbasep_js_device_data *js_devdata; kbasep_js_release_result result = 0; @@ -1739,8 +1782,7 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( */ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, - kctx, NULL, 0u, 0); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, kctx, NULL, 0u, 0); } return result; } @@ -1771,10 +1813,9 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( * the result of releasing a context that whether the caller should try * scheduling a new context or should try scheduling all contexts. */ -static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( - struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state) +static kbasep_js_release_result +kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) { unsigned long flags; struct kbasep_js_device_data *js_devdata; @@ -1815,14 +1856,14 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Release the atom if it finished (i.e. 
wasn't soft-stopped) */ if (kbasep_js_has_atom_finished(katom_retained_state)) - runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( - kbdev, kctx, katom_retained_state); + runpool_ctx_attr_change |= + kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, katom_retained_state); if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && #ifdef CONFIG_MALI_ARBITER_SUPPORT - !kbase_pm_is_gpu_lost(kbdev) && + !kbase_pm_is_gpu_lost(kbdev) && #endif - !kbase_pm_is_suspending(kbdev)) { + !kbase_pm_is_suspending(kbdev)) { /* Context is kept scheduled into an address space even when * there are no jobs, in this case we have to handle the * situation where all jobs have been evicted from the GPU and @@ -1838,21 +1879,21 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( * Note that there'll always be at least 1 reference to the context * which was previously acquired by kbasep_js_schedule_ctx(). */ - if (new_ref_count == 1 && - (!kbasep_js_is_submit_allowed(js_devdata, kctx) || + if (new_ref_count == 1 && (!kbasep_js_is_submit_allowed(js_devdata, kctx) || #ifdef CONFIG_MALI_ARBITER_SUPPORT - kbase_pm_is_gpu_lost(kbdev) || + kbase_pm_is_gpu_lost(kbdev) || #endif - kbase_pm_is_suspending(kbdev))) { + kbase_pm_is_suspending(kbdev))) { int num_slots = kbdev->gpu_props.num_job_slots; int slot; /* Last reference, and we've been told to remove this context * from the Run Pool */ - dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", - kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, - kbasep_js_is_submit_allowed(js_devdata, kctx)); + dev_dbg(kbdev->dev, + "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", + kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, + kbasep_js_is_submit_allowed(js_devdata, kctx)); KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); @@ -1872,16 +1913,13 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( * after the 
KCTX_SHEDULED flag is changed, otherwise we * double-decount the attributes */ - runpool_ctx_attr_change |= - kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + runpool_ctx_attr_change |= kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); /* Releasing the context and katom retained state can allow * more jobs to run */ - release_result |= - kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, - kctx, katom_retained_state, - runpool_ctx_attr_change); + release_result |= kbasep_js_run_jobs_after_ctx_and_atom_release( + kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); /* * Transaction ends on AS and runpool_irq: @@ -1898,8 +1936,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Recalculate pullable status for all slots */ for (slot = 0; slot < num_slots; slot++) { if (kbase_js_ctx_pullable(kctx, slot, false)) - kbase_js_ctx_list_add_pullable_nolock(kbdev, - kctx, slot); + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, slot); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1923,10 +1960,10 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Queue an action to occur after we've dropped the lock */ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | - KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + KBASEP_JS_RELEASE_RESULT_SCHED_ALL; } else { - kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, - katom_retained_state, runpool_ctx_attr_change); + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, + runpool_ctx_attr_change); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); @@ -1935,21 +1972,21 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( return release_result; } -void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx) { struct 
kbasep_js_atom_retained_state katom_retained_state; /* Setup a dummy katom_retained_state */ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); - kbasep_js_runpool_release_ctx_internal(kbdev, kctx, - &katom_retained_state); + kbasep_js_runpool_release_ctx_internal(kbdev, kctx, &katom_retained_state); } -void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, bool has_pm_ref) +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + bool has_pm_ref) { + CSTD_UNUSED(has_pm_ref); + KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); @@ -1962,15 +1999,14 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, /* Dying: don't requeue, but kill all jobs on the context. This * happens asynchronously */ - dev_dbg(kbdev->dev, - "JS: ** Killing Context %pK on RunPool Remove **", kctx); + dev_dbg(kbdev->dev, "JS: ** Killing Context %pK on RunPool Remove **", kctx); kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); } } void kbasep_js_runpool_release_ctx_and_katom_retained_state( - struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state) + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -1985,8 +2021,7 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, - katom_retained_state); + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state); /* Drop the runpool mutex to allow requeing kctx */ mutex_unlock(&js_devdata->runpool_mutex); @@ -2003,29 +2038,26 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( 
kbase_js_sched_all(kbdev); } -void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { struct kbasep_js_atom_retained_state katom_retained_state; kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); - kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, - &katom_retained_state); + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); } /* Variant of kbasep_js_runpool_release_ctx() that doesn't call into * kbase_js_sched_all() */ -static void kbasep_js_runpool_release_ctx_no_schedule( - struct kbase_device *kbdev, struct kbase_context *kctx) +static void kbasep_js_runpool_release_ctx_no_schedule(struct kbase_device *kbdev, + struct kbase_context *kctx) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; kbasep_js_release_result release_result; struct kbasep_js_atom_retained_state katom_retained_state_struct; - struct kbasep_js_atom_retained_state *katom_retained_state = - &katom_retained_state_struct; + struct kbasep_js_atom_retained_state *katom_retained_state = &katom_retained_state_struct; KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); @@ -2036,8 +2068,7 @@ static void kbasep_js_runpool_release_ctx_no_schedule( mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, - katom_retained_state); + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state); /* Drop the runpool mutex to allow requeing kctx */ mutex_unlock(&js_devdata->runpool_mutex); @@ -2080,8 +2111,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_cont spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); if (as_nr == KBASEP_AS_NR_INVALID) { - as_nr = 
kbase_backend_find_and_release_free_address_space( - kbdev, kctx); + as_nr = kbase_backend_find_and_release_free_address_space(kbdev, kctx); if (as_nr != KBASEP_AS_NR_INVALID) { /* Attempt to retain the context again, this should * succeed @@ -2119,9 +2149,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_cont return false; } - KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, - 0u, - kbase_ktrace_get_ctx_refcnt(kctx)); + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, 0u, + kbase_ktrace_get_ctx_refcnt(kctx)); kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); @@ -2210,15 +2239,13 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *k spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_backend_use_ctx_sched(kbdev, kctx, js)) { + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_backend_use_ctx_sched(kbdev, kctx, js)) { dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { kbdev->hwaccess.active_kctx[js] = kctx; - kbase_ctx_flag_clear(kctx, - KCTX_PULLED_SINCE_ACTIVE_JS0 << js); + kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return true; /* Context already scheduled */ @@ -2228,8 +2255,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *k return kbasep_js_schedule_ctx(kbdev, kctx, js); } -void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { struct kbasep_js_kctx_info *js_kctx_info; struct kbasep_js_device_data *js_devdata; @@ -2301,8 +2327,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); -void 
kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { struct kbasep_js_kctx_info *js_kctx_info; @@ -2362,9 +2387,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev) /* De-ref the previous retain to ensure each context gets pulled out * sometime later. */ - for (i = 0; - i < BASE_MAX_NR_AS; - ++i, retained = retained >> 1) { + for (i = 0; i < BASE_MAX_NR_AS; ++i, retained = retained >> 1) { struct kbase_context *kctx = kbdev->as_to_kctx[i]; if (retained & 1u) @@ -2387,8 +2410,8 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; - prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; + prio++) { struct kbase_context *kctx, *n; unsigned long flags; @@ -2396,14 +2419,13 @@ void kbasep_js_resume(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); list_for_each_entry_safe(kctx, n, - &kbdev->js_data.ctx_list_unpullable[js][prio], - jctx.sched_info.ctx.ctx_list_entry[js]) { + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { struct kbasep_js_kctx_info *js_kctx_info; bool timer_sync = false; /* Drop lock so we can take kctx mutexes */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); js_kctx_info = &kctx->jctx.sched_info; @@ -2412,13 +2434,11 @@ void kbasep_js_resume(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = 
kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); @@ -2436,14 +2456,12 @@ void kbasep_js_resume(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); list_for_each_entry_safe(kctx, n, - &kbdev->js_data.ctx_list_unpullable[js][prio], - jctx.sched_info.ctx.ctx_list_entry[js]) { - + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_js_ctx_pullable(kctx, js, false)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -2464,16 +2482,15 @@ void kbasep_js_resume(struct kbase_device *kbdev) /* JS Resume complete */ } -bool kbase_js_is_atom_valid(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { + CSTD_UNUSED(kbdev); + if ((katom->core_req & BASE_JD_REQ_FS) && - (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | - BASE_JD_REQ_T))) + (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T))) return false; - if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) && - (katom->jobslot >= BASE_JM_MAX_NR_SLOTS)) + if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) && (katom->jobslot >= BASE_JM_MAX_NR_SLOTS)) return false; return true; @@ -2488,16 +2505,14 @@ static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_j return 0; if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - if (katom->device_nr == 1 && - kbdev->gpu_props.num_core_groups == 2) + if (katom->device_nr == 1 && kbdev->gpu_props.num_core_groups == 2) return 2; } return 1; } -bool 
kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom) +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom) { bool enqueue_required, add_required = true; @@ -2515,8 +2530,8 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, enqueue_required = false; if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || - (katom->pre_dep && (katom->pre_dep->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + (katom->pre_dep && + (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { int prio = katom->sched_priority; unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; @@ -2530,8 +2545,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, add_required = false; } } else { - dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", - (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", (void *)katom); } if (add_required) { @@ -2543,9 +2557,8 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } - dev_dbg(kctx->kbdev->dev, - "Enqueue of kctx %pK is %srequired to submit atom %pK\n", - kctx, enqueue_required ? "" : "not ", katom); + dev_dbg(kctx->kbdev->dev, "Enqueue of kctx %pK is %srequired to submit atom %pK\n", kctx, + enqueue_required ? 
"" : "not ", katom); return enqueue_required; } @@ -2566,30 +2579,24 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); while (katom) { - WARN_ON(!(katom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); if (!kbase_js_atom_blocked_on_x_dep(katom)) { dev_dbg(kctx->kbdev->dev, - "Del atom %pK from X_DEP list in js_move_to_tree\n", - (void *)katom); + "Del atom %pK from X_DEP list in js_move_to_tree\n", (void *)katom); list_del(&katom->queue); - katom->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; /* For incremental rendering, an end-of-renderpass atom * may have had its dependency on start-of-renderpass * ignored and may therefore already be in the tree. */ - if (!(katom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + if (!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { jsctx_tree_add(kctx, katom); - katom->atom_flags |= - KBASE_KATOM_FLAG_JSCTX_IN_TREE; + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } } else { - dev_dbg(kctx->kbdev->dev, - "Atom %pK blocked on x-dep in js_move_to_tree\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK blocked on x-dep in js_move_to_tree\n", (void *)katom); break; } @@ -2598,7 +2605,6 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) } } - /** * kbase_js_evict_deps - Evict dependencies of a failed atom. 
* @kctx: Context pointer @@ -2618,23 +2624,23 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom struct kbase_jd_atom *x_dep = katom->x_post_dep; struct kbase_jd_atom *next_katom = katom->post_dep; + CSTD_UNUSED(js); + CSTD_UNUSED(prio); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); if (next_katom) { - KBASE_DEBUG_ASSERT(next_katom->status != - KBASE_JD_ATOM_STATE_HW_COMPLETED); + KBASE_DEBUG_ASSERT(next_katom->status != KBASE_JD_ATOM_STATE_HW_COMPLETED); next_katom->will_fail_event_code = katom->event_code; - } /* Has cross slot depenency. */ - if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + if (x_dep && (x_dep->atom_flags & + (KBASE_KATOM_FLAG_JSCTX_IN_TREE | KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", - (void *)x_dep); + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); /* Fail if it had a data dependency. 
*/ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) @@ -2661,8 +2667,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", - (void *)kctx); + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return NULL; } #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -2684,8 +2689,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) return NULL; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", - (void *)katom); + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", (void *)katom); return NULL; } @@ -2694,24 +2698,22 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) * present on the same slot */ if (katom->pre_dep && kbase_jsctx_slot_atoms_pulled(kctx, js)) { - struct kbase_jd_atom *prev_atom = - kbase_backend_inspect_tail(kbdev, js); + struct kbase_jd_atom *prev_atom = kbase_backend_inspect_tail(kbdev, js); if (prev_atom && prev_atom->kctx != kctx) return NULL; } if (kbase_js_atom_blocked_on_x_dep(katom)) { - if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) { + if (katom->x_pre_dep->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return NULL; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kbdev, js)) { + kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); @@ -2719,8 +2721,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) } } - 
KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JS_PULL_JOB, kctx, katom, - katom->jc, js, katom->sched_priority); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JS_PULL_JOB, kctx, katom, katom->jc, js, + katom->sched_priority); kbase_ctx_flag_set(kctx, KCTX_PULLED); kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js)); @@ -2770,18 +2772,16 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) return; compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; if (WARN_ON(rp->start_katom != start_katom)) return; - dev_dbg(kctx->kbdev->dev, - "JS return start atom %pK in state %d of RP %d\n", - (void *)start_katom, (int)rp->state, - start_katom->renderpass_id); + dev_dbg(kctx->kbdev->dev, "JS return start atom %pK in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, start_katom->renderpass_id); if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) return; @@ -2790,51 +2790,42 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) * than running out of memory. 
*/ if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { - dev_dbg(kctx->kbdev->dev, - "JS return isn't OOM in state %d of RP %d\n", + dev_dbg(kctx->kbdev->dev, "JS return isn't OOM in state %d of RP %d\n", (int)rp->state, start_katom->renderpass_id); return; } - dev_dbg(kctx->kbdev->dev, - "JS return confirm OOM in state %d of RP %d\n", - (int)rp->state, start_katom->renderpass_id); + dev_dbg(kctx->kbdev->dev, "JS return confirm OOM in state %d of RP %d\n", (int)rp->state, + start_katom->renderpass_id); - if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && - rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) + if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) return; /* Prevent the tiler job being pulled for execution in the * job scheduler again. */ - dev_dbg(kbdev->dev, "Blocking start atom %pK\n", - (void *)start_katom); + dev_dbg(kbdev->dev, "Blocking start atom %pK\n", (void *)start_katom); atomic_inc(&start_katom->blocked); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? - KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; + rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; /* Was the fragment job chain submitted to kbase yet? */ end_katom = rp->end_katom; if (end_katom) { - dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", - (void *)end_katom); + dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", (void *)end_katom); if (rp->state == KBASE_JD_RP_RETRY_OOM) { /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. 
*/ - dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", - (void *)end_katom); + dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", (void *)end_katom); atomic_dec(&end_katom->blocked); - WARN_ON(!(end_katom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + WARN_ON(!(end_katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); - kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, - end_katom->slot_nr); + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, end_katom->slot_nr); /* Expect the fragment job chain to be scheduled without * further action because this function is called when @@ -2842,8 +2833,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) */ end_katom = NULL; } else { - WARN_ON(end_katom->status != - KBASE_JD_ATOM_STATE_QUEUED && + WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_QUEUED && end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); } } @@ -2879,20 +2869,18 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) return; compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; if (WARN_ON(rp->end_katom != end_katom)) return; - dev_dbg(kctx->kbdev->dev, - "JS return end atom %pK in state %d of RP %d\n", + dev_dbg(kctx->kbdev->dev, "JS return end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); - if (WARN_ON(rp->state != KBASE_JD_RP_OOM && - rp->state != KBASE_JD_RP_RETRY_OOM)) + if (WARN_ON(rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM)) return; /* Reduce the number of mapped pages in the memory regions that @@ -2904,21 +2892,18 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) while (!list_empty(&rp->oom_reg_list)) { 
struct kbase_va_region *reg = - list_first_entry(&rp->oom_reg_list, - struct kbase_va_region, link); + list_first_entry(&rp->oom_reg_list, struct kbase_va_region, link); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, - "Reset backing to %zu pages for region %pK\n", + dev_dbg(kbdev->dev, "Reset backing to %zu pages for region %pK\n", reg->threshold_pages, (void *)reg); if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) kbase_mem_shrink(kctx, reg, reg->threshold_pages); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Deleting region %pK from list\n", - (void *)reg); + dev_dbg(kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(&reg->link); kbase_va_region_alloc_put(kctx, reg); } @@ -2935,11 +2920,9 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) */ start_katom = rp->start_katom; if (!WARN_ON(!start_katom)) { - dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", - (void *)start_katom); + dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", (void *)start_katom); atomic_dec(&start_katom->blocked); - (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, - start_katom->slot_nr); + (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, start_katom->slot_nr); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -2947,8 +2930,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) static void js_return_worker(struct work_struct *data) { - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; struct kbasep_js_device_data *js_devdata = &kbdev->js_data; @@ -2962,8 +2944,8 @@ static void js_return_worker(struct work_struct *data) base_jd_core_req core_req = katom->core_req; u64 cache_jc = katom->jc; - dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", 
- __func__, (void *)katom, katom->event_code); + dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", __func__, (void *)katom, + katom->event_code); KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER, kctx, katom, katom->jc, 0); @@ -2984,8 +2966,7 @@ static void js_return_worker(struct work_struct *data) slot_became_unblocked = kbase_jsctx_slot_atom_pulled_dec(kctx, katom); - if (!kbase_jsctx_slot_atoms_pulled(kctx, js) && - jsctx_rb_none_to_pull(kctx, js)) + if (!kbase_jsctx_slot_atoms_pulled(kctx, js) && jsctx_rb_none_to_pull(kctx, js)) timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); /* If the context is now unblocked on this slot after soft-stopped @@ -2994,20 +2975,14 @@ static void js_return_worker(struct work_struct *data) */ if (slot_became_unblocked && kbase_jsctx_atoms_pulled(kctx) && kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js); + timer_sync |= kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js); if (!kbase_jsctx_atoms_pulled(kctx)) { - dev_dbg(kbdev->dev, - "No atoms currently pulled from context %pK\n", - (void *)kctx); + dev_dbg(kbdev->dev, "No atoms currently pulled from context %pK\n", (void *)kctx); if (!kctx->slots_pullable) { - dev_dbg(kbdev->dev, - "Context %pK %s counted as runnable\n", - (void *)kctx, - kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? - "is" : "isn't"); + dev_dbg(kbdev->dev, "Context %pK %s counted as runnable\n", (void *)kctx, + kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? 
"is" : "isn't"); WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); @@ -3015,8 +2990,7 @@ static void js_return_worker(struct work_struct *data) timer_sync = true; } - if (kctx->as_nr != KBASEP_AS_NR_INVALID && - !kbase_ctx_flag(kctx, KCTX_DYING)) { + if (kctx->as_nr != KBASEP_AS_NR_INVALID && !kbase_ctx_flag(kctx, KCTX_DYING)) { int num_slots = kbdev->gpu_props.num_job_slots; int slot; @@ -3025,9 +2999,8 @@ static void js_return_worker(struct work_struct *data) for (slot = 0; slot < num_slots; slot++) { if (kbase_js_ctx_pullable(kctx, slot, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, slot); + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, slot); } } @@ -3039,11 +3012,8 @@ static void js_return_worker(struct work_struct *data) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (context_idle) { - dev_dbg(kbdev->dev, - "Context %pK %s counted as active\n", - (void *)kctx, - kbase_ctx_flag(kctx, KCTX_ACTIVE) ? - "is" : "isn't"); + dev_dbg(kbdev->dev, "Context %pK %s counted as active\n", (void *)kctx, + kbase_ctx_flag(kctx, KCTX_ACTIVE) ? "is" : "isn't"); WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); @@ -3066,29 +3036,24 @@ static void js_return_worker(struct work_struct *data) } dev_dbg(kbdev->dev, "JS: retained state %s finished", - kbasep_js_has_atom_finished(&retained_state) ? - "has" : "hasn't"); + kbasep_js_has_atom_finished(&retained_state) ? 
"has" : "hasn't"); WARN_ON(kbasep_js_has_atom_finished(&retained_state)); - kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, - &retained_state); + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &retained_state); kbase_js_sched_all(kbdev); kbase_backend_complete_wq_post_sched(kbdev, core_req); - KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER_END, kctx, NULL, cache_jc, - 0); + KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER_END, kctx, NULL, cache_jc, 0); - dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", - __func__, (void *)katom); + dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", __func__, (void *)katom); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", - (void *)katom, (void *)kctx); + dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -3121,7 +3086,7 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool js_complete_start_rp(struct kbase_context *kctx, - struct kbase_jd_atom *const start_katom) + struct kbase_jd_atom *const start_katom) { struct kbase_device *const kbdev = kctx->kbdev; struct kbase_jd_renderpass *rp; @@ -3133,28 +3098,24 @@ static bool js_complete_start_rp(struct kbase_context *kctx, return false; compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; if (WARN_ON(rp->start_katom != start_katom)) return false; - dev_dbg(kctx->kbdev->dev, - "Start atom %pK is done in state %d of RP %d\n", - (void *)start_katom, (int)rp->state, - start_katom->renderpass_id); + 
dev_dbg(kctx->kbdev->dev, "Start atom %pK is done in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, start_katom->renderpass_id); if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) return false; - if (rp->state == KBASE_JD_RP_PEND_OOM || - rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + if (rp->state == KBASE_JD_RP_PEND_OOM || rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { unsigned long flags; - dev_dbg(kctx->kbdev->dev, - "Start atom %pK completed before soft-stop\n", + dev_dbg(kctx->kbdev->dev, "Start atom %pK completed before soft-stop\n", (void *)start_katom); kbase_gpu_vm_lock(kctx); @@ -3162,12 +3123,10 @@ static bool js_complete_start_rp(struct kbase_context *kctx, while (!list_empty(&rp->oom_reg_list)) { struct kbase_va_region *reg = - list_first_entry(&rp->oom_reg_list, - struct kbase_va_region, link); + list_first_entry(&rp->oom_reg_list, struct kbase_va_region, link); WARN_ON(reg->flags & KBASE_REG_VA_FREED); - dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", - (void *)reg); + dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(&reg->link); kbase_va_region_alloc_put(kctx, reg); } @@ -3175,16 +3134,13 @@ static bool js_complete_start_rp(struct kbase_context *kctx, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_gpu_vm_unlock(kctx); } else { - dev_dbg(kctx->kbdev->dev, - "Start atom %pK did not exceed memory threshold\n", + dev_dbg(kctx->kbdev->dev, "Start atom %pK did not exceed memory threshold\n", (void *)start_katom); - WARN_ON(rp->state != KBASE_JD_RP_START && - rp->state != KBASE_JD_RP_RETRY); + WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY); } - if (rp->state == KBASE_JD_RP_RETRY || - rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + if (rp->state == KBASE_JD_RP_RETRY || rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { struct kbase_jd_atom *const end_katom = rp->end_katom; if (!WARN_ON(!end_katom)) { @@ -3193,13 +3149,12 @@ static bool js_complete_start_rp(struct 
kbase_context *kctx, /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. */ - dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", - (void *)end_katom); + dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", (void *)end_katom); atomic_dec(&end_katom->blocked); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - timer_sync = kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, end_katom->slot_nr); + timer_sync = kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + end_katom->slot_nr); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } @@ -3216,8 +3171,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, * without the tiler job chain at the start using too much memory; otherwise * completion of the end-of-renderpass atom is handled similarly to a soft-stop. */ -static void js_complete_end_rp(struct kbase_context *kctx, - struct kbase_jd_atom *const end_katom) +static void js_complete_end_rp(struct kbase_context *kctx, struct kbase_jd_atom *const end_katom) { struct kbase_device *const kbdev = kctx->kbdev; unsigned long flags; @@ -3229,20 +3183,19 @@ static void js_complete_end_rp(struct kbase_context *kctx, return; compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; if (WARN_ON(rp->end_katom != end_katom)) return; - dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", - (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", (void *)end_katom, + (int)rp->state, end_katom->renderpass_id); - if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || - WARN_ON(rp->state == KBASE_JD_RP_OOM) || - WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) + if (WARN_ON(rp->state == 
KBASE_JD_RP_COMPLETE) || WARN_ON(rp->state == KBASE_JD_RP_OOM) || + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) return; /* Rendering completed without running out of memory. @@ -3252,12 +3205,10 @@ static void js_complete_end_rp(struct kbase_context *kctx, rp->state = KBASE_JD_RP_COMPLETE; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Renderpass %d is complete\n", - end_katom->renderpass_id); + dev_dbg(kbdev->dev, "Renderpass %d is complete\n", end_katom->renderpass_id); } -bool kbase_js_complete_atom_wq(struct kbase_context *kctx, - struct kbase_jd_atom *katom) +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbasep_js_kctx_info *js_kctx_info; struct kbasep_js_device_data *js_devdata; @@ -3271,8 +3222,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kbdev = kctx->kbdev; atom_slot = katom->slot_nr; - dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", - __func__, (void *)katom, atom_slot); + dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", __func__, (void *)katom, atom_slot); /* Update the incremental rendering state machine. 
*/ @@ -3292,11 +3242,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { bool slot_became_unblocked; - dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", - (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", (void *)katom); - slot_became_unblocked = - kbase_jsctx_slot_atom_pulled_dec(kctx, katom); + slot_became_unblocked = kbase_jsctx_slot_atom_pulled_dec(kctx, katom); context_idle = !kbase_jsctx_atoms_pulled(kctx); if (!kbase_jsctx_atoms_pulled(kctx) && !kctx->slots_pullable) { @@ -3316,19 +3264,16 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, (void *)kctx, atom_slot, prio); if (kbase_js_ctx_pullable(kctx, atom_slot, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, atom_slot); + timer_sync |= kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + atom_slot); } } WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); if (!kbase_jsctx_slot_atoms_pulled(kctx, atom_slot) && jsctx_rb_none_to_pull(kctx, atom_slot)) { - if (!list_empty( - &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) - timer_sync |= kbase_js_ctx_list_remove_nolock( - kctx->kbdev, kctx, atom_slot); + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) + timer_sync |= kbase_js_ctx_list_remove_nolock(kctx->kbdev, kctx, atom_slot); } /* @@ -3336,8 +3281,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * atom failure) and there are now no atoms left in the system then * re-enable submission so that context can be scheduled again. 
*/ - if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && - !kbase_jsctx_atoms_pulled(kctx) && + if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && !kbase_jsctx_atoms_pulled(kctx) && !kbase_ctx_flag(kctx, KCTX_DYING)) { unsigned int js; @@ -3346,18 +3290,15 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js); } - } else if (katom->x_post_dep && - kbasep_js_is_submit_allowed(js_devdata, kctx)) { + } else if (katom->x_post_dep && kbasep_js_is_submit_allowed(js_devdata, kctx)) { unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js); } } @@ -3365,8 +3306,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * jd_done_worker(). 
*/ if (context_idle) { - dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", - (void *)kctx); + dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", (void *)kctx); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); } @@ -3407,18 +3347,16 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) return true; compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; if (WARN_ON(rp->end_katom != end_katom)) return true; - dev_dbg(kbdev->dev, - "JS complete end atom %pK in state %d of RP %d\n", - (void *)end_katom, (int)rp->state, - end_katom->renderpass_id); + dev_dbg(kbdev->dev, "JS complete end atom %pK in state %d of RP %d\n", (void *)end_katom, + (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) return true; @@ -3429,29 +3367,26 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) if (end_katom->event_code != BASE_JD_EVENT_DONE) return true; - if (rp->state != KBASE_JD_RP_OOM && - rp->state != KBASE_JD_RP_RETRY_OOM) + if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) return true; dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); return false; } -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp) +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) { struct kbase_device *kbdev; struct kbase_context *kctx = katom->kctx; struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", - (void *)katom, (void *)kctx, (void *)x_dep); + dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", (void *)katom, + (void *)kctx, (void *)x_dep); 
lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && - !js_end_rp_is_complete(katom)) { + if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && !js_end_rp_is_complete(katom)) { katom->event_code = BASE_JD_EVENT_END_RP_DONE; kbase_js_unpull(kctx, katom); return NULL; @@ -3469,46 +3404,38 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, } if (katom->event_code != BASE_JD_EVENT_DONE) { - kbase_js_evict_deps(kctx, katom, katom->slot_nr, - katom->sched_priority); + kbase_js_evict_deps(kctx, katom, katom->slot_nr, katom->sched_priority); } - KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, - katom->slot_nr, 0, TL_JS_EVENT_STOP); + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, katom->slot_nr, 0, TL_JS_EVENT_STOP); - trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, - kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr); + trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, kbase_jd_atom_id(katom->kctx, katom), + katom->slot_nr); KBASE_TLSTREAM_TL_JD_DONE_START(kbdev, katom); kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); KBASE_TLSTREAM_TL_JD_DONE_END(kbdev, katom); /* Unblock cross dependency if present */ - if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && - (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { - bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, - false); + if (x_dep && + (katom->event_code == BASE_JD_EVENT_DONE || + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { + bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false); x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", - (void *)x_dep); + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); kbase_js_move_to_tree(x_dep); - if (!was_pullable && kbase_js_ctx_pullable(kctx, 
x_dep->slot_nr, - false)) - kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, - x_dep->slot_nr); + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, x_dep->slot_nr); if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", - (void *)x_dep); + dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", (void *)x_dep); return x_dep; } } else { - dev_dbg(kbdev->dev, - "No cross-slot dep to unblock for atom %pK\n", - (void *)katom); + dev_dbg(kbdev->dev, "No cross-slot dep to unblock for atom %pK\n", (void *)katom); } return NULL; @@ -3536,8 +3463,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) lockdep_assert_held(&kbdev->hwaccess_lock); - if (!(katom->atom_flags & - KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", (void *)katom); return false; @@ -3550,8 +3476,8 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) } compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[katom->renderpass_id]; /* We can read a subset of renderpass state without holding @@ -3560,8 +3486,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); - dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", - (int)rp->state); + dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", (int)rp->state); if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) return true; @@ -3575,8 +3500,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) return true; } - 
dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", - (void *)katom->x_pre_dep); + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", (void *)katom->x_pre_dep); return false; } @@ -3584,15 +3508,15 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) { struct kbasep_js_device_data *js_devdata; - struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; + struct kbase_context *last_active_kctx[BASE_JM_MAX_NR_SLOTS]; bool timer_sync = false; bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; unsigned int js; KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); - dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", - __func__, (void *)kbdev, (unsigned int)js_mask); + dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", __func__, (void *)kbdev, + (unsigned int)js_mask); js_devdata = &kbdev->js_data; @@ -3600,7 +3524,7 @@ void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - last_active[js] = kbdev->hwaccess.active_kctx[js]; + last_active_kctx[js] = kbdev->hwaccess.active_kctx[js]; ctx_waiting[js] = false; } @@ -3627,50 +3551,39 @@ void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) js); if (kbase_pm_context_active_handle_suspend( - kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js); /* Suspend pending - return context to * queue and stop scheduling */ - mutex_lock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); - if (kbase_js_ctx_list_add_pullable_head( - kctx->kbdev, kctx, js)) + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + if (kbase_js_ctx_list_add_pullable_head(kctx->kbdev, kctx, + js)) kbase_js_sync_timers(kbdev); - mutex_unlock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); 
mutex_unlock(&js_devdata->queue_mutex); up(&js_devdata->schedule_sem); - KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, - 0); + KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, 0); return; } kbase_ctx_flag_set(kctx, KCTX_ACTIVE); } if (!kbase_js_use_ctx(kbdev, kctx, js)) { - mutex_lock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - dev_dbg(kbdev->dev, - "kctx %pK cannot be used at this time\n", - kctx); + dev_dbg(kbdev->dev, "kctx %pK cannot be used at this time\n", kctx); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbase_js_ctx_pullable(kctx, js, false) - || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) - timer_sync |= - kbase_js_ctx_list_add_pullable_head_nolock( - kctx->kbdev, kctx, js); + if (kbase_js_ctx_pullable(kctx, js, false) || + kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) + timer_sync |= kbase_js_ctx_list_add_pullable_head_nolock( + kctx->kbdev, kctx, js); else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( - kctx->kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - mutex_unlock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); + timer_sync |= kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); if (context_idle) { WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); @@ -3696,32 +3609,23 @@ void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n", (void *)kctx, js); - pullable = kbase_js_ctx_pullable(kctx, js, - true); + pullable = kbase_js_ctx_pullable(kctx, js, true); /* Failed to pull jobs - push to head of list. * Unless this context is already 'active', in * which case it's effectively already scheduled * so push it to the back of the list. 
*/ - if (pullable && kctx == last_active[js] && - kbase_ctx_flag(kctx, - (KCTX_PULLED_SINCE_ACTIVE_JS0 << - js))) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kctx->kbdev, - kctx, js); + if (pullable && kctx == last_active_kctx[js] && + kbase_ctx_flag(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js))) + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( + kctx->kbdev, kctx, js); else if (pullable) - timer_sync |= - kbase_js_ctx_list_add_pullable_head_nolock( - kctx->kbdev, - kctx, js); + timer_sync |= kbase_js_ctx_list_add_pullable_head_nolock( + kctx->kbdev, kctx, js); else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( - kctx->kbdev, - kctx, js); + timer_sync |= kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, kctx, js); /* If this context is not the active context, * but the active context is pullable on this @@ -3730,41 +3634,32 @@ void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) * the IRQ handler, which would prevent this * context from making progress. 
*/ - if (last_active[js] && kctx != last_active[js] - && kbase_js_ctx_pullable( - last_active[js], js, true)) + if (last_active_kctx[js] && kctx != last_active_kctx[js] && + kbase_js_ctx_pullable(last_active_kctx[js], js, true)) ctx_waiting[js] = true; if (context_idle) { kbase_jm_idle_ctx(kbdev, kctx); - spin_unlock_irqrestore( - &kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); } else { - spin_unlock_irqrestore( - &kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } - mutex_unlock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); js_mask &= ~(1 << js); break; /* Could not run atoms on this slot */ } - dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", - (void *)kctx); + dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", (void *)kctx); if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kctx->kbdev, kctx, js); + timer_sync |= kbase_js_ctx_list_add_pullable_nolock(kctx->kbdev, + kctx, js); else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( - kctx->kbdev, kctx, js); + timer_sync |= kbase_js_ctx_list_add_unpullable_nolock(kctx->kbdev, + kctx, js); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); @@ -3775,10 +3670,9 @@ void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) kbase_js_sync_timers(kbdev); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - if (kbdev->hwaccess.active_kctx[js] == last_active[js] && - ctx_waiting[js]) { + if (kbdev->hwaccess.active_kctx[js] == last_active_kctx[js] && ctx_waiting[js]) { dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", - (void *)last_active[js], js); + (void *)last_active_kctx[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } } @@ 
-3853,10 +3747,8 @@ void kbase_js_zap_context(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - if (!list_empty( - &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) - list_del_init( - &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -3871,7 +3763,8 @@ void kbase_js_zap_context(struct kbase_context *kctx) * back (this already cancels the jobs) */ - KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); @@ -3894,7 +3787,8 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* Case c: didn't evict, but it is scheduled - it's in the Run * Pool */ - KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ @@ -3926,8 +3820,8 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_devdata->queue_mutex); mutex_unlock(&kctx->jctx.lock); - dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", - kctx); + dev_dbg(kbdev->dev, + "Zap: Ctx %pK Release (may or may not schedule out immediately)", kctx); kbasep_js_runpool_release_ctx(kbdev, kctx); } @@ -3944,9 +3838,9 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ } -static inline int trace_get_refcnt(struct kbase_device *kbdev, - struct kbase_context *kctx) +static inline int trace_get_refcnt(struct kbase_device *kbdev, struct 
kbase_context *kctx) { + CSTD_UNUSED(kbdev); return atomic_read(&kctx->refcount); } @@ -3968,8 +3862,7 @@ static inline int trace_get_refcnt(struct kbase_device *kbdev, * The locking conditions on the caller are as follows: * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. */ -static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, - kbasep_js_ctx_job_cb *callback) +static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_ctx_job_cb *callback) { struct kbase_device *kbdev; unsigned long flags; @@ -3979,8 +3872,8 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, - 0u, trace_get_refcnt(kbdev, kctx)); + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, 0u, + trace_get_refcnt(kbdev, kctx)); /* Invoke callback on jobs on each slot in turn */ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h index 96974c8c6439..89c3b45c735b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,4 +33,4 @@ #include "jm/mali_kbase_jm_js.h" #include "jm/mali_kbase_js_defs.h" -#endif /* _KBASE_JS_H_ */ +#endif /* _KBASE_JS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c index 04ea06b2f381..6fc6b8a93027 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,9 @@ * or similar is called sometime later. * false indicates no change in ctx attributes state of the runpool. */ -static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, + struct kbase_context *kctx, + enum kbasep_js_ctx_attr attribute) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -69,7 +71,8 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { /* First refcount indicates a state change */ runpool_state_changed = true; - KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, + attribute); } } @@ -95,7 +98,9 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s * or similar is called sometime later. 
* false indicates no change in ctx attributes state of the runpool. */ -static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, + struct kbase_context *kctx, + enum kbasep_js_ctx_attr attribute) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -118,7 +123,8 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { /* Last de-refcount indicates a state change */ runpool_state_changed = true; - KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, + attribute); } } @@ -141,7 +147,9 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, * This may allow the scheduler to submit more jobs than previously. * false indicates no change in ctx attributes state of the runpool. 
*/ -static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, + struct kbase_context *kctx, + enum kbasep_js_ctx_attr attribute) { struct kbasep_js_kctx_info *js_kctx_info; bool runpool_state_changed = false; @@ -157,10 +165,12 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); - runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); + runpool_state_changed = + kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); } return runpool_state_changed; @@ -182,7 +192,9 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc * This may allow the scheduler to submit more jobs than previously. * false indicates no change in ctx attributes state of the runpool. 
*/ -static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, + struct kbase_context *kctx, + enum kbasep_js_ctx_attr attribute) { struct kbasep_js_kctx_info *js_kctx_info; bool runpool_state_changed = false; @@ -195,10 +207,12 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { lockdep_assert_held(&kbdev->hwaccess_lock); /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ - runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); + runpool_state_changed = + kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); } @@ -219,9 +233,10 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb /* Retain any existing attributes */ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { - if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr)i) != false) { /* The context is being scheduled in, so update the runpool with the new attributes */ - runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr( + kbdev, kctx, (enum kbasep_js_ctx_attr)i); /* We don't need to know about state changed, because retaining a * context occurs on scheduling it, and that itself will also 
try @@ -239,16 +254,18 @@ bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct k /* Release any existing attributes */ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { - if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr)i) != false) { /* The context is being scheduled out, so update the runpool on the removed attributes */ - runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr( + kbdev, kctx, (enum kbasep_js_ctx_attr)i); } } return runpool_state_changed; } -void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *katom) { bool runpool_state_changed = false; base_jd_core_req core_req; @@ -257,13 +274,17 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase core_req = katom->core_req; if (core_req & BASE_JD_REQ_ONLY_COMPUTE) - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + runpool_state_changed |= + kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); else - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr( + kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); - if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && + (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { /* Atom 
that can run on slot1 or slot2, and can use all cores */ - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr( + kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); } /* We don't need to know about state changed, because retaining an atom @@ -272,7 +293,8 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase CSTD_UNUSED(runpool_state_changed); } -bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) { bool runpool_state_changed = false; base_jd_core_req core_req; @@ -285,13 +307,17 @@ bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbas return false; if (core_req & BASE_JD_REQ_ONLY_COMPUTE) - runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr( + kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); else - runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr( + kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); - if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && + (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { /* Atom that can run on slot1 or slot2, and can use all cores */ - runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, 
KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr( + kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); } return runpool_state_changed; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h index 2dc640d5ab82..471f54081d75 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,7 +77,8 @@ bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct k * - jsctx mutex * - If the context is scheduled, then runpool_irq spinlock must also be held */ -void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *katom); /** * kbasep_js_ctx_attr_ctx_release_atom - Release all attributes of an atom, @@ -101,13 +102,15 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase * or similar is called sometime later. * false indicates no change in ctx attributes state of the runpool. 
*/ -bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); /* * Requires: * - runpool_irq spinlock */ -static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, + enum kbasep_js_ctx_attr attribute) { struct kbasep_js_device_data *js_devdata; @@ -122,17 +125,19 @@ static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, * Requires: * - runpool_irq spinlock */ -static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, + enum kbasep_js_ctx_attr attribute) { /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ - return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); + return (bool)kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); } /* * Requires: * - jsctx mutex */ -static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, + enum kbasep_js_ctx_attr attribute) { struct kbasep_js_kctx_info *js_kctx_info; @@ -141,7 +146,7 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, js_kctx_info = &kctx->jctx.sched_info; /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ - return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + return (bool)(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); } -#endif /* _KBASE_JS_DEFS_H_ 
*/ +#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c index 14a730dc5a12..c6a66be98178 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -62,7 +62,7 @@ #include #elif !defined(static_assert) // Stringify the expression if no message is given. -#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) +#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) #define __static_assert(e, msg, ...) _Static_assert(e, msg) #endif @@ -145,9 +145,8 @@ struct kbase_kinstr_jm_atom_state_change { u8 padding[4]; } data; }; -static_assert( - ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= - KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); +static_assert(((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); #define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) @@ -228,8 +227,7 @@ static inline bool reader_changes_is_valid_size(const size_t size) * -ERANGE - the requested memory size was invalid * -ENOMEM - could not allocate the memory */ -static int reader_changes_init(struct reader_changes *const changes, - const size_t size) +static int reader_changes_init(struct reader_changes *const changes, const size_t size) { BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); @@ -246,8 +244,8 @@ static int reader_changes_init(struct reader_changes *const changes, mutex_init(&changes->consumer); changes->size = size / sizeof(*changes->data); - 
changes->threshold = min(((size_t)(changes->size)) / 4, - ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); + changes->threshold = + min(((size_t)(changes->size)) / 4, ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); return changes->size; } @@ -326,10 +324,9 @@ static u32 reader_changes_count(struct reader_changes *const changes) * userspace. Kicked when a threshold is reached or there is * overflow. */ -static void reader_changes_push( - struct reader_changes *const changes, - const struct kbase_kinstr_jm_atom_state_change *const change, - wait_queue_head_t *const wait_queue) +static void reader_changes_push(struct reader_changes *const changes, + const struct kbase_kinstr_jm_atom_state_change *const change, + wait_queue_head_t *const wait_queue) { u32 head, tail, size, space; unsigned long irq; @@ -350,8 +347,7 @@ static void reader_changes_push( if (space >= 1) { data[head] = *change; if (space == 1) { - data[head].flags |= - KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; + data[head].flags |= KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; pr_warn(PR_ "overflow of circular buffer\n"); } smp_store_release(&changes->head, (head + 1) & (size - 1)); @@ -394,11 +390,10 @@ struct reader { struct kbase_kinstr_jm *context; }; -static struct kbase_kinstr_jm * -kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); +static struct kbase_kinstr_jm *kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, - struct reader *const reader); + struct reader *const reader); static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, struct reader *const reader); @@ -428,8 +423,7 @@ static void reader_term(struct reader *const reader) * * Return: 0 on success, else error code. 
*/ -static int reader_init(struct reader **const out_reader, - struct kbase_kinstr_jm *const ctx, +static int reader_init(struct reader **const out_reader, struct kbase_kinstr_jm *const ctx, size_t const num_changes) { struct reader *reader = NULL; @@ -478,6 +472,8 @@ static int reader_release(struct inode *const node, struct file *const file) { struct reader *const reader = file->private_data; + CSTD_UNUSED(node); + reader_term(reader); file->private_data = NULL; @@ -493,12 +489,10 @@ static int reader_release(struct inode *const node, struct file *const file) * Return: The number of bytes copied or negative errno on failure. */ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, - char __user *buffer, - size_t buffer_size) + char __user *buffer, size_t buffer_size) { ssize_t ret = 0; - struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE( - changes->data); + struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE(changes->data); size_t const entry_size = sizeof(*src_buf); size_t changes_tail, changes_count, read_size; @@ -506,9 +500,8 @@ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, * Note that we can't use is_power_of_2() since old compilers don't * understand it's a constant expression. 
*/ -#define is_power_of_two(x) ((x) && !((x) & ((x) - 1))) - static_assert(is_power_of_two( - sizeof(struct kbase_kinstr_jm_atom_state_change))); +#define is_power_of_two(x) ((x) && !((x) & ((x)-1))) + static_assert(is_power_of_two(sizeof(struct kbase_kinstr_jm_atom_state_change))); #undef is_power_of_two lockdep_assert_held_once(&changes->consumer); @@ -523,8 +516,7 @@ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, do { changes_tail = changes->tail; changes_count = reader_changes_count_locked(changes); - read_size = min(changes_count * entry_size, - buffer_size & ~(entry_size - 1)); + read_size = min(changes_count * entry_size, buffer_size & ~(entry_size - 1)); if (!read_size) break; @@ -535,8 +527,7 @@ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, buffer += read_size; buffer_size -= read_size; ret += read_size; - changes_tail = (changes_tail + read_size / entry_size) & - (changes->size - 1); + changes_tail = (changes_tail + read_size / entry_size) & (changes->size - 1); smp_store_release(&changes->tail, changes_tail); } while (read_size); @@ -564,15 +555,15 @@ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, * Note: The number of bytes read will always be a multiple of the size of an * entry. 
*/ -static ssize_t reader_read(struct file *const filp, - char __user *const buffer, - size_t const buffer_size, - loff_t *const offset) +static ssize_t reader_read(struct file *const filp, char __user *const buffer, + size_t const buffer_size, loff_t *const offset) { struct reader *const reader = filp->private_data; struct reader_changes *changes; ssize_t ret; + CSTD_UNUSED(offset); + if (!reader) return -EBADF; @@ -596,9 +587,8 @@ static ssize_t reader_read(struct file *const filp, goto exit; } - if (wait_event_interruptible( - reader->wait_queue, - !!reader_changes_count_locked(changes))) { + if (wait_event_interruptible(reader->wait_queue, + !!reader_changes_count_locked(changes))) { ret = -EINTR; goto exit; } @@ -625,8 +615,7 @@ exit: * * EPOLLHUP | EPOLLERR - IO control arguments were invalid or the file * descriptor did not have an attached reader. */ -static __poll_t reader_poll(struct file *const file, - struct poll_table_struct *const wait) +static __poll_t reader_poll(struct file *const file, struct poll_table_struct *const wait) { struct reader *reader; struct reader_changes *changes; @@ -652,13 +641,11 @@ static __poll_t reader_poll(struct file *const file, } /* The file operations virtual function table */ -static const struct file_operations file_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = reader_read, - .poll = reader_poll, - .release = reader_release -}; +static const struct file_operations file_operations = { .owner = THIS_MODULE, + .llseek = no_llseek, + .read = reader_read, + .poll = reader_poll, + .release = reader_release }; /* The maximum amount of readers that can be created on a context. 
*/ static const size_t kbase_kinstr_jm_readers_max = 16; @@ -669,8 +656,7 @@ static const size_t kbase_kinstr_jm_readers_max = 16; */ static void kbase_kinstr_jm_release(struct kref *const ref) { - struct kbase_kinstr_jm *const ctx = - container_of(ref, struct kbase_kinstr_jm, refcount); + struct kbase_kinstr_jm *const ctx = container_of(ref, struct kbase_kinstr_jm, refcount); kfree(ctx); } @@ -680,8 +666,7 @@ static void kbase_kinstr_jm_release(struct kref *const ref) * @ctx: the context to reference count * Return: the reference counted context */ -static struct kbase_kinstr_jm * -kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx) +static struct kbase_kinstr_jm *kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx) { if (likely(ctx)) kref_get(&ctx->refcount); @@ -708,7 +693,7 @@ static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx) * -ENOMEM - too many readers already added. */ static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, - struct reader *const reader) + struct reader *const reader) { struct hlist_bl_head *const readers = &ctx->readers; struct hlist_bl_node *node; @@ -751,16 +736,14 @@ static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, static_branch_dec(&basep_kinstr_jm_reader_static_key); } -int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, - union kbase_kinstr_jm_fd *jm_fd_arg) +int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, union kbase_kinstr_jm_fd *jm_fd_arg) { struct kbase_kinstr_jm_fd_in const *in; struct reader *reader; - size_t const change_size = sizeof(struct - kbase_kinstr_jm_atom_state_change); + size_t const change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); int status; int fd; - int i; + size_t i; if (!ctx || !jm_fd_arg) return -EINVAL; @@ -782,8 +765,7 @@ int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, jm_fd_arg->out.size = change_size; memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding)); - fd = 
anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader, - O_CLOEXEC); + fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader, O_CLOEXEC); if (fd < 0) reader_term(reader); @@ -814,16 +796,15 @@ void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx) kbase_kinstr_jm_ref_put(ctx); } -void kbasep_kinstr_jm_atom_state( - struct kbase_jd_atom *const katom, - const enum kbase_kinstr_jm_reader_atom_state state) +void kbasep_kinstr_jm_atom_state(struct kbase_jd_atom *const katom, + const enum kbase_kinstr_jm_reader_atom_state state) { struct kbase_context *const kctx = katom->kctx; struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm; const u8 id = kbase_jd_atom_id(kctx, katom); - struct kbase_kinstr_jm_atom_state_change change = { - .timestamp = ktime_get_raw_ns(), .atom = id, .state = state - }; + struct kbase_kinstr_jm_atom_state_change change = { .timestamp = ktime_get_raw_ns(), + .atom = id, + .state = state }; struct reader *reader; struct hlist_bl_node *node; @@ -840,8 +821,7 @@ void kbasep_kinstr_jm_atom_state( rcu_read_lock(); hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node) - reader_changes_push( - &reader->changes, &change, &reader->wait_queue); + reader_changes_push(&reader->changes, &change, &reader->wait_queue); rcu_read_unlock(); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h index 9451d4cd943d..709ab2eed4ad 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,8 +102,7 @@ void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx); * @jm_fd_arg: Pointer to the union containing the in/out params * Return: -1 on failure, valid file descriptor on success */ -int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, - union kbase_kinstr_jm_fd *jm_fd_arg); +int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, union kbase_kinstr_jm_fd *jm_fd_arg); /** * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state @@ -115,9 +114,8 @@ int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this * function directly. */ -void kbasep_kinstr_jm_atom_state( - struct kbase_jd_atom *const atom, - const enum kbase_kinstr_jm_reader_atom_state state); +void kbasep_kinstr_jm_atom_state(struct kbase_jd_atom *const atom, + const enum kbase_kinstr_jm_reader_atom_state state); /* Allows ASM goto patching to reduce tracing overhead. This is * incremented/decremented when readers are created and terminated. This really @@ -133,9 +131,8 @@ extern struct static_key_false basep_kinstr_jm_reader_static_key; * * This uses a static key to reduce overhead when tracing is disabled */ -static inline void kbase_kinstr_jm_atom_state( - struct kbase_jd_atom *const atom, - const enum kbase_kinstr_jm_reader_atom_state state) +static inline void kbase_kinstr_jm_atom_state(struct kbase_jd_atom *const atom, + const enum kbase_kinstr_jm_reader_atom_state state) { if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) kbasep_kinstr_jm_atom_state(atom, state); @@ -146,11 +143,9 @@ static inline void kbase_kinstr_jm_atom_state( * hardware or software queue. 
* @atom: The atom that has changed state */ -static inline void kbase_kinstr_jm_atom_state_queue( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_state_queue(struct kbase_jd_atom *const atom) { - kbase_kinstr_jm_atom_state( - atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); + kbase_kinstr_jm_atom_state(atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); } /** @@ -158,11 +153,9 @@ static inline void kbase_kinstr_jm_atom_state_queue( * atom * @atom: The atom that has changed state */ -static inline void kbase_kinstr_jm_atom_state_start( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_state_start(struct kbase_jd_atom *const atom) { - kbase_kinstr_jm_atom_state( - atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); + kbase_kinstr_jm_atom_state(atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); } /** @@ -170,11 +163,9 @@ static inline void kbase_kinstr_jm_atom_state_start( * atom * @atom: The atom that has changed state */ -static inline void kbase_kinstr_jm_atom_state_stop( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_state_stop(struct kbase_jd_atom *const atom) { - kbase_kinstr_jm_atom_state( - atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); + kbase_kinstr_jm_atom_state(atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); } /** @@ -182,11 +173,9 @@ static inline void kbase_kinstr_jm_atom_state_stop( * on an atom * @atom: The atom that has changed state */ -static inline void kbase_kinstr_jm_atom_state_complete( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_state_complete(struct kbase_jd_atom *const atom) { - kbase_kinstr_jm_atom_state( - atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); + kbase_kinstr_jm_atom_state(atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); } /** @@ -204,8 +193,7 @@ static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) * completed * @atom: The atom that has changed state */ -static inline void 
kbase_kinstr_jm_atom_complete( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_complete(struct kbase_jd_atom *const atom) { kbase_kinstr_jm_atom_state_complete(atom); } @@ -214,8 +202,7 @@ static inline void kbase_kinstr_jm_atom_complete( * kbase_kinstr_jm_atom_sw_start() - A software atom has started work * @atom: The atom that has changed state */ -static inline void kbase_kinstr_jm_atom_sw_start( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_sw_start(struct kbase_jd_atom *const atom) { kbase_kinstr_jm_atom_state_start(atom); } @@ -224,8 +211,7 @@ static inline void kbase_kinstr_jm_atom_sw_start( * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work * @atom: The atom that has changed state */ -static inline void kbase_kinstr_jm_atom_sw_stop( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_sw_stop(struct kbase_jd_atom *const atom) { kbase_kinstr_jm_atom_state_stop(atom); } @@ -243,8 +229,7 @@ void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted * @atom: The atom that has been submitted */ -static inline void kbase_kinstr_jm_atom_hw_submit( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom) { if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) kbasep_kinstr_jm_atom_hw_submit(atom); @@ -263,8 +248,7 @@ void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom); * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released * @atom: The atom that has been released */ -static inline void kbase_kinstr_jm_atom_hw_release( - struct kbase_jd_atom *const atom) +static inline void kbase_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom) { if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) kbasep_kinstr_jm_atom_hw_release(atom); diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c index 823f9156e19e..a934948f11ee 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -91,14 +90,20 @@ struct kbase_kinstr_prfcnt_sample { /** * struct kbase_kinstr_prfcnt_sample_array - Array of sample data. - * @user_buf: Address of allocated userspace buffer. A single allocation is used - * for all Dump Buffers in the array. - * @sample_count: Number of allocated samples. - * @samples: Non-NULL pointer to the array of Dump Buffers. + * @user_buf: Address of allocated userspace buffer. A single allocation is used + * for all Dump Buffers in the array. + * @sample_count: Number of allocated samples. + * @blk_stt_scratch: Scratch buffer to redirect the block states from the internal + * dump buffer when setting up the samples array. We use this + * to ensure that the block state information is not duplicated + * when using kbase_hwcnt_dump_buffer methods to copy the client + * dump buffer to @user_buf . + * @samples: Non-NULL pointer to the array of Dump Buffers. */ struct kbase_kinstr_prfcnt_sample_array { u8 *user_buf; size_t sample_count; + blk_stt_t *blk_stt_scratch; struct kbase_kinstr_prfcnt_sample *samples; }; @@ -128,14 +133,14 @@ struct kbase_kinstr_prfcnt_client_config { * for terminating a partially * initialised client. 
* - * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised - * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session - * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map - * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer - * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array - * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client - * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue - * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised + * @KINSTR_PRFCNT_UNINITIALISED: Client is uninitialised + * @KINSTR_PRFCNT_PARSE_SETUP: Parse the setup session + * @KINSTR_PRFCNT_ENABLE_MAP: Allocate memory for enable map + * @KINSTR_PRFCNT_DUMP_BUFFER: Allocate memory for dump buffer + * @KINSTR_PRFCNT_SAMPLE_ARRAY: Allocate memory for and initialise sample array + * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT: Create virtualizer client + * @KINSTR_PRFCNT_WAITQ_MUTEX: Create and initialise mutex and waitqueue + * @KINSTR_PRFCNT_INITIALISED: Client is fully initialised */ enum kbase_kinstr_prfcnt_client_init_state { KINSTR_PRFCNT_UNINITIALISED, @@ -236,9 +241,8 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if * data can not be read without blocking, else EPOLLHUP | EPOLLERR. */ -static __poll_t -kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, - struct poll_table_struct *wait) +static __poll_t kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, + struct poll_table_struct *wait) { struct kbase_kinstr_prfcnt_client *cli; @@ -305,8 +309,7 @@ static u64 kbasep_kinstr_prfcnt_timestamp_ns(void) * rescheduled. Else, the dump worker will be rescheduled for the next * periodic client dump. 
*/ -static void kbasep_kinstr_prfcnt_reschedule_worker( - struct kbase_kinstr_prfcnt_context *kinstr_ctx) +static void kbasep_kinstr_prfcnt_reschedule_worker(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { u64 cur_ts_ns; u64 shortest_period_ns = U64_MAX; @@ -333,8 +336,7 @@ static void kbasep_kinstr_prfcnt_reschedule_worker( list_for_each_entry(pos, &kinstr_ctx->clients, node) { /* Ignore clients that are not periodic or not active. */ if (pos->active && pos->dump_interval_ns > 0) { - shortest_period_ns = - MIN(shortest_period_ns, pos->dump_interval_ns); + shortest_period_ns = MIN(shortest_period_ns, pos->dump_interval_ns); /* Next dump should happen exactly one period after the last dump. * If last dump was overdue and scheduled to happen more than one @@ -344,8 +346,7 @@ static void kbasep_kinstr_prfcnt_reschedule_worker( if (pos->next_dump_time_ns < cur_ts_ns) pos->next_dump_time_ns = MAX(cur_ts_ns + 1, - pos->next_dump_time_ns + - pos->dump_interval_ns); + pos->next_dump_time_ns + pos->dump_interval_ns); } } @@ -355,19 +356,64 @@ static void kbasep_kinstr_prfcnt_reschedule_worker( /* Start the timer if there are periodic clients and kinstr_prfcnt is not * suspended. */ - if ((shortest_period_ns != U64_MAX) && - (kinstr_ctx->suspend_count == 0)) { + if ((shortest_period_ns != U64_MAX) && (kinstr_ctx->suspend_count == 0)) { u64 next_schedule_time_ns = - kbasep_kinstr_prfcnt_next_dump_time_ns( - cur_ts_ns, shortest_period_ns); + kbasep_kinstr_prfcnt_next_dump_time_ns(cur_ts_ns, shortest_period_ns); hrtimer_start(&kinstr_ctx->dump_timer, - ns_to_ktime(next_schedule_time_ns - cur_ts_ns), - HRTIMER_MODE_REL); + ns_to_ktime(next_schedule_time_ns - cur_ts_ns), HRTIMER_MODE_REL); } } -static enum prfcnt_block_type -kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) +/** + * kbase_hwcnt_block_state_to_prfcnt_block_state - convert internal HW + * block state enum to + * the UAPI bitmask. 
+ * + * @hw_blk_stt: internal representation of the hardware counter block + * state + * Return: UAPI-compliant bitmap of the states. + */ +static u32 kbase_hwcnt_block_state_to_prfcnt_block_state(blk_stt_t hw_blk_stt) +{ + u32 prfcnt_block_state = 0; + unsigned long block_state_mask = hw_blk_stt; + unsigned long bit_shift; + + for_each_set_bit(bit_shift, &block_state_mask, BITS_PER_TYPE(blk_stt_t)) { + blk_stt_t set_bit = 1u << bit_shift; + + switch (set_bit) { + case KBASE_HWCNT_STATE_UNKNOWN: + prfcnt_block_state |= BLOCK_STATE_UNKNOWN; + break; + case KBASE_HWCNT_STATE_ON: + prfcnt_block_state |= BLOCK_STATE_ON; + break; + case KBASE_HWCNT_STATE_OFF: + prfcnt_block_state |= BLOCK_STATE_OFF; + break; + case KBASE_HWCNT_STATE_AVAILABLE: + prfcnt_block_state |= BLOCK_STATE_AVAILABLE; + break; + case KBASE_HWCNT_STATE_UNAVAILABLE: + prfcnt_block_state |= BLOCK_STATE_UNAVAILABLE; + break; + case KBASE_HWCNT_STATE_NORMAL: + prfcnt_block_state |= BLOCK_STATE_NORMAL; + break; + case KBASE_HWCNT_STATE_PROTECTED: + prfcnt_block_state |= BLOCK_STATE_PROTECTED; + break; + default: + WARN(true, "Unknown block state bit set: 0x%x", set_bit); + break; + } + } + + return prfcnt_block_state; +} + +static enum prfcnt_block_type kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) { enum prfcnt_block_type block_type; @@ -393,8 +439,23 @@ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) block_type = PRFCNT_BLOCK_TYPE_MEMORY; break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: + block_type = PRFCNT_BLOCK_TYPE_FW; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: + block_type = PRFCNT_BLOCK_TYPE_CSG; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + case 
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: default: @@ -406,10 +467,10 @@ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) } static bool kbase_kinstr_is_block_type_reserved(const struct kbase_hwcnt_metadata *metadata, - size_t grp, size_t blk) + size_t blk) { enum prfcnt_block_type block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( - kbase_hwcnt_metadata_block_type(metadata, grp, blk)); + kbase_hwcnt_metadata_block_type(metadata, blk)); return block_type == PRFCNT_BLOCK_TYPE_RESERVED; } @@ -432,7 +493,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena struct prfcnt_metadata **block_meta_base, u8 *base_addr, u8 counter_set) { - size_t grp, blk, blk_inst; + size_t blk, blk_inst; struct prfcnt_metadata **ptr_md = block_meta_base; const struct kbase_hwcnt_metadata *metadata; uint8_t block_idx = 0; @@ -441,29 +502,32 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) + { u8 *dst_blk; + blk_stt_t hw_blk_stt; /* Block indices must be reported with no gaps. 
*/ if (blk_inst == 0) block_idx = 0; /* Skip unavailable or non-enabled blocks */ - if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || - !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || - !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) + if (kbase_kinstr_is_block_type_reserved(metadata, blk) || + !kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst) || + !kbase_hwcnt_enable_map_block_enabled(enable_map, blk, blk_inst)) continue; - dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + hw_blk_stt = *kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK; (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION; (*ptr_md)->u.block_md.block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( - kbase_hwcnt_metadata_block_type(metadata, grp, - blk)); + kbase_hwcnt_metadata_block_type(metadata, blk)); (*ptr_md)->u.block_md.block_idx = block_idx; (*ptr_md)->u.block_md.set = counter_set; - (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; + (*ptr_md)->u.block_md.block_state = + kbase_hwcnt_block_state_to_prfcnt_block_state(hw_blk_stt); (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); /* update the buf meta data block pointer to next item */ @@ -481,10 +545,9 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena * @dump_buf: Non-NULL pointer to dump buffer where sample is stored. * @ptr_md: Non-NULL pointer to sample metadata. 
*/ -static void kbasep_kinstr_prfcnt_set_sample_metadata( - struct kbase_kinstr_prfcnt_client *cli, - struct kbase_hwcnt_dump_buffer *dump_buf, - struct prfcnt_metadata *ptr_md) +static void kbasep_kinstr_prfcnt_set_sample_metadata(struct kbase_kinstr_prfcnt_client *cli, + struct kbase_hwcnt_dump_buffer *dump_buf, + struct prfcnt_metadata *ptr_md) { u8 clk_cnt, i; @@ -528,9 +591,9 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata( * @ts_start_ns: Time stamp for the start point of the sample dump. * @ts_end_ns: Time stamp for the end point of the sample dump. */ -static void kbasep_kinstr_prfcnt_client_output_sample( - struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx, - u64 user_data, u64 ts_start_ns, u64 ts_end_ns) +static void kbasep_kinstr_prfcnt_client_output_sample(struct kbase_kinstr_prfcnt_client *cli, + unsigned int buf_idx, u64 user_data, + u64 ts_start_ns, u64 ts_end_ns) { struct kbase_hwcnt_dump_buffer *dump_buf; struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf; @@ -551,8 +614,7 @@ static void kbasep_kinstr_prfcnt_client_output_sample( * variant will explicitly zero any non-enabled counters to ensure * nothing except exactly what the user asked for is made visible. */ - kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, - &cli->enable_map); + kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, &cli->enable_map); /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item. * Set timestamp and user data for real dump. 
@@ -631,9 +693,7 @@ static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *c return 0; } -static int -kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, - u64 user_data) +static int kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) { int ret; u64 tm_start, tm_end; @@ -658,16 +718,15 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, if (!available_samples_count) return -EBUSY; - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &cli->config.phys_em); + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); /* Enable all the available clk_enable_map. */ cli->enable_map.clk_enable_map = (1ull << cli->kinstr_ctx->metadata->clk_cnt) - 1; mutex_lock(&cli->kinstr_ctx->lock); /* Enable HWC from the configuration of the client creation */ - ret = kbase_hwcnt_virtualizer_client_set_counters( - cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); + ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, &cli->enable_map, &tm_start, + &tm_end, NULL); if (!ret) { cli->active = true; @@ -683,14 +742,12 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, return ret; } -static int -kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, - u64 user_data) +static int kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) { int ret; u64 tm_start = 0; u64 tm_end = 0; - struct kbase_hwcnt_physical_enable_map phys_em; + struct kbase_hwcnt_physical_enable_map phys_em = { 0 }; size_t available_samples_count; unsigned int write_idx; unsigned int read_idx; @@ -704,14 +761,6 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, mutex_lock(&cli->kinstr_ctx->lock); - /* Disable counters under the lock, so we do not race with the - * sampling thread. 
- */ - phys_em.fe_bm = 0; - phys_em.tiler_bm = 0; - phys_em.mmu_l2_bm = 0; - phys_em.shader_bm = 0; - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); /* Check whether one has the buffer to hold the last sample */ @@ -720,10 +769,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, available_samples_count = cli->sample_count - (write_idx - read_idx); - ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, - &cli->enable_map, - &tm_start, &tm_end, - &cli->tmp_buf); + ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, &cli->enable_map, &tm_start, + &tm_end, &cli->tmp_buf); /* If the last stop sample is in error, set the sample flag */ if (ret) cli->sample_flags |= SAMPLE_FLAG_ERROR; @@ -732,11 +779,10 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (!WARN_ON(!available_samples_count)) { write_idx %= cli->sample_arr.sample_count; /* Handle the last stop sample */ - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &cli->config.phys_em); + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); /* As this is a stop sample, mark it as MANUAL */ - kbasep_kinstr_prfcnt_client_output_sample( - cli, write_idx, user_data, tm_start, tm_end); + kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, tm_start, + tm_end); /* Notify client. Make sure all changes to memory are visible. 
*/ wmb(); atomic_inc(&cli->write_idx); @@ -754,9 +800,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, return 0; } -static int -kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, - u64 user_data) +static int kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) { int ret; @@ -775,8 +820,7 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, return ret; } -static int -kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli) +static int kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli) { unsigned int write_idx; @@ -814,16 +858,13 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, switch (control_cmd->cmd) { case PRFCNT_CONTROL_CMD_START: - ret = kbasep_kinstr_prfcnt_client_start(cli, - control_cmd->user_data); + ret = kbasep_kinstr_prfcnt_client_start(cli, control_cmd->user_data); break; case PRFCNT_CONTROL_CMD_STOP: - ret = kbasep_kinstr_prfcnt_client_stop(cli, - control_cmd->user_data); + ret = kbasep_kinstr_prfcnt_client_stop(cli, control_cmd->user_data); break; case PRFCNT_CONTROL_CMD_SAMPLE_SYNC: - ret = kbasep_kinstr_prfcnt_client_sync_dump( - cli, control_cmd->user_data); + ret = kbasep_kinstr_prfcnt_client_sync_dump(cli, control_cmd->user_data); break; case PRFCNT_CONTROL_CMD_DISCARD: ret = kbasep_kinstr_prfcnt_client_discard(cli); @@ -838,9 +879,8 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, return ret; } -static int -kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, - struct prfcnt_sample_access *sample_access) +static int kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_sample_access *sample_access) { unsigned int write_idx; unsigned int read_idx; @@ -890,9 +930,8 @@ error_out: return err; } -static int -kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, - 
struct prfcnt_sample_access *sample_access) +static int kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_sample_access *sample_access) { unsigned int write_idx; unsigned int read_idx; @@ -938,8 +977,7 @@ error_out: * * Return: 0 on success, else error code. */ -static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, - unsigned int cmd, +static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { long rcode = 0; @@ -978,8 +1016,7 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, struct prfcnt_sample_access sample_access; int err; - err = copy_from_user(&sample_access, uarg, - sizeof(sample_access)); + err = copy_from_user(&sample_access, uarg, sizeof(sample_access)); if (err) return -EFAULT; rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access); @@ -999,8 +1036,7 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, * * Return: 0 on success, else error code. */ -static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, - struct vm_area_struct *vma) +static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, struct vm_area_struct *vma) { struct kbase_kinstr_prfcnt_client *cli; @@ -1014,13 +1050,14 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, return remap_vmalloc_range(vma, cli->sample_arr.user_buf, 0); } -static void kbasep_kinstr_prfcnt_sample_array_free( - struct kbase_kinstr_prfcnt_sample_array *sample_arr) +static void +kbasep_kinstr_prfcnt_sample_array_free(struct kbase_kinstr_prfcnt_sample_array *sample_arr) { if (!sample_arr) return; kfree(sample_arr->samples); + kfree(sample_arr->blk_stt_scratch); vfree(sample_arr->user_buf); memset(sample_arr, 0, sizeof(*sample_arr)); } @@ -1072,11 +1109,12 @@ void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) * * Return: 0 always. 
*/ -static int kbasep_kinstr_prfcnt_hwcnt_reader_release(struct inode *inode, - struct file *filp) +static int kbasep_kinstr_prfcnt_hwcnt_reader_release(struct inode *inode, struct file *filp) { struct kbase_kinstr_prfcnt_client *cli = filp->private_data; + CSTD_UNUSED(inode); + mutex_lock(&cli->kinstr_ctx->lock); WARN_ON(cli->kinstr_ctx->client_count == 0); @@ -1104,17 +1142,18 @@ static const struct file_operations kinstr_prfcnt_client_fops = { size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata, struct kbase_hwcnt_enable_map *enable_map) { - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t md_count = 0; if (!metadata) return 0; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) + { /* Skip unavailable, non-enabled or reserved blocks */ - if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || - !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || - !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) + if (kbase_kinstr_is_block_type_reserved(metadata, blk) || + !kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst) || + !kbase_hwcnt_enable_map_block_enabled(enable_map, blk, blk_inst)) continue; md_count++; @@ -1141,6 +1180,10 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size(struct kbase_kinstr_prfcnt_cl if (!metadata) return 0; + /* Note that while we have the block state bytes that form part of the dump + * buffer, we do not want to output them as part of the sample as they are already + * part of the in-band metadata. 
+ */ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; @@ -1156,8 +1199,8 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size(struct kbase_kinstr_prfcnt_cl */ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) { - struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( - work, struct kbase_kinstr_prfcnt_context, dump_work); + struct kbase_kinstr_prfcnt_context *kinstr_ctx = + container_of(work, struct kbase_kinstr_prfcnt_context, dump_work); struct kbase_kinstr_prfcnt_client *pos; u64 cur_time_ns; @@ -1184,19 +1227,17 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) * * Return: HRTIMER_NORESTART always. */ -static enum hrtimer_restart -kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer) +static enum hrtimer_restart kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer) { - struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( - timer, struct kbase_kinstr_prfcnt_context, dump_timer); + struct kbase_kinstr_prfcnt_context *kinstr_ctx = + container_of(timer, struct kbase_kinstr_prfcnt_context, dump_timer); /* We don't need to check kinstr_ctx->suspend_count here. * Suspend and resume functions already ensure that the worker * is cancelled when the driver is suspended, and resumed when * the suspend_count reaches 0. 
*/ - kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, - &kinstr_ctx->dump_work); + kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, &kinstr_ctx->dump_work); return HRTIMER_NORESTART; } @@ -1225,8 +1266,7 @@ int kbase_kinstr_prfcnt_init(struct kbase_hwcnt_virtualizer *hvirt, mutex_init(&kinstr_ctx->lock); INIT_LIST_HEAD(&kinstr_ctx->clients); - hrtimer_init(&kinstr_ctx->dump_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); + hrtimer_init(&kinstr_ctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); kinstr_ctx->dump_timer.function = kbasep_kinstr_prfcnt_dump_timer; INIT_WORK(&kinstr_ctx->dump_work, kbasep_kinstr_prfcnt_dump_worker); @@ -1243,7 +1283,7 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) if (WARN_ON(kinstr_ctx->client_count > 0)) { struct kbase_kinstr_prfcnt_client *pos, *n; - list_for_each_entry_safe (pos, n, &kinstr_ctx->clients, node) { + list_for_each_entry_safe(pos, n, &kinstr_ctx->clients, node) { list_del(&pos->node); kinstr_ctx->client_count--; kbasep_kinstr_prfcnt_client_destroy(pos); @@ -1258,8 +1298,10 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { - if (WARN_ON(!kinstr_ctx)) + if (!kinstr_ctx) { + pr_warn("%s: kinstr_ctx is NULL\n", __func__); return; + } mutex_lock(&kinstr_ctx->lock); @@ -1288,8 +1330,10 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { - if (WARN_ON(!kinstr_ctx)) + if (!kinstr_ctx) { + pr_warn("%s: kinstr_ctx is NULL\n", __func__); return; + } mutex_lock(&kinstr_ctx->lock); @@ -1303,7 +1347,7 @@ void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) struct kbase_kinstr_prfcnt_client *pos; bool has_periodic_clients = false; - list_for_each_entry (pos, &kinstr_ctx->clients, node) { + list_for_each_entry(pos, &kinstr_ctx->clients, node) { 
if (pos->dump_interval_ns != 0) { has_periodic_clients = true; break; @@ -1311,9 +1355,8 @@ void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) } if (has_periodic_clients) - kbase_hwcnt_virtualizer_queue_work( - kinstr_ctx->hvirt, - &kinstr_ctx->dump_work); + kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, + &kinstr_ctx->dump_work); } } @@ -1325,10 +1368,12 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl { struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr; struct kbase_kinstr_prfcnt_sample *samples; + blk_stt_t *scratch_buffer; size_t sample_idx; size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; size_t sample_meta_bytes; + size_t scratch_buffer_bytes; size_t md_count; size_t sample_size; size_t buffer_count = cli->config.buffer_count; @@ -1339,10 +1384,15 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map); sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; dump_buf_bytes = metadata->dump_buf_bytes; - clk_cnt_buf_bytes = - sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt; + clk_cnt_buf_bytes = sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt; sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes; + /* In order to not run amok of the structured dump into the user_buf, we + * will be redirecting the block state copy to a scratch buffer instead. + * Note that this will not be mapped to userspace as part of the user_buf. 
+ */ + scratch_buffer_bytes = metadata->blk_stt_bytes; + samples = kmalloc_array(buffer_count, sizeof(*samples), GFP_KERNEL); if (!samples) @@ -1355,17 +1405,28 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl return -ENOMEM; } + scratch_buffer = kmalloc(scratch_buffer_bytes * sizeof(*scratch_buffer), GFP_KERNEL); + if (!scratch_buffer) { + vfree(sample_arr->user_buf); + kfree(samples); + return -ENOMEM; + } + + sample_arr->blk_stt_scratch = scratch_buffer; sample_arr->sample_count = buffer_count; sample_arr->samples = samples; for (sample_idx = 0; sample_idx < buffer_count; sample_idx++) { const size_t sample_meta_offset = sample_size * sample_idx; - const size_t dump_buf_offset = - sample_meta_offset + sample_meta_bytes; - const size_t clk_cnt_buf_offset = - dump_buf_offset + dump_buf_bytes; + const size_t dump_buf_offset = sample_meta_offset + sample_meta_bytes; + const size_t clk_cnt_buf_offset = dump_buf_offset + dump_buf_bytes; - /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */ + /* Internal layout in a sample buffer: + * [sample metadata, dump_buf, clk_cnt_buf]. + * The block state buffer is passed as part of the sample + * metadata and not as part of the sample buffer, so we redirect it + * to the scratch buffer allocated above. 
+ */ samples[sample_idx].dump_buf.metadata = metadata; samples[sample_idx].sample_meta = (struct prfcnt_metadata *)(sample_arr->user_buf + sample_meta_offset); @@ -1373,6 +1434,7 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl (u64 *)(sample_arr->user_buf + dump_buf_offset); samples[sample_idx].dump_buf.clk_cnt_buf = (u64 *)(sample_arr->user_buf + clk_cnt_buf_offset); + samples[sample_idx].dump_buf.blk_stt_buf = scratch_buffer; } return 0; @@ -1383,12 +1445,42 @@ static bool prfcnt_mode_supported(u8 mode) return (mode == PRFCNT_MODE_MANUAL) || (mode == PRFCNT_MODE_PERIODIC); } -static void -kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em, - const uint64_t *enable_mask) +/** + * prfcnt_block_supported - Check if the current GPU supports a given block + * type. + * @metadata: GPU-specific metadata + * @req_enable: Requested block type coming from userspace. + * + * Return: true if the block is supported by the underlying GPU, false + * otherwise. + */ +static bool prfcnt_block_supported(const struct kbase_hwcnt_metadata *metadata, + const struct prfcnt_request_enable *req_enable) { - *phys_em |= kbase_hwcnt_backend_gpu_block_map_to_physical( - enable_mask[0], enable_mask[1]); + size_t blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) + { + const enum kbase_hwcnt_gpu_v5_block_type blk_type = + kbase_hwcnt_metadata_block_type(metadata, blk); + const enum prfcnt_block_type prfcnt_block_type = + kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(blk_type); + + if (prfcnt_block_type == req_enable->block_type) + return true; + } + return false; +} + +static void kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em, + const uint64_t *enable_mask) +{ + /* Adding a baseline phys_em value '1' on any type that has been requested. This + * ensures the phys_em block states will always be reflected in the client's + * sample outputs, even when the client provided an all zero value mask. 
+ */ + *phys_em |= + (1 | kbase_hwcnt_backend_gpu_block_map_to_physical(enable_mask[0], enable_mask[1])); } /** @@ -1406,9 +1498,9 @@ kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em, * * Return: 0 on success, else error code. */ -static int kbasep_kinstr_prfcnt_parse_request_enable( - const struct prfcnt_request_enable *req_enable, - struct kbase_kinstr_prfcnt_client_config *config) +static int +kbasep_kinstr_prfcnt_parse_request_enable(const struct prfcnt_request_enable *req_enable, + struct kbase_kinstr_prfcnt_client_config *config) { int err = 0; u8 req_set = KBASE_HWCNT_SET_UNDEFINED, default_set; @@ -1466,20 +1558,28 @@ static int kbasep_kinstr_prfcnt_parse_request_enable( */ switch (req_enable->block_type) { case PRFCNT_BLOCK_TYPE_FE: - kbasep_kinstr_prfcnt_block_enable_to_physical( - &config->phys_em.fe_bm, req_enable->enable_mask); + kbasep_kinstr_prfcnt_block_enable_to_physical(&config->phys_em.fe_bm, + req_enable->enable_mask); break; case PRFCNT_BLOCK_TYPE_TILER: - kbasep_kinstr_prfcnt_block_enable_to_physical( - &config->phys_em.tiler_bm, req_enable->enable_mask); + kbasep_kinstr_prfcnt_block_enable_to_physical(&config->phys_em.tiler_bm, + req_enable->enable_mask); break; case PRFCNT_BLOCK_TYPE_MEMORY: - kbasep_kinstr_prfcnt_block_enable_to_physical( - &config->phys_em.mmu_l2_bm, req_enable->enable_mask); + kbasep_kinstr_prfcnt_block_enable_to_physical(&config->phys_em.mmu_l2_bm, + req_enable->enable_mask); break; case PRFCNT_BLOCK_TYPE_SHADER_CORE: - kbasep_kinstr_prfcnt_block_enable_to_physical( - &config->phys_em.shader_bm, req_enable->enable_mask); + kbasep_kinstr_prfcnt_block_enable_to_physical(&config->phys_em.shader_bm, + req_enable->enable_mask); + break; + case PRFCNT_BLOCK_TYPE_FW: + kbasep_kinstr_prfcnt_block_enable_to_physical(&config->phys_em.fw_bm, + req_enable->enable_mask); + break; + case PRFCNT_BLOCK_TYPE_CSG: + kbasep_kinstr_prfcnt_block_enable_to_physical(&config->phys_em.csg_bm, + req_enable->enable_mask); break; 
default: err = -EINVAL; @@ -1500,9 +1600,9 @@ static int kbasep_kinstr_prfcnt_parse_request_enable( * * Return: 0 on success, else error code. */ -static int kbasep_kinstr_prfcnt_parse_request_scope( - const struct prfcnt_request_scope *req_scope, - struct kbase_kinstr_prfcnt_client_config *config) +static int +kbasep_kinstr_prfcnt_parse_request_scope(const struct prfcnt_request_scope *req_scope, + struct kbase_kinstr_prfcnt_client_config *config) { int err = 0; @@ -1536,6 +1636,8 @@ static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context * unsigned int item_count = setup->in.request_item_count; int err = 0; + CSTD_UNUSED(kinstr_ctx); + if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE || req_arr[item_count - 1].hdr.item_version != 0) { return -EINVAL; @@ -1569,10 +1671,8 @@ static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context * if (!prfcnt_mode_supported(req_arr[i].u.req_mode.mode)) err = -EINVAL; else if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) - config->prfcnt_mode = - req_arr[i].u.req_mode.mode; - else if (req_arr[i].u.req_mode.mode != - config->prfcnt_mode) + config->prfcnt_mode = req_arr[i].u.req_mode.mode; + else if (req_arr[i].u.req_mode.mode != config->prfcnt_mode) err = -EINVAL; if (err < 0) @@ -1580,15 +1680,11 @@ static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context * if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) { config->period_ns = - req_arr[i] - .u.req_mode.mode_config.periodic - .period_ns; + req_arr[i].u.req_mode.mode_config.periodic.period_ns; if ((config->period_ns != 0) && - (config->period_ns < - DUMP_INTERVAL_MIN_NS)) { - config->period_ns = - DUMP_INTERVAL_MIN_NS; + (config->period_ns < DUMP_INTERVAL_MIN_NS)) { + config->period_ns = DUMP_INTERVAL_MIN_NS; } if (config->period_ns == 0) @@ -1597,13 +1693,19 @@ static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context * break; case PRFCNT_REQUEST_TYPE_ENABLE: - err = 
kbasep_kinstr_prfcnt_parse_request_enable( - &req_arr[i].u.req_enable, config); + if (!prfcnt_block_supported(kinstr_ctx->metadata, + &req_arr[i].u.req_enable)) { + err = -EINVAL; + break; + } + + err = kbasep_kinstr_prfcnt_parse_request_enable(&req_arr[i].u.req_enable, + config); break; case PRFCNT_REQUEST_TYPE_SCOPE: - err = kbasep_kinstr_prfcnt_parse_request_scope( - &req_arr[i].u.req_scope, config); + err = kbasep_kinstr_prfcnt_parse_request_scope(&req_arr[i].u.req_scope, + config); break; default: @@ -1729,30 +1831,44 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst *out_vcli = cli; return 0; - } -static size_t kbasep_kinstr_prfcnt_get_block_info_count( - const struct kbase_hwcnt_metadata *metadata) +static size_t kbasep_kinstr_prfcnt_get_block_info_count(const struct kbase_hwcnt_metadata *metadata) { - size_t grp, blk; + size_t blk; size_t block_info_count = 0; if (!metadata) return 0; - for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) { - for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) { - if (!kbase_kinstr_is_block_type_reserved(metadata, grp, blk)) - block_info_count++; - } + for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata); blk++) { + if (!kbase_kinstr_is_block_type_reserved(metadata, blk)) + block_info_count++; } return block_info_count; } -static void kbasep_kinstr_prfcnt_get_request_info_list( - struct prfcnt_enum_item *item_arr, size_t *arr_idx) +static size_t kbasep_kinstr_prfcnt_get_enum_info_count(const struct kbase_hwcnt_metadata *metadata) +{ + /* First we indicate the number of non-block enable request types that + * are available, defined by a static array in this file. + */ + uint32_t count = ARRAY_SIZE(kinstr_prfcnt_supported_requests); + + /* Then we count the number of block types that should be indicated to + * userspace by checking the metadata. 
+ */ + count += kbasep_kinstr_prfcnt_get_block_info_count(metadata); + + /* We finally include one item for sample_info (clock domain) description + * and one item for a sentinel to indicate that the list is done. + */ + return count + 2; +} + +static void kbasep_kinstr_prfcnt_get_request_info_list(struct prfcnt_enum_item *item_arr, + size_t *arr_idx) { memcpy(&item_arr[*arr_idx], kinstr_prfcnt_supported_requests, sizeof(kinstr_prfcnt_supported_requests)); @@ -1781,113 +1897,87 @@ int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata * size_t block_set, struct prfcnt_enum_item *item_arr, size_t *arr_idx) { - size_t grp, blk; + size_t blk; if (!metadata || !item_arr || !arr_idx) return -EINVAL; - for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) { - for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) { - size_t blk_inst; - size_t unused_blk_inst_count = 0; - size_t blk_inst_count = - kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk); - enum prfcnt_block_type block_type = - kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( - kbase_hwcnt_metadata_block_type(metadata, grp, blk)); + for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata); blk++) { + size_t blk_inst; + size_t unused_blk_inst_count = 0; + size_t blk_inst_count = kbase_hwcnt_metadata_block_instance_count(metadata, blk); + enum prfcnt_block_type block_type = + kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( + kbase_hwcnt_metadata_block_type(metadata, blk)); - if (block_type == PRFCNT_BLOCK_TYPE_RESERVED) - continue; + if (block_type == PRFCNT_BLOCK_TYPE_RESERVED) + continue; - /* Count number of unused blocks to updated number of instances */ - for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) { - if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, - blk_inst)) - unused_blk_inst_count++; - } + /* Count number of unused blocks to updated number of instances */ + for (blk_inst = 0; 
blk_inst < blk_inst_count; blk_inst++) { + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst)) + unused_blk_inst_count++; + } - item_arr[(*arr_idx)++] = (struct prfcnt_enum_item){ - .hdr = { - .item_type = PRFCNT_ENUM_TYPE_BLOCK, - .item_version = PRFCNT_READER_API_VERSION, - }, + item_arr[(*arr_idx)++] = (struct prfcnt_enum_item){ + .hdr = { + .item_type = PRFCNT_ENUM_TYPE_BLOCK, + .item_version = PRFCNT_READER_API_VERSION, + }, .u.block_counter = { .set = block_set, .block_type = block_type, .num_instances = blk_inst_count - unused_blk_inst_count, .num_values = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk), + metadata, blk), /* The bitmask of available counters should be dynamic. * Temporarily, it is set to U64_MAX, waiting for the * required functionality to be available in the future. */ .counter_mask = {U64_MAX, U64_MAX}, }, - }; - } + }; } return 0; } -static int kbasep_kinstr_prfcnt_enum_info_count( - struct kbase_kinstr_prfcnt_context *kinstr_ctx, - struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +static int +kbasep_kinstr_prfcnt_enum_info_count(struct kbase_kinstr_prfcnt_context *kinstr_ctx, + struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) { - uint32_t count = 0; - size_t block_info_count = 0; const struct kbase_hwcnt_metadata *metadata; - count = ARRAY_SIZE(kinstr_prfcnt_supported_requests); metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); - /* Add the sample_info (clock domain) descriptive item */ - count++; - - /* Other blocks based on meta data */ - block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); - count += block_info_count; - - /* Reserve one for the last sentinel item. 
*/ - count++; - enum_info->info_item_count = count; + enum_info->info_item_count = kbasep_kinstr_prfcnt_get_enum_info_count(metadata); enum_info->info_item_size = sizeof(struct prfcnt_enum_item); - kinstr_ctx->info_item_count = count; return 0; } -static int kbasep_kinstr_prfcnt_enum_info_list( - struct kbase_kinstr_prfcnt_context *kinstr_ctx, - struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +int kbasep_kinstr_populate_prfcnt_enum_list(const struct kbase_hwcnt_metadata *metadata, + struct prfcnt_enum_item *prfcnt_item_arr, + size_t prfcnt_item_array_size) { - struct prfcnt_enum_item *prfcnt_item_arr; size_t arr_idx = 0; int err = 0; - size_t block_info_count = 0; - const struct kbase_hwcnt_metadata *metadata; + size_t block_info_count; - if ((enum_info->info_item_size == 0) || - (enum_info->info_item_count == 0) || !enum_info->info_list_ptr) + if (!metadata) return -EINVAL; - if (enum_info->info_item_count != kinstr_ctx->info_item_count) + block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); + /* Some basic sanity check on the provided prfcnt_item_array_size */ + if (block_info_count >= prfcnt_item_array_size) return -EINVAL; - prfcnt_item_arr = kcalloc(enum_info->info_item_count, - sizeof(*prfcnt_item_arr), GFP_KERNEL); - if (!prfcnt_item_arr) - return -ENOMEM; - kbasep_kinstr_prfcnt_get_request_info_list(prfcnt_item_arr, &arr_idx); - metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); /* Place the sample_info item */ kbasep_kinstr_prfcnt_get_sample_info_item(metadata, prfcnt_item_arr, &arr_idx); - block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); - - if (arr_idx + block_info_count >= enum_info->info_item_count) + if (arr_idx + block_info_count >= prfcnt_item_array_size) err = -EINVAL; if (!err) { @@ -1901,23 +1991,47 @@ static int kbasep_kinstr_prfcnt_enum_info_list( /* Default to primary */ counter_set = KBASE_HWCNT_SET_PRIMARY; #endif - kbasep_kinstr_prfcnt_get_block_info_list( - metadata, 
counter_set, prfcnt_item_arr, &arr_idx); - if (arr_idx != enum_info->info_item_count - 1) + kbasep_kinstr_prfcnt_get_block_info_list(metadata, counter_set, prfcnt_item_arr, + &arr_idx); + if (arr_idx != prfcnt_item_array_size - 1) err = -EINVAL; } /* The last sentinel item. */ - prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_type = - FLEX_LIST_TYPE_NONE; - prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0; + prfcnt_item_arr[prfcnt_item_array_size - 1].hdr.item_type = FLEX_LIST_TYPE_NONE; + prfcnt_item_arr[prfcnt_item_array_size - 1].hdr.item_version = 0; + + return err; +} + +static int +kbasep_kinstr_prfcnt_enum_info_list(struct kbase_kinstr_prfcnt_context *kinstr_ctx, + struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +{ + struct prfcnt_enum_item *prfcnt_item_arr; + int err = 0; + const struct kbase_hwcnt_metadata *metadata; + + if ((enum_info->info_item_size == 0) || (enum_info->info_item_count == 0) || + !enum_info->info_list_ptr) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); + + if (enum_info->info_item_count != kbasep_kinstr_prfcnt_get_enum_info_count(metadata)) + return -EINVAL; + + prfcnt_item_arr = kcalloc(enum_info->info_item_count, sizeof(*prfcnt_item_arr), GFP_KERNEL); + if (!prfcnt_item_arr) + return -ENOMEM; + + err = kbasep_kinstr_populate_prfcnt_enum_list(metadata, prfcnt_item_arr, + enum_info->info_item_count); if (!err) { - unsigned long bytes = - enum_info->info_item_count * sizeof(*prfcnt_item_arr); + unsigned long bytes = enum_info->info_item_count * sizeof(*prfcnt_item_arr); - if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr), - prfcnt_item_arr, bytes)) + if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr), prfcnt_item_arr, bytes)) err = -EFAULT; } @@ -1925,9 +2039,8 @@ static int kbasep_kinstr_prfcnt_enum_info_list( return err; } -int kbase_kinstr_prfcnt_enum_info( - struct kbase_kinstr_prfcnt_context *kinstr_ctx, - struct 
kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +int kbase_kinstr_prfcnt_enum_info(struct kbase_kinstr_prfcnt_context *kinstr_ctx, + struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) { int err; @@ -1935,11 +2048,9 @@ int kbase_kinstr_prfcnt_enum_info( return -EINVAL; if (!enum_info->info_list_ptr) - err = kbasep_kinstr_prfcnt_enum_info_count(kinstr_ctx, - enum_info); + err = kbasep_kinstr_prfcnt_enum_info_count(kinstr_ctx, enum_info); else - err = kbasep_kinstr_prfcnt_enum_info_list(kinstr_ctx, - enum_info); + err = kbasep_kinstr_prfcnt_enum_info_list(kinstr_ctx, enum_info); return err; } @@ -1948,24 +2059,36 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, union kbase_ioctl_kinstr_prfcnt_setup *setup) { int err; - size_t item_count; + size_t item_count, max_item_count; size_t bytes; struct prfcnt_request_item *req_arr = NULL; struct kbase_kinstr_prfcnt_client *cli = NULL; const size_t max_bytes = 32 * sizeof(*req_arr); + const struct kbase_hwcnt_metadata *metadata; if (!kinstr_ctx || !setup) return -EINVAL; item_count = setup->in.request_item_count; + if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0)) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); + + max_item_count = kbasep_kinstr_prfcnt_get_enum_info_count(metadata); + /* Limiting the request items to 2x of the expected: accommodating * moderate duplications but rejecting excessive abuses. */ - if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) || - item_count > 2 * kinstr_ctx->info_item_count) { + if (item_count > 2 * max_item_count) return -EINVAL; - } + + /* Only after the initial validation do we want to add more information to the + * prfcnt context, in order to avoid the dependency on the enum_info IOCTL + * before setting up the context. 
+ */ + kinstr_ctx->info_item_count = max_item_count; if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes)) return -EINVAL; @@ -1995,12 +2118,10 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, mutex_unlock(&kinstr_ctx->lock); setup->out.prfcnt_metadata_item_size = sizeof(struct prfcnt_metadata); - setup->out.prfcnt_mmap_size_bytes = - cli->sample_size * cli->sample_count; + setup->out.prfcnt_mmap_size_bytes = cli->sample_size * cli->sample_count; /* Expose to user-space only once the client is fully initialized */ - err = anon_inode_getfd("[mali_kinstr_prfcnt_desc]", - &kinstr_prfcnt_client_fops, cli, + err = anon_inode_getfd("[mali_kinstr_prfcnt_desc]", &kinstr_prfcnt_client_fops, cli, O_RDONLY | O_CLOEXEC); if (err < 0) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h index bbe33796e62f..37ad7027f892 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,9 +46,8 @@ union kbase_ioctl_kinstr_prfcnt_setup; * * Return: 0 on success, else error code. */ -int kbase_kinstr_prfcnt_init( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_kinstr_prfcnt_context **out_kinstr_ctx); +int kbase_kinstr_prfcnt_init(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_kinstr_prfcnt_context **out_kinstr_ctx); /** * kbase_kinstr_prfcnt_term() - Terminate a kinstr_prfcnt context. @@ -168,9 +167,8 @@ void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) * * Return: 0 on success, else error code. 
*/ -int kbase_kinstr_prfcnt_enum_info( - struct kbase_kinstr_prfcnt_context *kinstr_ctx, - struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info); +int kbase_kinstr_prfcnt_enum_info(struct kbase_kinstr_prfcnt_context *kinstr_ctx, + struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info); /** * kbase_kinstr_prfcnt_setup() - Set up a new hardware counter reader client. @@ -186,4 +184,19 @@ int kbase_kinstr_prfcnt_enum_info( int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, union kbase_ioctl_kinstr_prfcnt_setup *setup); +/** + * kbasep_kinstr_populate_prfcnt_enum_list() - Populate the enumeration output list. + * @metadata: Hardware counter metadata + * @item_array: Pointer to a pre-allocated array for populating the enumeration items + * @array_size: The array size of the item_array. Must match the corresponding metadata + * enumeration number of items. + * + * The function converts the configured hardware counter metadata into perfcnt enumeration + * items and populate them into the supplied recipient array. + * + * Return: 0 on success, else -EINVAL on misconfigured input fields or mismatched array length. + */ +int kbasep_kinstr_populate_prfcnt_enum_list(const struct kbase_hwcnt_metadata *metadata, + struct prfcnt_enum_item *item_array, size_t array_size); + #endif /* _KBASE_KINSTR_PRFCNT_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h index e5c6f7a0b217..9195be347e2b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,9 +34,9 @@ #include #if IS_ENABLED(MALI_KERNEL_TEST_API) - #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) +#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) #else - #define KBASE_EXPORT_TEST_API(func) +#define KBASE_EXPORT_TEST_API(func) #endif #define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index 1c94e9c57b7f..329de89812c0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -34,7 +34,8 @@ #include #include -#include +#include +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) @@ -73,1380 +75,115 @@ #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -/* Forward declarations */ -static void free_partial_locked(struct kbase_context *kctx, - struct kbase_mem_pool *pool, struct tagged_addr tp); - -static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) -{ -#if defined(CONFIG_ARM64) - /* VA_BITS can be as high as 48 bits, but all bits are available for - * both user and kernel. - */ - size_t cpu_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - /* x86_64 can access 48 bits of VA, but the 48th is used to denote - * kernel (1) vs userspace (0), so the max here is 47. 
- */ - size_t cpu_va_bits = 47; -#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) - size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; -#else -#error "Unknown CPU VA width for this architecture" -#endif - - if (kbase_ctx_compat_mode(kctx)) - cpu_va_bits = 32; - - return cpu_va_bits; -} - -/* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to - */ -static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, - u64 gpu_pfn) -{ - struct rb_root *rbtree = NULL; - - struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); - -#if MALI_USE_CSF - struct kbase_reg_zone *fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); - - struct kbase_reg_zone *exec_fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); - - if (gpu_pfn >= fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_fixed; - return rbtree; - } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_exec_fixed; - return rbtree; - } -#endif - if (gpu_pfn >= exec_va_zone->base_pfn) - rbtree = &kctx->reg_rbtree_exec; - else { - u64 same_va_end; - - if (kbase_ctx_compat_mode(kctx)) { - same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; - } else { - struct kbase_reg_zone *same_va_zone = - kbase_ctx_reg_zone_get(kctx, - KBASE_REG_ZONE_SAME_VA); - same_va_end = kbase_reg_zone_end_pfn(same_va_zone); - } - - if (gpu_pfn >= same_va_end) - rbtree = &kctx->reg_rbtree_custom; - else - rbtree = &kctx->reg_rbtree_same; - } - - return rbtree; -} - -/* This function inserts a region into the tree. 
*/ -static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) -{ - u64 start_pfn = new_reg->start_pfn; - struct rb_node **link = NULL; - struct rb_node *parent = NULL; - struct rb_root *rbtree = NULL; - - rbtree = new_reg->rbtree; - - link = &(rbtree->rb_node); - /* Find the right place in the tree using tree search */ - while (*link) { - struct kbase_va_region *old_reg; - - parent = *link; - old_reg = rb_entry(parent, struct kbase_va_region, rblink); - - /* RBTree requires no duplicate entries. */ - KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); - - if (old_reg->start_pfn > start_pfn) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - /* Put the new node there, and rebalance tree */ - rb_link_node(&(new_reg->rblink), parent, link); - - rb_insert_color(&(new_reg->rblink), rbtree); -} - -static struct kbase_va_region *find_region_enclosing_range_rbtree( - struct rb_root *rbtree, u64 start_pfn, size_t nr_pages) -{ - struct rb_node *rbnode; - struct kbase_va_region *reg; - u64 end_pfn = start_pfn + nr_pages; - - rbnode = rbtree->rb_node; - - while (rbnode) { - u64 tmp_start_pfn, tmp_end_pfn; - - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - tmp_start_pfn = reg->start_pfn; - tmp_end_pfn = reg->start_pfn + reg->nr_pages; - - /* If start is lower than this, go left. */ - if (start_pfn < tmp_start_pfn) - rbnode = rbnode->rb_left; - /* If end is higher than this, then go right. 
*/ - else if (end_pfn > tmp_end_pfn) - rbnode = rbnode->rb_right; - else /* Enclosing */ - return reg; - } - - return NULL; -} - -struct kbase_va_region *kbase_find_region_enclosing_address( - struct rb_root *rbtree, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_node *rbnode; - struct kbase_va_region *reg; - - rbnode = rbtree->rb_node; - - while (rbnode) { - u64 tmp_start_pfn, tmp_end_pfn; - - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - tmp_start_pfn = reg->start_pfn; - tmp_end_pfn = reg->start_pfn + reg->nr_pages; - - /* If start is lower than this, go left. */ - if (gpu_pfn < tmp_start_pfn) - rbnode = rbnode->rb_left; - /* If end is higher than this, then go right. */ - else if (gpu_pfn >= tmp_end_pfn) - rbnode = rbnode->rb_right; - else /* Enclosing */ - return reg; - } - - return NULL; -} - -/* Find region enclosing given address. */ -struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( - struct kbase_context *kctx, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_root *rbtree = NULL; - - KBASE_DEBUG_ASSERT(kctx != NULL); - - lockdep_assert_held(&kctx->reg_lock); - - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); - - return kbase_find_region_enclosing_address(rbtree, gpu_addr); -} - -KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); - -struct kbase_va_region *kbase_find_region_base_address( - struct rb_root *rbtree, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_node *rbnode = NULL; - struct kbase_va_region *reg = NULL; - - rbnode = rbtree->rb_node; - - while (rbnode) { - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - if (reg->start_pfn > gpu_pfn) - rbnode = rbnode->rb_left; - else if (reg->start_pfn < gpu_pfn) - rbnode = rbnode->rb_right; - else - return reg; - } - - return NULL; -} - -/* Find region with given base address */ -struct kbase_va_region *kbase_region_tracker_find_region_base_address( - struct kbase_context 
*kctx, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_root *rbtree = NULL; - - lockdep_assert_held(&kctx->reg_lock); - - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); - - return kbase_find_region_base_address(rbtree, gpu_addr); -} - -KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); - -/* Find region meeting given requirements */ -static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( - struct kbase_va_region *reg_reqs, - size_t nr_pages, size_t align_offset, size_t align_mask, - u64 *out_start_pfn) -{ - struct rb_node *rbnode = NULL; - struct kbase_va_region *reg = NULL; - struct rb_root *rbtree = NULL; - - /* Note that this search is a linear search, as we do not have a target - * address in mind, so does not benefit from the rbtree search - */ - rbtree = reg_reqs->rbtree; - - for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - if ((reg->nr_pages >= nr_pages) && - (reg->flags & KBASE_REG_FREE)) { - /* Check alignment */ - u64 start_pfn = reg->start_pfn; - - /* When align_offset == align, this sequence is - * equivalent to: - * (start_pfn + align_mask) & ~(align_mask) - * - * Otherwise, it aligns to n*align + offset, for the - * lowest value n that makes this still >start_pfn - */ - start_pfn += align_mask; - start_pfn -= (start_pfn - align_offset) & (align_mask); - - if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { - /* Can't end at 4GB boundary */ - if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) - start_pfn += align_offset; - - /* Can't start at 4GB boundary */ - if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) - start_pfn += align_offset; - - if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) || - !(start_pfn & BASE_MEM_PFN_MASK_4GB)) - continue; - } else if (reg_reqs->flags & - KBASE_REG_GPU_VA_SAME_4GB_PAGE) { - u64 end_pfn = start_pfn + nr_pages - 1; - - if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != - (end_pfn & 
~BASE_MEM_PFN_MASK_4GB)) - start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; - } - - if ((start_pfn >= reg->start_pfn) && - (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && - ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { - *out_start_pfn = start_pfn; - return reg; - } - } - } - - return NULL; -} - -/** - * kbase_remove_va_region - Remove a region object from the global list. - * - * @kbdev: The kbase device - * @reg: Region object to remove - * - * The region reg is removed, possibly by merging with other free and - * compatible adjacent regions. It must be called with the context - * region lock held. The associated memory is not released (see - * kbase_free_alloced_region). Internal use only. - */ -void kbase_remove_va_region(struct kbase_device *kbdev, - struct kbase_va_region *reg) -{ - struct rb_node *rbprev; - struct kbase_va_region *prev = NULL; - struct rb_node *rbnext; - struct kbase_va_region *next = NULL; - struct rb_root *reg_rbtree = NULL; - struct kbase_va_region *orig_reg = reg; - - int merged_front = 0; - int merged_back = 0; - - reg_rbtree = reg->rbtree; - - if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) - return; - - /* Try to merge with the previous block first */ - rbprev = rb_prev(&(reg->rblink)); - if (rbprev) { - prev = rb_entry(rbprev, struct kbase_va_region, rblink); - if (prev->flags & KBASE_REG_FREE) { - /* We're compatible with the previous VMA, merge with - * it, handling any gaps for robustness. - */ - u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; - - WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); - if (!WARN_ON(reg->start_pfn < prev_end_pfn)) - prev->nr_pages += reg->start_pfn - prev_end_pfn; - prev->nr_pages += reg->nr_pages; - rb_erase(&(reg->rblink), reg_rbtree); - reg = prev; - merged_front = 1; - } - } - - /* Try to merge with the next block second */ - /* Note we do the lookup here as the tree may have been rebalanced. 
*/ - rbnext = rb_next(&(reg->rblink)); - if (rbnext) { - next = rb_entry(rbnext, struct kbase_va_region, rblink); - if (next->flags & KBASE_REG_FREE) { - /* We're compatible with the next VMA, merge with it, - * handling any gaps for robustness. - */ - u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; - - WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); - if (!WARN_ON(next->start_pfn < reg_end_pfn)) - next->nr_pages += next->start_pfn - reg_end_pfn; - next->start_pfn = reg->start_pfn; - next->nr_pages += reg->nr_pages; - rb_erase(&(reg->rblink), reg_rbtree); - merged_back = 1; - } - } - - if (merged_front && merged_back) { - /* We already merged with prev, free it */ - kfree(reg); - } else if (!(merged_front || merged_back)) { - /* If we failed to merge then we need to add a new block */ - - /* - * We didn't merge anything. Try to add a new free - * placeholder, and in any case, remove the original one. - */ - struct kbase_va_region *free_reg; - - free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, - reg->flags & KBASE_REG_ZONE_MASK); - if (!free_reg) { - /* In case of failure, we cannot allocate a replacement - * free region, so we will be left with a 'gap' in the - * region tracker's address range (though, the rbtree - * will itself still be correct after erasing - * 'reg'). - * - * The gap will be rectified when an adjacent region is - * removed by one of the above merging paths. Other - * paths will gracefully fail to allocate if they try - * to allocate in the gap. - * - * There is nothing that the caller can do, since free - * paths must not fail. The existing 'reg' cannot be - * repurposed as the free region as callers must have - * freedom of use with it by virtue of it being owned - * by them, not the region tracker insert/remove code. 
- */ - dev_warn( - kbdev->dev, - "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx", - (unsigned long long)reg->start_pfn << PAGE_SHIFT, - (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT); - rb_erase(&(reg->rblink), reg_rbtree); - - goto out; - } - rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); - } - - /* This operation is always safe because the function never frees - * the region. If the region has been merged to both front and back, - * then it's the previous region that is supposed to be freed. - */ - orig_reg->start_pfn = 0; - -out: - return; -} - -KBASE_EXPORT_TEST_API(kbase_remove_va_region); - -/** - * kbase_insert_va_region_nolock - Insert a VA region to the list, - * replacing the existing one. - * - * @kbdev: The kbase device - * @new_reg: The new region to insert - * @at_reg: The region to replace - * @start_pfn: The Page Frame Number to insert at - * @nr_pages: The number of pages of the region - * - * Return: 0 on success, error code otherwise. - */ -static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, - struct kbase_va_region *new_reg, - struct kbase_va_region *at_reg, u64 start_pfn, - size_t nr_pages) -{ - struct rb_root *reg_rbtree = NULL; - int err = 0; - - reg_rbtree = at_reg->rbtree; - - /* Must be a free region */ - KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); - /* start_pfn should be contained within at_reg */ - KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); - /* at least nr_pages from start_pfn should be contained within at_reg */ - KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); - /* having at_reg means the rb_tree should not be empty */ - if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) - return -ENOMEM; - - new_reg->start_pfn = start_pfn; - new_reg->nr_pages = nr_pages; - - /* Regions are a whole use, so swap and delete old one. 
*/ - if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { - rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), - reg_rbtree); - kfree(at_reg); - } - /* New region replaces the start of the old one, so insert before. */ - else if (at_reg->start_pfn == start_pfn) { - at_reg->start_pfn += nr_pages; - KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); - at_reg->nr_pages -= nr_pages; - - kbase_region_tracker_insert(new_reg); - } - /* New region replaces the end of the old one, so insert after. */ - else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { - at_reg->nr_pages -= nr_pages; - - kbase_region_tracker_insert(new_reg); - } - /* New region splits the old one, so insert and create new */ - else { - struct kbase_va_region *new_front_reg; - - new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, - start_pfn - at_reg->start_pfn, - at_reg->flags & KBASE_REG_ZONE_MASK); - - if (new_front_reg) { - at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; - at_reg->start_pfn = start_pfn + nr_pages; - - kbase_region_tracker_insert(new_front_reg); - kbase_region_tracker_insert(new_reg); - } else { - err = -ENOMEM; - } - } - - return err; -} - -/** - * kbase_add_va_region - Add a VA region to the region list for a context. - * - * @kctx: kbase context containing the region - * @reg: the region to add - * @addr: the address to insert the region at - * @nr_pages: the number of pages in the region - * @align: the minimum alignment in pages - * - * Return: 0 on success, error code otherwise. 
- */ -int kbase_add_va_region(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 addr, - size_t nr_pages, size_t align) -{ - int err = 0; - struct kbase_device *kbdev = kctx->kbdev; - int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); - int gpu_pc_bits = - kbdev->gpu_props.props.core_props.log2_program_counter_size; - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(reg != NULL); - - lockdep_assert_held(&kctx->reg_lock); - - /* The executable allocation from the SAME_VA zone should already have an - * appropriately aligned GPU VA chosen for it. - * Also, executable allocations from EXEC_VA don't need the special - * alignment. - */ -#if MALI_USE_CSF - /* The same is also true for the EXEC_FIXED_VA zone. - */ -#endif - if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && -#if MALI_USE_CSF - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && -#endif - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { - if (cpu_va_bits > gpu_pc_bits) { - align = max(align, (size_t)((1ULL << gpu_pc_bits) - >> PAGE_SHIFT)); - } - } - - do { - err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, - align); - if (err != -ENOMEM) - break; - - /* - * If the allocation is not from the same zone as JIT - * then don't retry, we're out of VA and there is - * nothing which can be done about it. - */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) - break; - } while (kbase_jit_evict(kctx)); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_add_va_region); - -/** - * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree - * - * @kbdev: The kbase device - * @reg: The region to add - * @addr: The address to add the region at, or 0 to map at any available address - * @nr_pages: The size of the region in pages - * @align: The minimum alignment in pages - * - * Insert a region into the rbtree that was specified when the region was - * created. 
If addr is 0 a free area in the rbtree is used, otherwise the - * specified address is used. - * - * Return: 0 on success, error code otherwise. - */ -int kbase_add_va_region_rbtree(struct kbase_device *kbdev, - struct kbase_va_region *reg, - u64 addr, size_t nr_pages, size_t align) -{ - struct device *const dev = kbdev->dev; - struct rb_root *rbtree = NULL; - struct kbase_va_region *tmp; - u64 gpu_pfn = addr >> PAGE_SHIFT; - int err = 0; - - rbtree = reg->rbtree; - - if (!align) - align = 1; - - /* must be a power of 2 */ - KBASE_DEBUG_ASSERT(is_power_of_2(align)); - KBASE_DEBUG_ASSERT(nr_pages > 0); - - /* Path 1: Map a specific address. Find the enclosing region, - * which *must* be free. - */ - if (gpu_pfn) { - KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); - - tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, - nr_pages); - if (kbase_is_region_invalid(tmp)) { - dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); - err = -ENOMEM; - goto exit; - } else if (!kbase_is_region_free(tmp)) { - dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", - tmp->start_pfn, tmp->flags, - tmp->nr_pages, gpu_pfn, nr_pages); - err = -ENOMEM; - goto exit; - } - - err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); - if (err) { - dev_warn(dev, "Failed to insert va region"); - err = -ENOMEM; - } - } else { - /* Path 2: Map any free address which meets the requirements. 
*/ - u64 start_pfn; - size_t align_offset = align; - size_t align_mask = align - 1; - -#if !MALI_USE_CSF - if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { - WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", - __func__, - (unsigned long)align); - align_mask = reg->extension - 1; - align_offset = reg->extension - reg->initial_commit; - } -#endif /* !MALI_USE_CSF */ - - tmp = kbase_region_tracker_find_region_meeting_reqs(reg, - nr_pages, align_offset, align_mask, - &start_pfn); - if (tmp) { - err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); - if (unlikely(err)) { - dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", - start_pfn, nr_pages); - } - } else { - dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n", - nr_pages, align_offset, align_mask); - err = -ENOMEM; - } - } - -exit: - return err; -} - /* - * @brief Initialize the internal region tracker data structure. + * kbase_large_page_state - flag indicating kbase handling of large pages + * @LARGE_PAGE_AUTO: large pages get selected if the GPU hardware supports them + * @LARGE_PAGE_ON: large pages get selected regardless of GPU support + * @LARGE_PAGE_OFF: large pages get disabled regardless of GPU support */ -#if MALI_USE_CSF -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg, - struct kbase_va_region *exec_va_reg, - struct kbase_va_region *exec_fixed_va_reg, - struct kbase_va_region *fixed_va_reg) +enum kbase_large_page_state { LARGE_PAGE_AUTO, LARGE_PAGE_ON, LARGE_PAGE_OFF, LARGE_PAGE_MAX }; + +static enum kbase_large_page_state large_page_conf = + IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT) ? 
LARGE_PAGE_AUTO : LARGE_PAGE_OFF; + +static int set_large_page_conf(const char *val, const struct kernel_param *kp) { - u64 last_zone_end_pfn; + char *user_input = strstrip((char *)val); - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); + if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) + return 0; - last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; + if (!strcmp(user_input, "auto")) + large_page_conf = LARGE_PAGE_AUTO; + else if (!strcmp(user_input, "on")) + large_page_conf = LARGE_PAGE_ON; + else if (!strcmp(user_input, "off")) + large_page_conf = LARGE_PAGE_OFF; - /* Although custom_va_reg doesn't always exist, initialize - * unconditionally because of the mem_view debugfs - * implementation which relies on it being empty. - */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) { - WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(custom_va_reg); - last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; - } - - /* Initialize exec, fixed and exec_fixed. These are always - * initialized at this stage, if they will exist at all. 
- */ - kctx->reg_rbtree_fixed = RB_ROOT; - kctx->reg_rbtree_exec_fixed = RB_ROOT; - - if (exec_va_reg) { - WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_va_reg); - last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; - } - - if (exec_fixed_va_reg) { - WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_fixed_va_reg); - last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; - } - - if (fixed_va_reg) { - WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(fixed_va_reg); - last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; - } -} -#else -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg) -{ - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - /* Although custom_va_reg and exec_va_reg don't always exist, - * initialize unconditionally because of the mem_view debugfs - * implementation which relies on them being empty. - * - * The difference between the two is that the EXEC_VA region - * is never initialized at this stage. 
- */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) - kbase_region_tracker_insert(custom_va_reg); -} -#endif /* MALI_USE_CSF */ - -static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; - case KBASE_REG_ZONE_MCU_SHARED: - /* This is only expected to be called on driver unload. */ - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - -static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) -{ - struct rb_node *rbnode; - struct kbase_va_region *reg; - - do { - rbnode = rb_first(rbtree); - if (rbnode) { - rb_erase(rbnode, rbtree); - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); - if (kbase_page_migration_enabled) - kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); - /* Reset the start_pfn - as the rbtree is being - * destroyed and we've already erased this region, there - * is no further need to attempt to remove it. - * This won't affect the cleanup if the region was - * being used as a sticky resource as the cleanup - * related to sticky resources anyways need to be - * performed before the term of region tracker. 
- */ - reg->start_pfn = 0; - kbase_free_alloced_region(reg); - } - } while (rbnode); -} - -void kbase_region_tracker_term(struct kbase_context *kctx) -{ - WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, - "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", - kctx->tgid, kctx->id); - - kbase_gpu_vm_lock(kctx); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -#if MALI_USE_CSF - WARN_ON(!list_empty(&kctx->csf.event_pages_head)); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); - -#endif - kbase_gpu_vm_unlock(kctx); -} - -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) -{ - kbase_region_tracker_erase_rbtree(rbtree); -} - -static size_t kbase_get_same_va_bits(struct kbase_context *kctx) -{ - return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), - kctx->kbdev->gpu_props.mmu.va_bits); -} - -int kbase_region_tracker_init(struct kbase_context *kctx) -{ - struct kbase_va_region *same_va_reg; - struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = kbase_get_same_va_bits(kctx); - u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; - u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; - u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; - u64 same_va_pages; - u64 same_va_base = 1u; - int err; -#if MALI_USE_CSF - struct kbase_va_region *exec_va_reg; - struct kbase_va_region *exec_fixed_va_reg; - struct kbase_va_region *fixed_va_reg; - - u64 exec_va_base; - u64 fixed_va_end; - u64 exec_fixed_va_base; - u64 fixed_va_base; - u64 fixed_va_pages; -#endif - - /* Take the lock as kbase_free_alloced_region requires it */ - kbase_gpu_vm_lock(kctx); - - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; - -#if MALI_USE_CSF - if ((same_va_base + same_va_pages) > 
KBASE_REG_ZONE_EXEC_VA_BASE_64) { - /* Depending on how the kernel is configured, it's possible (eg on aarch64) for - * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone - * doesn't cross into the exec_va zone. - */ - same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; - } -#endif - - /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, - same_va_pages, KBASE_REG_ZONE_SAME_VA); - - if (!same_va_reg) { - err = -ENOMEM; - goto fail_unlock; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, - same_va_pages); - - if (kbase_ctx_compat_mode(kctx)) { - if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - err = -EINVAL; - goto fail_free_same_va; - } - /* If the current size of TMEM is out of range of the - * virtual address space addressable by the MMU then - * we should shrink it to fit - */ - if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) - custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); - - if (!custom_va_reg) { - err = -ENOMEM; - goto fail_free_same_va; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size); - } else { - custom_va_size = 0; - } - -#if MALI_USE_CSF - /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; - - /* Similarly the end of the FIXED_VA zone also depends on whether the client - * is 32 or 64-bits. 
- */ - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; - - if (kbase_ctx_compat_mode(kctx)) { - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; - } - - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE); - - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); - - if (!exec_va_reg) { - err = -ENOMEM; - goto fail_free_custom_va; - } - - exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; - - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, - KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); - - exec_fixed_va_reg = - kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, - exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, - KBASE_REG_ZONE_EXEC_FIXED_VA); - - if (!exec_fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_va; - } - - fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; - fixed_va_pages = fixed_va_end - fixed_va_base; - - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); - - fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, - fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); - - kctx->gpu_va_end = fixed_va_end; - - if (!fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_fixed_va; - } - - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, - exec_fixed_va_reg, fixed_va_reg); - - INIT_LIST_HEAD(&kctx->csf.event_pages_head); -#else - /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is - * initially U64_MAX - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); - /* Other zones are 0: kbase_create_context() uses vzalloc */ - - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; -#endif - 
kctx->jit_va = false; - - kbase_gpu_vm_unlock(kctx); return 0; - -#if MALI_USE_CSF -fail_free_exec_fixed_va: - kbase_free_alloced_region(exec_fixed_va_reg); -fail_free_exec_va: - kbase_free_alloced_region(exec_va_reg); -fail_free_custom_va: - if (custom_va_reg) - kbase_free_alloced_region(custom_va_reg); -#endif - -fail_free_same_va: - kbase_free_alloced_region(same_va_reg); -fail_unlock: - kbase_gpu_vm_unlock(kctx); - return err; } -static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx) +static int get_large_page_conf(char *buffer, const struct kernel_param *kp) { - struct kbase_reg_zone *exec_va_zone; + char *out; - lockdep_assert_held(&kctx->reg_lock); - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + switch (large_page_conf) { + case LARGE_PAGE_AUTO: + out = "auto"; + break; + case LARGE_PAGE_ON: + out = "on"; + break; + case LARGE_PAGE_OFF: + out = "off"; + break; + default: + out = "default"; + break; + } - return (exec_va_zone->base_pfn != U64_MAX); + return sprintf(buffer, "%s\n", out); } -bool kbase_has_exec_va_zone(struct kbase_context *kctx) -{ - bool has_exec_va_zone; +static const struct kernel_param_ops large_page_config_params = { + .set = set_large_page_conf, + .get = get_large_page_conf, +}; - kbase_gpu_vm_lock(kctx); - has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); - kbase_gpu_vm_unlock(kctx); - - return has_exec_va_zone; -} +module_param_cb(large_page_conf, &large_page_config_params, NULL, 0444); +__MODULE_PARM_TYPE(large_page_conf, "charp"); +MODULE_PARM_DESC(large_page_conf, "User override for large page usage on supporting platforms."); /** - * kbase_region_tracker_has_allocs - Determine if any allocations have been made - * on a context's region tracker + * kbasep_mem_page_size_init - Initialize kbase device for 2MB page. + * @kbdev: Pointer to the device. * - * @kctx: KBase context - * - * Check the context to determine if any allocations have been made yet from - * any of its zones. 
This check should be done before resizing a zone, e.g. to - * make space to add a second zone. - * - * Whilst a zone without allocations can be resized whilst other zones have - * allocations, we still check all of @kctx 's zones anyway: this is a stronger - * guarantee and should be adhered to when creating new zones anyway. - * - * Allocations from kbdev zones are not counted. - * - * Return: true if any allocs exist on any zone, false otherwise + * This function must be called only when a kbase device is initialized. */ -static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) -{ - unsigned int zone_idx; - - lockdep_assert_held(&kctx->reg_lock); - - for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { - struct kbase_reg_zone *zone; - struct kbase_va_region *reg; - u64 zone_base_addr; - unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); - unsigned long reg_zone; - - if (!kbase_is_ctx_reg_zone(zone_bits)) - continue; - zone = kbase_ctx_reg_zone_get(kctx, zone_bits); - zone_base_addr = zone->base_pfn << PAGE_SHIFT; - - reg = kbase_region_tracker_find_region_base_address( - kctx, zone_base_addr); - - if (!zone->va_size_pages) { - WARN(reg, - "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits); - continue; - } - - if (WARN(!reg, - "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", - (unsigned long long)zone_base_addr, zone_bits)) - return true; /* Safest return value */ - - reg_zone = reg->flags & KBASE_REG_ZONE_MASK; - if (WARN(reg_zone != zone_bits, - "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits, - reg_zone)) - return true; /* Safest return value */ - - /* Unless the region is completely free, of the same size as - * the original zone, then it has allocs - */ - if ((!(reg->flags & KBASE_REG_FREE)) || - (reg->nr_pages != 
zone->va_size_pages)) - return true; - } - - /* All zones are the same size as originally made, so there are no - * allocs - */ - return false; -} - -static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, - u64 jit_va_pages) -{ - struct kbase_va_region *same_va_reg; - struct kbase_reg_zone *same_va_zone; - u64 same_va_zone_base_addr; - const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; - struct kbase_va_region *custom_va_reg; - u64 jit_va_start; - - lockdep_assert_held(&kctx->reg_lock); - - /* - * Modify the same VA free region after creation. The caller has - * ensured that allocations haven't been made, as any allocations could - * cause an overlap to happen with existing same VA allocations and the - * custom VA zone. - */ - same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); - same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; - - same_va_reg = kbase_region_tracker_find_region_base_address( - kctx, same_va_zone_base_addr); - if (WARN(!same_va_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) - return -ENOMEM; - - /* kbase_region_tracker_has_allocs() in the caller has already ensured - * that all of the zones have no allocs, so no need to check that again - * on same_va_reg - */ - WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || - same_va_reg->nr_pages != same_va_zone->va_size_pages); - - if (same_va_reg->nr_pages < jit_va_pages || - same_va_zone->va_size_pages < jit_va_pages) - return -ENOMEM; - - /* It's safe to adjust the same VA zone now */ - same_va_reg->nr_pages -= jit_va_pages; - same_va_zone->va_size_pages -= jit_va_pages; - jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); - - /* - * Create a custom VA zone at the end of the VA for allocations which - * JIT can use so it doesn't have to allocate VA from the kernel. 
- */ - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); - - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - if (!custom_va_reg) - return -ENOMEM; - /* Since this is 64-bit, the custom zone will not have been - * initialized, so initialize it now - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, - jit_va_pages); - - kbase_region_tracker_insert(custom_va_reg); - return 0; -} - -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - int max_allocations, int trim_level, int group_id, - u64 phys_pages_limit) -{ - int err = 0; - - if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) - return -EINVAL; - - if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) - return -EINVAL; - - if (phys_pages_limit > jit_va_pages) - return -EINVAL; - -#if MALI_JIT_PRESSURE_LIMIT_BASE - if (phys_pages_limit != jit_va_pages) - kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); -#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - - kbase_gpu_vm_lock(kctx); - - /* Verify that a JIT_VA zone has not been created already. */ - if (kctx->jit_va) { - err = -EINVAL; - goto exit_unlock; - } - - /* If in 64-bit, we always lookup the SAME_VA zone. To ensure it has no - * allocs, we can ensure there are no allocs anywhere. - * - * This check is also useful in 32-bit, just to make sure init of the - * zone is always done before any allocs. - */ - if (kbase_region_tracker_has_allocs(kctx)) { - err = -ENOMEM; - goto exit_unlock; - } - - if (!kbase_ctx_compat_mode(kctx)) - err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); - /* - * Nothing to do for 32-bit clients, JIT uses the existing - * custom VA zone. 
- */ - - if (!err) { - kctx->jit_max_allocations = max_allocations; - kctx->trim_level = trim_level; - kctx->jit_va = true; - kctx->jit_group_id = group_id; -#if MALI_JIT_PRESSURE_LIMIT_BASE - kctx->jit_phys_pages_limit = phys_pages_limit; - dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", - phys_pages_limit); -#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - } - -exit_unlock: - kbase_gpu_vm_unlock(kctx); - - return err; -} - -int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) -{ -#if !MALI_USE_CSF - struct kbase_va_region *exec_va_reg; - struct kbase_reg_zone *exec_va_zone; - struct kbase_reg_zone *target_zone; - struct kbase_va_region *target_reg; - u64 target_zone_base_addr; - unsigned long target_zone_bits; - u64 exec_va_start; - int err; -#endif - - /* The EXEC_VA zone shall be created by making space either: - * - for 64-bit clients, at the end of the process's address space - * - for 32-bit clients, in the CUSTOM zone - * - * Firstly, verify that the number of EXEC_VA pages requested by the - * client is reasonable and then make sure that it is not greater than - * the address space itself before calculating the base address of the - * new zone. - */ - if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) - return -EINVAL; - -#if MALI_USE_CSF - /* For CSF GPUs we now setup the EXEC_VA zone during initialization, - * so this request is a null-op. - */ - return 0; -#else - kbase_gpu_vm_lock(kctx); - - /* Verify that we've not already created a EXEC_VA zone, and that the - * EXEC_VA zone must come before JIT's CUSTOM_VA. 
- */ - if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { - err = -EPERM; - goto exit_unlock; - } - - if (exec_va_pages > kctx->gpu_va_end) { - err = -ENOMEM; - goto exit_unlock; - } - - /* Verify no allocations have already been made */ - if (kbase_region_tracker_has_allocs(kctx)) { - err = -ENOMEM; - goto exit_unlock; - } - - if (kbase_ctx_compat_mode(kctx)) { - /* 32-bit client: take from CUSTOM_VA zone */ - target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; - } else { - /* 64-bit client: take from SAME_VA zone */ - target_zone_bits = KBASE_REG_ZONE_SAME_VA; - } - - target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); - target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; - - target_reg = kbase_region_tracker_find_region_base_address( - kctx, target_zone_base_addr); - if (WARN(!target_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)target_zone_base_addr, target_zone_bits)) { - err = -ENOMEM; - goto exit_unlock; - } - /* kbase_region_tracker_has_allocs() above has already ensured that all - * of the zones have no allocs, so no need to check that again on - * target_reg - */ - WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || - target_reg->nr_pages != target_zone->va_size_pages); - - if (target_reg->nr_pages <= exec_va_pages || - target_zone->va_size_pages <= exec_va_pages) { - err = -ENOMEM; - goto exit_unlock; - } - - /* Taken from the end of the target zone */ - exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; - - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, - exec_va_pages, KBASE_REG_ZONE_EXEC_VA); - if (!exec_va_reg) { - err = -ENOMEM; - goto exit_unlock; - } - /* Update EXEC_VA zone - * - * not using kbase_ctx_reg_zone_init() - it was already initialized - */ - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); - exec_va_zone->base_pfn = exec_va_start; 
- exec_va_zone->va_size_pages = exec_va_pages; - - /* Update target zone and corresponding region */ - target_reg->nr_pages -= exec_va_pages; - target_zone->va_size_pages -= exec_va_pages; - - kbase_region_tracker_insert(exec_va_reg); - err = 0; - -exit_unlock: - kbase_gpu_vm_unlock(kctx); - return err; -#endif /* MALI_USE_CSF */ -} - -#if MALI_USE_CSF -void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) -{ - kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); -} - -int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) -{ - struct kbase_va_region *shared_reg; - u64 shared_reg_start_pfn; - u64 shared_reg_size; - - shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; - shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; - - kbdev->csf.shared_reg_rbtree = RB_ROOT; - - shared_reg = - kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, - shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); - if (!shared_reg) - return -ENOMEM; - - kbase_region_tracker_insert(shared_reg); - return 0; -} -#endif - static void kbasep_mem_page_size_init(struct kbase_device *kbdev) { -#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) -#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) - kbdev->pagesize_2mb = true; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) { - dev_warn( - kbdev->dev, - "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n"); + if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) { + kbdev->pagesize_2mb = false; + dev_info(kbdev->dev, "Large page support was disabled at compile-time!"); + return; } -#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ - kbdev->pagesize_2mb = false; -#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ -#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ - /* Set it to the default based on which GPU is present */ - kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); -#endif /* 
IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ + + switch (large_page_conf) { + case LARGE_PAGE_AUTO: { + kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); + dev_info(kbdev->dev, "Large page allocation set to %s after hardware feature check", + kbdev->pagesize_2mb ? "true" : "false"); + break; + } + case LARGE_PAGE_ON: { + kbdev->pagesize_2mb = true; + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)) + dev_warn(kbdev->dev, + "Enabling large page allocations on unsupporting GPU!"); + else + dev_info(kbdev->dev, "Large page allocation override: turned on\n"); + break; + } + case LARGE_PAGE_OFF: { + kbdev->pagesize_2mb = false; + dev_info(kbdev->dev, "Large page allocation override: turned off\n"); + break; + } + default: { + kbdev->pagesize_2mb = false; + dev_info(kbdev->dev, "Invalid large page override, turning off large pages\n"); + break; + } + } + + /* We want the final state of the setup to be reflected in the module parameter, + * so that userspace could read it to figure out the state of the configuration + * if necessary. + */ + if (kbdev->pagesize_2mb) + large_page_conf = LARGE_PAGE_ON; + else + large_page_conf = LARGE_PAGE_OFF; } int kbase_mem_init(struct kbase_device *kbdev) @@ -1477,13 +214,9 @@ int kbase_mem_init(struct kbase_device *kbdev) kbase_mem_migrate_init(kbdev); kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, - KBASE_MEM_POOL_MAX_SIZE_KCTX); - - /* Initialize memory usage */ - atomic_set(&memdev->used_pages, 0); + KBASE_MEM_POOL_MAX_SIZE_KCTX); spin_lock_init(&kbdev->gpu_mem_usage_lock); - kbdev->total_gpu_pages = 0; kbdev->process_root = RB_ROOT; kbdev->dma_buf_root = RB_ROOT; mutex_init(&kbdev->dma_buf_lock); @@ -1500,32 +233,25 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Check to see whether or not a platform-specific memory group manager * is configured and available. 
*/ - mgm_node = of_parse_phandle(kbdev->dev->of_node, - "physical-memory-group-manager", 0); + mgm_node = of_parse_phandle(kbdev->dev->of_node, "physical-memory-group-manager", 0); if (!mgm_node) { - dev_info(kbdev->dev, - "No memory group manager is configured\n"); + dev_info(kbdev->dev, "No memory group manager is configured\n"); } else { - struct platform_device *const pdev = - of_find_device_by_node(mgm_node); + struct platform_device *const pdev = of_find_device_by_node(mgm_node); if (!pdev) { - dev_err(kbdev->dev, - "The configured memory group manager was not found\n"); + dev_err(kbdev->dev, "The configured memory group manager was not found\n"); } else { kbdev->mgm_dev = platform_get_drvdata(pdev); if (!kbdev->mgm_dev) { - dev_info(kbdev->dev, - "Memory group manager is not ready\n"); + dev_info(kbdev->dev, "Memory group manager is not ready\n"); err = -EPROBE_DEFER; } else if (!try_module_get(kbdev->mgm_dev->owner)) { - dev_err(kbdev->dev, - "Failed to get memory group manger module\n"); + dev_err(kbdev->dev, "Failed to get memory group manger module\n"); err = -ENODEV; kbdev->mgm_dev = NULL; } else { - dev_info(kbdev->dev, - "Memory group manager successfully loaded\n"); + dev_info(kbdev->dev, "Memory group manager successfully loaded\n"); } } of_node_put(mgm_node); @@ -1536,7 +262,7 @@ int kbase_mem_init(struct kbase_device *kbdev) struct kbase_mem_pool_group_config mem_pool_defaults; kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, - KBASE_MEM_POOL_MAX_SIZE_KBDEV); + KBASE_MEM_POOL_MAX_SIZE_KBDEV); err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL); } @@ -1579,165 +305,8 @@ void kbase_mem_term(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_mem_term); -/** - * kbase_alloc_free_region - Allocate a free region object. - * - * @kbdev: kbase device - * @rbtree: Backlink to the red-black tree of memory regions. - * @start_pfn: The Page Frame Number in GPU virtual address space. 
- * @nr_pages: The size of the region in pages. - * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA - * - * The allocated object is not part of any list yet, and is flagged as - * KBASE_REG_FREE. No mapping is allocated yet. - * - * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. - * - * Return: pointer to the allocated region object on success, NULL otherwise. - */ -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone) -{ - struct kbase_va_region *new_reg; - - KBASE_DEBUG_ASSERT(rbtree != NULL); - - /* zone argument should only contain zone related region flags */ - KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); - KBASE_DEBUG_ASSERT(nr_pages > 0); - /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); - - new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); - - if (!new_reg) - return NULL; - - kbase_refcount_set(&new_reg->va_refcnt, 1); - atomic_set(&new_reg->no_user_free_count, 0); - new_reg->cpu_alloc = NULL; /* no alloc bound yet */ - new_reg->gpu_alloc = NULL; /* no alloc bound yet */ - new_reg->rbtree = rbtree; - new_reg->flags = zone | KBASE_REG_FREE; - - new_reg->flags |= KBASE_REG_GROWABLE; - - new_reg->start_pfn = start_pfn; - new_reg->nr_pages = nr_pages; - - INIT_LIST_HEAD(&new_reg->jit_node); - INIT_LIST_HEAD(&new_reg->link); - - return new_reg; -} - -KBASE_EXPORT_TEST_API(kbase_alloc_free_region); - -/** - * kbase_free_alloced_region - Free a region object. - * - * @reg: Region - * - * The described region must be freed of any mapping. - * - * If the region is not flagged as KBASE_REG_FREE, the region's - * alloc object will be released. - * It is a bug if no alloc object exists for non-free regions. 
- * - * If region is KBASE_REG_ZONE_MCU_SHARED it is freed - */ -void kbase_free_alloced_region(struct kbase_va_region *reg) -{ -#if MALI_USE_CSF - if ((reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_MCU_SHARED) { - kfree(reg); - return; - } -#endif - if (!(reg->flags & KBASE_REG_FREE)) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); - - if (WARN_ON(!kctx)) - return; - - if (WARN_ON(kbase_is_region_invalid(reg))) - return; - - dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", - (void *)reg); -#if MALI_USE_CSF - if (reg->flags & KBASE_REG_CSF_EVENT) - /* - * This should not be reachable if called from 'mcu_shared' functions - * such as: - * kbase_csf_firmware_mcu_shared_mapping_init - * kbase_csf_firmware_mcu_shared_mapping_term - */ - - kbase_unlink_event_mem_page(kctx, reg); -#endif - - mutex_lock(&kctx->jit_evict_lock); - - /* - * The physical allocation should have been removed from the - * eviction list before this function is called. However, in the - * case of abnormal process termination or the app leaking the - * memory kbase_mem_free_region is not called so it can still be - * on the list at termination time of the region tracker. - */ - if (!list_empty(®->gpu_alloc->evict_node)) { - /* - * Unlink the physical allocation before unmaking it - * evictable so that the allocation isn't grown back to - * its last backed size as we're going to unmap it - * anyway. - */ - reg->cpu_alloc->reg = NULL; - if (reg->cpu_alloc != reg->gpu_alloc) - reg->gpu_alloc->reg = NULL; - - mutex_unlock(&kctx->jit_evict_lock); - - /* - * If a region has been made evictable then we must - * unmake it before trying to free it. - * If the memory hasn't been reclaimed it will be - * unmapped and freed below, if it has been reclaimed - * then the operations below are no-ops. 
- */ - if (reg->flags & KBASE_REG_DONT_NEED) { - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == - KBASE_MEM_TYPE_NATIVE); - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - } else { - mutex_unlock(&kctx->jit_evict_lock); - } - - /* - * Remove the region from the sticky resource metadata - * list should it be there. - */ - kbase_sticky_resource_release_force(kctx, NULL, - reg->start_pfn << PAGE_SHIFT); - - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); - - reg->flags |= KBASE_REG_VA_FREED; - kbase_va_region_alloc_put(kctx, reg); - } else { - kfree(reg); - } -} - -KBASE_EXPORT_TEST_API(kbase_free_alloced_region); - -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, - u64 addr, size_t nr_pages, size_t align, - enum kbase_caller_mmu_sync_info mmu_sync_info) +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align, enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; size_t i = 0; @@ -1752,11 +321,10 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, gwt_mask = ~KBASE_REG_GPU_WR; #endif - if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && - (reg->flags & KBASE_REG_SHARE_BOTH)) - attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); + if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && (reg->flags & KBASE_REG_SHARE_BOTH)) + attr = KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_OUTER_WA); else - attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + attr = KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_WRITE_ALLOC); KBASE_DEBUG_ASSERT(kctx != NULL); KBASE_DEBUG_ASSERT(reg != NULL); @@ -1799,10 +367,34 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } } } else { - if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || - reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - - err = kbase_mmu_insert_imported_pages( + /* Imported user buffers have dedicated state 
transitions. + * The intended outcome is still the same: creating a GPU mapping, + * but only if the user buffer has already advanced to the expected + * state and has acquired enough resources. + */ + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + /* The region is always supposed to be EMPTY at this stage. + * If the region is coherent with the CPU then all resources are + * acquired, including physical pages and DMA addresses, and a + * GPU mapping is created. + */ + switch (alloc->imported.user_buf.state) { + case KBASE_USER_BUF_STATE_EMPTY: { + if (reg->flags & KBASE_REG_SHARE_BOTH) { + err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg); + reg->gpu_alloc->imported.user_buf + .current_mapping_usage_count++; + } + break; + } + default: { + WARN(1, "Unexpected state %d for imported user buffer\n", + alloc->imported.user_buf.state); + break; + } + } + } else if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + err = kbase_mmu_insert_pages_skip_status_update( kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); @@ -1811,7 +403,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, - mmu_sync_info, reg, true); + mmu_sync_info, reg); } if (err) @@ -1819,8 +411,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, kbase_mem_phy_alloc_gpu_mapped(alloc); } - if (reg->flags & KBASE_REG_IMPORT_PAD && - !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && + if (reg->flags & KBASE_REG_IMPORT_PAD && !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && reg->gpu_alloc->imported.umm.current_mapping_usage_count) { /* For padded imported dma-buf or user-buf memory, map the dummy @@ -1855,8 +446,7 @@ 
bad_aliased_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); } bad_insert: kbase_remove_va_region(kctx->kbdev, reg); @@ -1866,8 +456,7 @@ bad_insert: KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable); +static void kbase_user_buf_unmap(struct kbase_context *kctx, struct kbase_va_region *reg); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1885,37 +474,34 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* Tear down GPU page tables, depending on memory type. */ switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { - size_t i = 0; - /* Due to the way the number of valid PTEs and ATEs are tracked + size_t i = 0; + /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped - * should be passed to the kbase_mmu_teardown_pages() function, - * hence individual aliased regions needs to be unmapped - * separately. + * should be passed to the page teardown function, hence individual + * aliased regions needs to be unmapped separately. 
*/ - for (i = 0; i < alloc->imported.alias.nents; i++) { - struct tagged_addr *phys_alloc = NULL; - int err_loop; + for (i = 0; i < alloc->imported.alias.nents; i++) { + struct tagged_addr *phys_alloc = NULL; + int err_loop; - if (alloc->imported.alias.aliased[i].alloc != NULL) - phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + - alloc->imported.alias.aliased[i].offset; + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; - err_loop = kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * alloc->imported.alias.stride), - phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), phys_alloc, + alloc->imported.alias.aliased[i].length, + alloc->imported.alias.aliased[i].length, kctx->as_nr); - if (WARN_ON_ONCE(err_loop)) - err = err_loop; - } + if (WARN_ON_ONCE(err_loop)) + err = err_loop; } - break; + } break; case KBASE_MEM_TYPE_IMPORTED_UMM: { - size_t nr_phys_pages = reg->nr_pages; - size_t nr_virt_pages = reg->nr_pages; - /* If the region has import padding and falls under the threshold for + size_t nr_phys_pages = reg->nr_pages; + size_t nr_virt_pages = reg->nr_pages; + /* If the region has import padding and falls under the threshold for * issuing a partial GPU cache flush, we want to reduce the number of * physical pages that get flushed. @@ -1924,65 +510,64 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) * maps the single aliasing sink page to each of the virtual padding * pages. 
*/ - if (reg->flags & KBASE_REG_IMPORT_PAD) - nr_phys_pages = alloc->nents + 1; + if (reg->flags & KBASE_REG_IMPORT_PAD) + nr_phys_pages = alloc->nents + 1; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_phys_pages, nr_virt_pages, - kctx->as_nr, true); - } - break; + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, nr_phys_pages, nr_virt_pages, + kctx->as_nr); + } break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - size_t nr_reg_pages = kbase_reg_current_backed_size(reg); - - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, true); - } - break; - default: { - size_t nr_reg_pages = kbase_reg_current_backed_size(reg); - - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, false); - } - break; - } - - /* Update tracking, and other cleanup, depending on memory type. */ - switch (alloc->type) { - case KBASE_MEM_TYPE_ALIAS: - /* We mark the source allocs as unmapped from the GPU when - * putting reg's allocs + /* Progress through all stages to destroy the GPU mapping and release + * all resources. */ - break; - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf; - - if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { - user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT; - - /* The allocation could still have active mappings. 
*/ - if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, alloc, reg, - (reg->flags & - (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); - } + switch (alloc->imported.user_buf.state) { + case KBASE_USER_BUF_STATE_GPU_MAPPED: { + alloc->imported.user_buf.current_mapping_usage_count = 0; + kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg); + break; + } + case KBASE_USER_BUF_STATE_DMA_MAPPED: { + kbase_user_buf_from_dma_mapped_to_empty(kctx, reg); + break; + } + case KBASE_USER_BUF_STATE_PINNED: { + kbase_user_buf_from_pinned_to_empty(kctx, reg); + break; + } + case KBASE_USER_BUF_STATE_EMPTY: { + /* Nothing to do. This is a legal possibility, because an imported + * memory handle can be destroyed just after creation without being + * used. + */ + break; + } + default: { + WARN(1, "Unexpected state %d for imported user buffer\n", + alloc->imported.user_buf.state); + break; + } } - } - fallthrough; - default: - kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); break; } + default: { + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); + + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, nr_reg_pages, nr_reg_pages, + kctx->as_nr); + } break; + } + + if (alloc->type != KBASE_MEM_TYPE_ALIAS) + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); return err; } -static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( - struct kbase_context *kctx, - unsigned long uaddr, size_t size, u64 *offset) +static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(struct kbase_context *kctx, + unsigned long uaddr, size_t size, + u64 *offset) { struct vm_area_struct *vma; struct kbase_cpu_mapping *map; @@ -1993,10 +578,10 @@ static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( lockdep_assert_held(kbase_mem_get_process_mmap_lock()); - if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ + if ((uintptr_t)uaddr + size < (uintptr_t)uaddr) /* overflow check */ return NULL; - 
vma = find_vma_intersection(current->mm, uaddr, uaddr+size); + vma = find_vma_intersection(current->mm, uaddr, uaddr + size); if (!vma || vma->vm_start > uaddr) return NULL; @@ -2024,9 +609,8 @@ static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( return map; } -int kbasep_find_enclosing_cpu_mapping_offset( - struct kbase_context *kctx, - unsigned long uaddr, size_t size, u64 *offset) +int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx, unsigned long uaddr, + size_t size, u64 *offset) { struct kbase_cpu_mapping *map; @@ -2044,8 +628,8 @@ int kbasep_find_enclosing_cpu_mapping_offset( KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); -int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, - u64 gpu_addr, size_t size, u64 *start, u64 *offset) +int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, u64 gpu_addr, + size_t size, u64 *start, u64 *offset) { struct kbase_va_region *region; @@ -2074,9 +658,9 @@ int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kct KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset); -void kbase_sync_single(struct kbase_context *kctx, - struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa, - off_t offset, size_t size, enum kbase_sync_type sync_fn) +void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa, + struct tagged_addr t_gpu_pa, off_t offset, size_t size, + enum kbase_sync_type sync_fn) { struct page *cpu_page; phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa); @@ -2093,11 +677,11 @@ void kbase_sync_single(struct kbase_context *kctx, dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset; if (sync_fn == KBASE_SYNC_TO_CPU) - dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, - size, DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, + DMA_BIDIRECTIONAL); else if (sync_fn == KBASE_SYNC_TO_DEVICE) - 
dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, - size, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, + DMA_BIDIRECTIONAL); } else { void *src = NULL; void *dst = NULL; @@ -2111,26 +695,26 @@ void kbase_sync_single(struct kbase_context *kctx, dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; if (sync_fn == KBASE_SYNC_TO_DEVICE) { - src = ((unsigned char *)kmap(cpu_page)) + offset; - dst = ((unsigned char *)kmap(gpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(cpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(gpu_page)) + offset; } else if (sync_fn == KBASE_SYNC_TO_CPU) { dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); - src = ((unsigned char *)kmap(gpu_page)) + offset; - dst = ((unsigned char *)kmap(cpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(gpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(cpu_page)) + offset; } memcpy(dst, src, size); - kunmap(gpu_page); - kunmap(cpu_page); + kbase_kunmap(gpu_page, src); + kbase_kunmap(cpu_page, dst); if (sync_fn == KBASE_SYNC_TO_DEVICE) dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); } } -static int kbase_do_syncset(struct kbase_context *kctx, - struct basep_syncset *sset, enum kbase_sync_type sync_fn) +static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *sset, + enum kbase_sync_type sync_fn) { int err = 0; struct kbase_va_region *reg; @@ -2142,16 +726,17 @@ static int kbase_do_syncset(struct kbase_context *kctx, u64 page_off, page_count; u64 i; u64 offset; + size_t sz; kbase_os_mem_map_lock(kctx); kbase_gpu_vm_lock(kctx); /* find the region where the virtual address is contained */ reg = kbase_region_tracker_find_region_enclosing_address(kctx, - sset->mem_handle.basep.handle); + sset->mem_handle.basep.handle); if (kbase_is_region_invalid_or_free(reg)) { dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX", - 
sset->mem_handle.basep.handle); + sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -2176,7 +761,7 @@ static int kbase_do_syncset(struct kbase_context *kctx, map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); if (!map) { dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle.basep.handle); + start, sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -2187,39 +772,46 @@ static int kbase_do_syncset(struct kbase_context *kctx, cpu_pa = kbase_get_cpu_phy_pages(reg); gpu_pa = kbase_get_gpu_phy_pages(reg); - if (page_off > reg->nr_pages || - page_off + page_count > reg->nr_pages) { + if (page_off > reg->nr_pages || page_off + page_count > reg->nr_pages) { /* Sync overflows the region */ err = -EINVAL; goto out_unlock; } - /* Sync first page */ - if (as_phys_addr_t(cpu_pa[page_off])) { - size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + if (page_off >= reg->gpu_alloc->nents) { + /* Start of sync range is outside the physically backed region + * so nothing to do + */ + goto out_unlock; + } - kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], - offset, sz, sync_fn); + /* Sync first page */ + sz = MIN(((size_t)PAGE_SIZE - offset), size); + + kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], offset, sz, sync_fn); + + /* Calculate the size for last page */ + sz = ((start + size - 1) & ~PAGE_MASK) + 1; + + /* Limit the sync range to the physically backed region */ + if (page_off + page_count > reg->gpu_alloc->nents) { + page_count = reg->gpu_alloc->nents - page_off; + /* Since we limit the pages then size for last page + * is the whole page + */ + sz = PAGE_SIZE; } /* Sync middle pages (if any) */ for (i = 1; page_count > 2 && i < page_count - 1; i++) { - /* we grow upwards, so bail on first non-present page */ - if (!as_phys_addr_t(cpu_pa[page_off + i])) - break; - - kbase_sync_single(kctx, cpu_pa[page_off + i], - gpu_pa[page_off + i], 0, PAGE_SIZE, 
sync_fn); + kbase_sync_single(kctx, cpu_pa[page_off + i], gpu_pa[page_off + i], 0, PAGE_SIZE, + sync_fn); } /* Sync last page (if any) */ - if (page_count > 1 && - as_phys_addr_t(cpu_pa[page_off + page_count - 1])) { - size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; - + if (page_count > 1) { kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], - gpu_pa[page_off + page_count - 1], 0, sz, - sync_fn); + gpu_pa[page_off + page_count - 1], 0, sz, sync_fn); } out_unlock: @@ -2236,8 +828,7 @@ int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) KBASE_DEBUG_ASSERT(sset != NULL); if (sset->mem_handle.basep.handle & ~PAGE_MASK) { - dev_warn(kctx->kbdev->dev, - "mem_handle: passed parameter is invalid"); + dev_warn(kctx->kbdev->dev, "mem_handle: passed parameter is invalid"); return -EINVAL; } @@ -2267,12 +858,12 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(kctx != NULL); KBASE_DEBUG_ASSERT(reg != NULL); - dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", - __func__, (void *)reg, (void *)kctx); + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); if (kbase_va_region_is_no_user_free(reg)) { - dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); + dev_warn(kctx->kbdev->dev, + "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2302,8 +893,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re } #if MALI_USE_CSF - if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || - ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if (((kbase_bits_to_zone(reg->flags)) == FIXED_VA_ZONE) || + ((kbase_bits_to_zone(reg->flags)) == EXEC_FIXED_VA_ZONE)) { if (reg->flags & KBASE_REG_FIXED_ADDRESS) atomic64_dec(&kctx->num_fixed_allocs); else @@ -2337,8 +928,7 @@ int 
kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", - __func__, gpu_addr, (void *)kctx); + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__); @@ -2346,15 +936,15 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) } if (gpu_addr == 0) { - dev_warn(kctx->kbdev->dev, + dev_warn( + kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n", __func__); return -EINVAL; } kbase_gpu_vm_lock(kctx); - if (gpu_addr >= BASE_MEM_COOKIE_BASE && - gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { + if (gpu_addr >= BASE_MEM_COOKIE_BASE && gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); reg = kctx->pending_regions[cookie]; @@ -2375,15 +965,15 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) { dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX", - __func__, gpu_addr); + __func__, gpu_addr); err = -EINVAL; goto out_unlock; } - if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + if ((kbase_bits_to_zone(reg->flags)) == SAME_VA_ZONE) { /* SAME_VA must be freed through munmap */ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, - gpu_addr); + gpu_addr); err = -EINVAL; goto out_unlock; } @@ -2397,8 +987,8 @@ out_unlock: KBASE_EXPORT_TEST_API(kbase_mem_free); -int kbase_update_region_flags(struct kbase_context *kctx, - struct kbase_va_region *reg, unsigned long flags) +int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region *reg, + unsigned long flags) { KBASE_DEBUG_ASSERT(reg != NULL); 
KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); @@ -2426,16 +1016,13 @@ int kbase_update_region_flags(struct kbase_context *kctx, reg->flags |= KBASE_REG_GPU_NX; if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { - if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && - !(flags & BASE_MEM_UNCACHED_GPU)) + if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && !(flags & BASE_MEM_UNCACHED_GPU)) return -EINVAL; - } else if (flags & (BASE_MEM_COHERENT_SYSTEM | - BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { + } else if (flags & (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { reg->flags |= KBASE_REG_SHARE_BOTH; } - if (!(reg->flags & KBASE_REG_SHARE_BOTH) && - flags & BASE_MEM_COHERENT_LOCAL) { + if (!(reg->flags & KBASE_REG_SHARE_BOTH) && flags & BASE_MEM_COHERENT_LOCAL) { reg->flags |= KBASE_REG_SHARE_IN; } @@ -2461,30 +1048,26 @@ int kbase_update_region_flags(struct kbase_context *kctx, /* Set up default MEMATTR usage */ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { - if (kctx->kbdev->mmu_mode->flags & - KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { /* Override shareability, and MEMATTR for uncached */ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); - reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + reg->flags |= KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); } else { dev_warn(kctx->kbdev->dev, - "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); + "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); return -EINVAL; } #if MALI_USE_CSF } else if (reg->flags & KBASE_REG_CSF_EVENT) { WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH)); - reg->flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + reg->flags |= KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_SHARED); #endif } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && - (reg->flags & KBASE_REG_SHARE_BOTH)) { - reg->flags |= - 
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + (reg->flags & KBASE_REG_SHARE_BOTH)) { + reg->flags |= KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT_ACE); } else { - reg->flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + reg->flags |= KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_DEFAULT); } if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) @@ -2507,8 +1090,29 @@ int kbase_update_region_flags(struct kbase_context *kctx, return 0; } -int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_requested) +static int mem_account_inc(struct kbase_context *kctx, int nr_pages_inc) +{ + int new_page_count = atomic_add_return(nr_pages_inc, &kctx->used_pages); + + atomic_add(nr_pages_inc, &kctx->kbdev->memdev.used_pages); + kbase_process_page_usage_inc(kctx, nr_pages_inc); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_inc); + + return new_page_count; +} + +static int mem_account_dec(struct kbase_context *kctx, int nr_pages_dec) +{ + int new_page_count = atomic_sub_return(nr_pages_dec, &kctx->used_pages); + + atomic_sub(nr_pages_dec, &kctx->kbdev->memdev.used_pages); + kbase_process_page_usage_dec(kctx, nr_pages_dec); + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_pages_dec); + + return new_page_count; +} + +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested) { int new_page_count __maybe_unused; size_t nr_left = nr_pages_requested; @@ -2516,6 +1120,12 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, struct kbase_context *kctx; struct kbase_device *kbdev; struct tagged_addr *tp; + /* The number of pages to account represents the total amount of memory + * actually allocated. If large pages are used, they are taken into account + * in full, even if only a fraction of them is used for sub-allocation + * to satisfy the memory allocation request. 
+ */ + size_t nr_pages_to_account = 0; if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || WARN_ON(alloc->imported.native.kctx == NULL) || @@ -2534,30 +1144,29 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, if (nr_pages_requested == 0) goto done; /*nothing to do*/ - new_page_count = atomic_add_return( - nr_pages_requested, &kctx->used_pages); - atomic_add(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); - /* Increase mm counters before we allocate pages so that this - * allocation is visible to the OOM killer + * allocation is visible to the OOM killer. The actual count + * of pages will be amended later, if necessary, but for the + * moment it is safe to account for the amount initially + * requested. */ - kbase_process_page_usage_inc(kctx, nr_pages_requested); - + new_page_count = mem_account_inc(kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; /* Check if we have enough pages requested so we can allocate a large * page (512 * 4KB = 2MB ) */ - if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) { - int nr_lp = nr_left / (SZ_2M / SZ_4K); + if (kbdev->pagesize_2mb && nr_left >= NUM_4K_PAGES_IN_2MB_PAGE) { + int nr_lp = nr_left / NUM_4K_PAGES_IN_2MB_PAGE; res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], - nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task); + nr_lp * NUM_4K_PAGES_IN_2MB_PAGE, tp, true, + kctx->task); if (res > 0) { nr_left -= res; tp += res; + nr_pages_to_account += res; } if (nr_left) { @@ -2565,21 +1174,18 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, spin_lock(&kctx->mem_partials_lock); - list_for_each_entry_safe(sa, temp_sa, - &kctx->mem_partials, link) { + list_for_each_entry_safe(sa, temp_sa, &kctx->mem_partials, link) { int pidx = 0; while (nr_left) { pidx = find_next_zero_bit(sa->sub_pages, - SZ_2M / SZ_4K, - pidx); + NUM_4K_PAGES_IN_2MB_PAGE, pidx); bitmap_set(sa->sub_pages, pidx, 1); - *tp++ = as_tagged_tag(page_to_phys(sa->page + - pidx), + *tp++ = 
as_tagged_tag(page_to_phys(sa->page + pidx), FROM_PARTIAL); nr_left--; - if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) { + if (bitmap_full(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE)) { /* unlink from partial list when full */ list_del_init(&sa->link); break; @@ -2592,47 +1198,41 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, /* only if we actually have a chunk left <512. If more it indicates * that we couldn't allocate a 2MB above, so no point to retry here. */ - if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + if (nr_left > 0 && nr_left < NUM_4K_PAGES_IN_2MB_PAGE) { /* create a new partial and suballocate the rest from it */ struct page *np = NULL; do { int err; - np = kbase_mem_pool_alloc( - &kctx->mem_pools.large[ - alloc->group_id]); + np = kbase_mem_pool_alloc(&kctx->mem_pools.large[alloc->group_id]); if (np) break; - err = kbase_mem_pool_grow( - &kctx->mem_pools.large[alloc->group_id], - 1, kctx->task); + err = kbase_mem_pool_grow(&kctx->mem_pools.large[alloc->group_id], + 1, kctx->task); if (err) break; } while (1); if (np) { - int i; + size_t i; struct kbase_sub_alloc *sa; struct page *p; sa = kmalloc(sizeof(*sa), GFP_KERNEL); if (!sa) { - kbase_mem_pool_free( - &kctx->mem_pools.large[ - alloc->group_id], - np, - false); + kbase_mem_pool_free(&kctx->mem_pools.large[alloc->group_id], + np, false); goto no_new_partial; } /* store pointers back to the control struct */ np->lru.next = (void *)sa; - for (p = np; p < np + SZ_2M / SZ_4K; p++) + for (p = np; p < np + NUM_4K_PAGES_IN_2MB_PAGE; p++) p->lru.prev = (void *)np; INIT_LIST_HEAD(&sa->link); - bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + bitmap_zero(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE); sa->page = np; for (i = 0; i < nr_left; i++) @@ -2641,6 +1241,12 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, bitmap_set(sa->sub_pages, 0, nr_left); nr_left = 0; + /* A large page has been used for a sub-allocation: account + * for the whole of the large page, and not 
just for the + * sub-pages that have been used. + */ + nr_pages_to_account += NUM_4K_PAGES_IN_2MB_PAGE; + /* expose for later use */ spin_lock(&kctx->mem_partials_lock); list_add(&sa->link, &kctx->mem_partials); @@ -2655,48 +1261,84 @@ no_new_partial: tp, false, kctx->task); if (res <= 0) goto alloc_failed; - } - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - kctx->id, - (u64)new_page_count); + nr_pages_to_account += res; + } alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + /* Amend the page count with the number of pages actually used. */ + if (nr_pages_to_account > nr_pages_requested) + new_page_count = mem_account_inc(kctx, nr_pages_to_account - nr_pages_requested); + else if (nr_pages_to_account < nr_pages_requested) + new_page_count = mem_account_dec(kctx, nr_pages_requested - nr_pages_to_account); + + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); done: return 0; alloc_failed: - /* rollback needed if got one or more 2MB but failed later */ + /* The first step of error recovery is freeing any allocation that + * might have succeeded. The function can be in this condition only + * in one case: it tried to allocate a combination of 2 MB and 4 kB + * pages but only the former step succeeded. In this case, calculate + * the number of 2 MB pages to release and free them. 
+ */ if (nr_left != nr_pages_requested) { size_t nr_pages_to_free = nr_pages_requested - nr_left; alloc->nents += nr_pages_to_free; - - kbase_process_page_usage_inc(kctx, nr_pages_to_free); - atomic_add(nr_pages_to_free, &kctx->used_pages); - atomic_add(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - kbase_free_phy_pages_helper(alloc, nr_pages_to_free); } - kbase_process_page_usage_dec(kctx, nr_pages_requested); - atomic_sub(nr_pages_requested, &kctx->used_pages); - atomic_sub(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + /* Undo the preliminary memory accounting that was done early on + * in the function. If only 4 kB pages are used: nr_left is equal + * to nr_pages_requested. If a combination of 2 MB and 4 kB was + * attempted: nr_pages_requested is equal to the sum of nr_left + * and nr_pages_to_free, and the latter has already been freed above. + * + * Also notice that there's no need to update the page count + * because memory allocation was rolled back. + */ + mem_account_dec(kctx, nr_left); invalid_request: return -ENOMEM; } -struct tagged_addr *kbase_alloc_phy_pages_helper_locked( - struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, - size_t nr_pages_requested, - struct kbase_sub_alloc **prealloc_sa) +static size_t free_partial_locked(struct kbase_context *kctx, struct kbase_mem_pool *pool, + struct tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + size_t nr_pages_to_account = 0; + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + p = as_page(tp); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE)) { + list_del(&sa->link); + kbase_mem_pool_free_locked(pool, head_page, true); + kfree(sa); + nr_pages_to_account = NUM_4K_PAGES_IN_2MB_PAGE; + } else if (bitmap_weight(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE) == 
+ NUM_4K_PAGES_IN_2MB_PAGE - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } + + return nr_pages_to_account; +} + +struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, + size_t nr_pages_requested, + struct kbase_sub_alloc **prealloc_sa) { int new_page_count __maybe_unused; size_t nr_left = nr_pages_requested; @@ -2705,6 +1347,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( struct kbase_device *kbdev; struct tagged_addr *tp; struct tagged_addr *new_pages = NULL; + /* The number of pages to account represents the total amount of memory + * actually allocated. If large pages are used, they are taken into account + * in full, even if only a fraction of them is used for sub-allocation + * to satisfy the memory allocation request. + */ + size_t nr_pages_to_account = 0; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); @@ -2727,50 +1375,42 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( if (nr_pages_requested == 0) goto done; /*nothing to do*/ - new_page_count = atomic_add_return( - nr_pages_requested, &kctx->used_pages); - atomic_add(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); - /* Increase mm counters before we allocate pages so that this - * allocation is visible to the OOM killer + * allocation is visible to the OOM killer. The actual count + * of pages will be amended later, if necessary, but for the + * moment it is safe to account for the amount initially + * requested. 
*/ - kbase_process_page_usage_inc(kctx, nr_pages_requested); - + new_page_count = mem_account_inc(kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; new_pages = tp; if (kbdev->pagesize_2mb && pool->order) { - int nr_lp = nr_left / (SZ_2M / SZ_4K); + int nr_lp = nr_left / NUM_4K_PAGES_IN_2MB_PAGE; - res = kbase_mem_pool_alloc_pages_locked(pool, - nr_lp * (SZ_2M / SZ_4K), - tp); + res = kbase_mem_pool_alloc_pages_locked(pool, nr_lp * NUM_4K_PAGES_IN_2MB_PAGE, tp); if (res > 0) { nr_left -= res; tp += res; + nr_pages_to_account += res; } if (nr_left) { struct kbase_sub_alloc *sa, *temp_sa; - list_for_each_entry_safe(sa, temp_sa, - &kctx->mem_partials, link) { + list_for_each_entry_safe(sa, temp_sa, &kctx->mem_partials, link) { int pidx = 0; while (nr_left) { pidx = find_next_zero_bit(sa->sub_pages, - SZ_2M / SZ_4K, - pidx); + NUM_4K_PAGES_IN_2MB_PAGE, pidx); bitmap_set(sa->sub_pages, pidx, 1); - *tp++ = as_tagged_tag(page_to_phys( - sa->page + pidx), - FROM_PARTIAL); + *tp++ = as_tagged_tag(page_to_phys(sa->page + pidx), + FROM_PARTIAL); nr_left--; - if (bitmap_full(sa->sub_pages, - SZ_2M / SZ_4K)) { + if (bitmap_full(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE)) { /* unlink from partial list when * full */ @@ -2785,7 +1425,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( * indicates that we couldn't allocate a 2MB above, so no point * to retry here. 
*/ - if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + if (nr_left > 0 && nr_left < NUM_4K_PAGES_IN_2MB_PAGE) { /* create a new partial and suballocate the rest from it */ struct page *np = NULL; @@ -2793,25 +1433,30 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( np = kbase_mem_pool_alloc_locked(pool); if (np) { - int i; + size_t i; struct kbase_sub_alloc *const sa = *prealloc_sa; struct page *p; /* store pointers back to the control struct */ np->lru.next = (void *)sa; - for (p = np; p < np + SZ_2M / SZ_4K; p++) + for (p = np; p < np + NUM_4K_PAGES_IN_2MB_PAGE; p++) p->lru.prev = (void *)np; INIT_LIST_HEAD(&sa->link); - bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + bitmap_zero(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE); sa->page = np; for (i = 0; i < nr_left; i++) - *tp++ = as_tagged_tag( - page_to_phys(np + i), - FROM_PARTIAL); + *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL); bitmap_set(sa->sub_pages, 0, nr_left); nr_left = 0; + + /* A large page has been used for sub-allocation: account + * for the whole of the large page, and not just for the + * sub-pages that have been used. + */ + nr_pages_to_account += NUM_4K_PAGES_IN_2MB_PAGE; + /* Indicate to user that we'll free this memory * later. */ @@ -2824,27 +1469,32 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( if (nr_left) goto alloc_failed; } else { - res = kbase_mem_pool_alloc_pages_locked(pool, - nr_left, - tp); + res = kbase_mem_pool_alloc_pages_locked(pool, nr_left, tp); if (res <= 0) goto alloc_failed; + nr_pages_to_account += res; } - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - kctx->id, - (u64)new_page_count); + /* Amend the page count with the number of pages actually used. 
*/ + if (nr_pages_to_account > nr_pages_requested) + new_page_count = mem_account_inc(kctx, nr_pages_to_account - nr_pages_requested); + else if (nr_pages_to_account < nr_pages_requested) + new_page_count = mem_account_dec(kctx, nr_pages_requested - nr_pages_to_account); + + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return new_pages; alloc_failed: - /* rollback needed if got one or more 2MB but failed later */ + /* The first step of error recovery is freeing any allocation that + * might have succeeded. The function can be in this condition only + * in one case: it tried to allocate a combination of 2 MB and 4 kB + * pages but only the former step succeeded. In this case, calculate + * the number of 2 MB pages to release and free them. + */ if (nr_left != nr_pages_requested) { size_t nr_pages_to_free = nr_pages_requested - nr_left; @@ -2853,66 +1503,64 @@ alloc_failed: if (kbdev->pagesize_2mb && pool->order) { while (nr_pages_to_free) { if (is_huge_head(*start_free)) { - kbase_mem_pool_free_pages_locked( - pool, 512, - start_free, - false, /* not dirty */ - true); /* return to pool */ - nr_pages_to_free -= 512; - start_free += 512; + kbase_mem_pool_free_pages_locked(pool, + NUM_4K_PAGES_IN_2MB_PAGE, + start_free, + false, /* not dirty */ + true); /* return to pool */ + nr_pages_to_free -= NUM_4K_PAGES_IN_2MB_PAGE; + start_free += NUM_4K_PAGES_IN_2MB_PAGE; } else if (is_partial(*start_free)) { - free_partial_locked(kctx, pool, - *start_free); + free_partial_locked(kctx, pool, *start_free); nr_pages_to_free--; start_free++; } } } else { - kbase_mem_pool_free_pages_locked(pool, - nr_pages_to_free, - start_free, - false, /* not dirty */ - true); /* return to pool */ + kbase_mem_pool_free_pages_locked(pool, nr_pages_to_free, start_free, + false, /* not dirty */ + true); /* return to pool */ } } - 
kbase_process_page_usage_dec(kctx, nr_pages_requested); - atomic_sub(nr_pages_requested, &kctx->used_pages); - atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); + /* Undo the preliminary memory accounting that was done early on + * in the function. The code above doesn't undo memory accounting + * so this is the only point where the function has to undo all + * of the pages accounted for at the top of the function. + */ + mem_account_dec(kctx, nr_pages_requested); invalid_request: return NULL; } -static void free_partial(struct kbase_context *kctx, int group_id, struct - tagged_addr tp) +static size_t free_partial(struct kbase_context *kctx, int group_id, struct tagged_addr tp) { struct page *p, *head_page; struct kbase_sub_alloc *sa; + size_t nr_pages_to_account = 0; p = as_page(tp); head_page = (struct page *)p->lru.prev; sa = (struct kbase_sub_alloc *)head_page->lru.next; spin_lock(&kctx->mem_partials_lock); clear_bit(p - head_page, sa->sub_pages); - if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { + if (bitmap_empty(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE)) { list_del(&sa->link); - kbase_mem_pool_free( - &kctx->mem_pools.large[group_id], - head_page, - true); + kbase_mem_pool_free(&kctx->mem_pools.large[group_id], head_page, true); kfree(sa); - } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == - SZ_2M / SZ_4K - 1) { + nr_pages_to_account = NUM_4K_PAGES_IN_2MB_PAGE; + } else if (bitmap_weight(sa->sub_pages, NUM_4K_PAGES_IN_2MB_PAGE) == + NUM_4K_PAGES_IN_2MB_PAGE - 1) { /* expose the partial again */ list_add(&sa->link, &kctx->mem_partials); } spin_unlock(&kctx->mem_partials_lock); + + return nr_pages_to_account; } -int kbase_free_phy_pages_helper( - struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_to_free) +int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { struct kbase_context *kctx = alloc->imported.native.kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2921,6 +1569,12 @@ int 
kbase_free_phy_pages_helper( struct tagged_addr *start_free; int new_page_count __maybe_unused; size_t freed = 0; + /* The number of pages to account represents the total amount of memory + * actually freed. If large pages are used, they are taken into account + * in full, even if only a fraction of them is used for sub-allocation + * to satisfy the memory allocation request. + */ + size_t nr_pages_to_account = 0; if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || WARN_ON(alloc->imported.native.kctx == NULL) || @@ -2938,8 +1592,7 @@ int kbase_free_phy_pages_helper( syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; /* pad start_free to a valid start location */ - while (nr_pages_to_free && is_huge(*start_free) && - !is_huge_head(*start_free)) { + while (nr_pages_to_free && is_huge(*start_free) && !is_huge_head(*start_free)) { nr_pages_to_free--; start_free++; } @@ -2949,17 +1602,15 @@ int kbase_free_phy_pages_helper( /* This is a 2MB entry, so free all the 512 pages that * it points to */ - kbase_mem_pool_free_pages( - &kctx->mem_pools.large[alloc->group_id], - 512, - start_free, - syncback, - reclaimed); - nr_pages_to_free -= 512; - start_free += 512; - freed += 512; + kbase_mem_pool_free_pages(&kctx->mem_pools.large[alloc->group_id], + NUM_4K_PAGES_IN_2MB_PAGE, start_free, syncback, + reclaimed); + nr_pages_to_free -= NUM_4K_PAGES_IN_2MB_PAGE; + start_free += NUM_4K_PAGES_IN_2MB_PAGE; + freed += NUM_4K_PAGES_IN_2MB_PAGE; + nr_pages_to_account += NUM_4K_PAGES_IN_2MB_PAGE; } else if (is_partial(*start_free)) { - free_partial(kctx, alloc->group_id, *start_free); + nr_pages_to_account += free_partial(kctx, alloc->group_id, *start_free); nr_pages_to_free--; start_free++; freed++; @@ -2967,81 +1618,62 @@ int kbase_free_phy_pages_helper( struct tagged_addr *local_end_free; local_end_free = start_free; - while (nr_pages_to_free && - !is_huge(*local_end_free) && - !is_partial(*local_end_free)) { + while (nr_pages_to_free && !is_huge(*local_end_free) && 
+ !is_partial(*local_end_free)) { local_end_free++; nr_pages_to_free--; } - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[alloc->group_id], - local_end_free - start_free, - start_free, - syncback, - reclaimed); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[alloc->group_id], + local_end_free - start_free, start_free, syncback, + reclaimed); freed += local_end_free - start_free; start_free += local_end_free - start_free; + nr_pages_to_account += freed; } } alloc->nents -= freed; - /* - * If the allocation was not evicted (i.e. evicted == 0) then - * the page accounting needs to be done. - */ if (!reclaimed) { - kbase_process_page_usage_dec(kctx, freed); - new_page_count = atomic_sub_return(freed, - &kctx->used_pages); - atomic_sub(freed, - &kctx->kbdev->memdev.used_pages); - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - kctx->id, - (u64)new_page_count); - - kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); + /* If the allocation was not reclaimed then all freed pages + * need to be accounted. + */ + new_page_count = mem_account_dec(kctx, nr_pages_to_account); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); + } else if (freed != nr_pages_to_account) { + /* If the allocation was reclaimed then alloc->nents pages + * have already been accounted for. + * + * Only update the number of pages to account if there is + * a discrepancy to correct, due to the fact that large pages + * were partially allocated at the origin. 
+ */ + if (freed > nr_pages_to_account) + new_page_count = mem_account_inc(kctx, freed - nr_pages_to_account); + else + new_page_count = mem_account_dec(kctx, nr_pages_to_account - freed); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); } return 0; } -static void free_partial_locked(struct kbase_context *kctx, - struct kbase_mem_pool *pool, struct tagged_addr tp) -{ - struct page *p, *head_page; - struct kbase_sub_alloc *sa; - - lockdep_assert_held(&pool->pool_lock); - lockdep_assert_held(&kctx->mem_partials_lock); - - p = as_page(tp); - head_page = (struct page *)p->lru.prev; - sa = (struct kbase_sub_alloc *)head_page->lru.next; - clear_bit(p - head_page, sa->sub_pages); - if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { - list_del(&sa->link); - kbase_mem_pool_free_locked(pool, head_page, true); - kfree(sa); - } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == - SZ_2M / SZ_4K - 1) { - /* expose the partial again */ - list_add(&sa->link, &kctx->mem_partials); - } -} - void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, - struct kbase_mem_pool *pool, struct tagged_addr *pages, - size_t nr_pages_to_free) + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free) { struct kbase_context *kctx = alloc->imported.native.kctx; struct kbase_device *kbdev = kctx->kbdev; bool syncback; - bool reclaimed = (alloc->evicted != 0); struct tagged_addr *start_free; size_t freed = 0; + /* The number of pages to account represents the total amount of memory + * actually freed. If large pages are used, they are taken into account + * in full, even if only a fraction of them is used for sub-allocation + * to satisfy the memory allocation request. 
+ */ + size_t nr_pages_to_account = 0; + int new_page_count; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); @@ -3050,6 +1682,12 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, lockdep_assert_held(&pool->pool_lock); lockdep_assert_held(&kctx->mem_partials_lock); + /* early out if state is inconsistent. */ + if (alloc->evicted) { + dev_err(kbdev->dev, "%s unexpectedly called for evicted region", __func__); + return; + } + /* early out if nothing to do */ if (!nr_pages_to_free) return; @@ -3059,8 +1697,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; /* pad start_free to a valid start location */ - while (nr_pages_to_free && is_huge(*start_free) && - !is_huge_head(*start_free)) { + while (nr_pages_to_free && is_huge(*start_free) && !is_huge_head(*start_free)) { nr_pages_to_free--; start_free++; } @@ -3071,17 +1708,15 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, * it points to */ WARN_ON(!pool->order); - kbase_mem_pool_free_pages_locked(pool, - 512, - start_free, - syncback, - reclaimed); - nr_pages_to_free -= 512; - start_free += 512; - freed += 512; + kbase_mem_pool_free_pages_locked(pool, NUM_4K_PAGES_IN_2MB_PAGE, start_free, + syncback, false); + nr_pages_to_free -= NUM_4K_PAGES_IN_2MB_PAGE; + start_free += NUM_4K_PAGES_IN_2MB_PAGE; + freed += NUM_4K_PAGES_IN_2MB_PAGE; + nr_pages_to_account += NUM_4K_PAGES_IN_2MB_PAGE; } else if (is_partial(*start_free)) { WARN_ON(!pool->order); - free_partial_locked(kctx, pool, *start_free); + nr_pages_to_account += free_partial_locked(kctx, pool, *start_free); nr_pages_to_free--; start_free++; freed++; @@ -3090,62 +1725,26 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, WARN_ON(pool->order); local_end_free = start_free; - while (nr_pages_to_free && - !is_huge(*local_end_free) && + 
while (nr_pages_to_free && !is_huge(*local_end_free) && !is_partial(*local_end_free)) { local_end_free++; nr_pages_to_free--; } - kbase_mem_pool_free_pages_locked(pool, - local_end_free - start_free, - start_free, - syncback, - reclaimed); + kbase_mem_pool_free_pages_locked(pool, local_end_free - start_free, + start_free, syncback, false); freed += local_end_free - start_free; start_free += local_end_free - start_free; + nr_pages_to_account += freed; } } alloc->nents -= freed; - /* - * If the allocation was not evicted (i.e. evicted == 0) then - * the page accounting needs to be done. - */ - if (!reclaimed) { - int new_page_count; - - kbase_process_page_usage_dec(kctx, freed); - new_page_count = atomic_sub_return(freed, - &kctx->used_pages); - atomic_sub(freed, - &kctx->kbdev->memdev.used_pages); - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - kctx->id, - (u64)new_page_count); - - kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); - } + new_page_count = mem_account_dec(kctx, nr_pages_to_account); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); } KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); -#if MALI_USE_CSF -/** - * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. - * @alloc: The allocation for the imported user buffer. - * - * This must only be called when terminating an alloc, when its refcount - * (number of users) has become 0. This also ensures it is only called once all - * CPU mappings have been closed. 
- * - * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active - * allocations - */ -static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); -#endif - void kbase_mem_kref_free(struct kref *kref) { struct kbase_mem_phy_alloc *alloc; @@ -3154,26 +1753,20 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - if (!WARN_ON(!alloc->imported.native.kctx)) { if (alloc->permanent_map) - kbase_phy_alloc_mapping_term( - alloc->imported.native.kctx, - alloc); + kbase_phy_alloc_mapping_term(alloc->imported.native.kctx, alloc); /* * The physical allocation must have been removed from * the eviction list before trying to free it. */ - mutex_lock( - &alloc->imported.native.kctx->jit_evict_lock); + mutex_lock(&alloc->imported.native.kctx->jit_evict_lock); WARN_ON(!list_empty(&alloc->evict_node)); - mutex_unlock( - &alloc->imported.native.kctx->jit_evict_lock); + mutex_unlock(&alloc->imported.native.kctx->jit_evict_lock); - kbase_process_page_usage_dec( - alloc->imported.native.kctx, - alloc->imported.native.nr_struct_pages); + kbase_process_page_usage_dec(alloc->imported.native.kctx, + alloc->imported.native.nr_struct_pages); } kbase_free_phy_pages_helper(alloc, alloc->nents); break; @@ -3200,23 +1793,44 @@ void kbase_mem_kref_free(struct kref *kref) case KBASE_MEM_TYPE_IMPORTED_UMM: if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1, - "WARNING: expected excatly 1 mapping, got %d", - alloc->imported.umm.current_mapping_usage_count); - dma_buf_unmap_attachment( - alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, - DMA_BIDIRECTIONAL); - kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, - alloc); + "WARNING: expected exactly 1 mapping, got %d", + alloc->imported.umm.current_mapping_usage_count); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + 
kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, alloc); } - dma_buf_detach(alloc->imported.umm.dma_buf, - alloc->imported.umm.dma_attachment); + dma_buf_detach(alloc->imported.umm.dma_buf, alloc->imported.umm.dma_attachment); dma_buf_put(alloc->imported.umm.dma_buf); break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -#if MALI_USE_CSF - kbase_jd_user_buf_unpin_pages(alloc); -#endif + switch (alloc->imported.user_buf.state) { + case KBASE_USER_BUF_STATE_PINNED: + case KBASE_USER_BUF_STATE_DMA_MAPPED: + case KBASE_USER_BUF_STATE_GPU_MAPPED: { + /* It's too late to undo all of the operations that might have been + * done on an imported USER_BUFFER handle, as references have been + * lost already. + * + * The only thing that can be done safely and that is crucial for + * the rest of the system is releasing the physical pages that have + * been pinned and that are still referenced by the physical + * allocation. + */ + kbase_user_buf_unpin_pages(alloc); + alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_EMPTY; + break; + } + case KBASE_USER_BUF_STATE_EMPTY: { + /* Nothing to do. */ + break; + } + default: { + WARN(1, "Unexpected free of type %d state %d\n", alloc->type, + alloc->imported.user_buf.state); + break; + } + } + if (alloc->imported.user_buf.mm) mmdrop(alloc->imported.user_buf.mm); if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) @@ -3225,7 +1839,7 @@ void kbase_mem_kref_free(struct kref *kref) kfree(alloc->imported.user_buf.pages); break; default: - WARN(1, "Unexecpted free of type %d\n", alloc->type); + WARN(1, "Unexpected free of type %d\n", alloc->type); break; } @@ -3250,7 +1864,7 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size /* Prevent vsize*sizeof from wrapping around. * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. 
*/ - if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) + if ((size_t)vsize > ((size_t)-1 / sizeof(*reg->cpu_alloc->pages))) goto out_term; KBASE_DEBUG_ASSERT(vsize != 0); @@ -3320,30 +1934,27 @@ bool kbase_check_alloc_flags(unsigned long flags) * - Be written by the GPU * - Be grown on GPU page fault */ - if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + if ((flags & BASE_MEM_PROT_GPU_EX) && + (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) return false; #if !MALI_USE_CSF /* GPU executable memory also cannot have the top of its initial * commit aligned to 'extension' */ - if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - BASE_MEM_TILER_ALIGN_TOP)) + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & BASE_MEM_TILER_ALIGN_TOP)) return false; #endif /* !MALI_USE_CSF */ /* To have an allocation lie within a 4GB chunk is required only for * TLS memory, which will never be used to contain executable code. */ - if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - BASE_MEM_PROT_GPU_EX)) + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & BASE_MEM_PROT_GPU_EX)) return false; #if !MALI_USE_CSF /* TLS memory should also not be used for tiler heap */ - if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - BASE_MEM_TILER_ALIGN_TOP)) + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & BASE_MEM_TILER_ALIGN_TOP)) return false; #endif /* !MALI_USE_CSF */ @@ -3359,15 +1970,14 @@ bool kbase_check_alloc_flags(unsigned long flags) /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory */ - if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == - BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) return false; /* Should not combine BASE_MEM_COHERENT_LOCAL with * BASE_MEM_COHERENT_SYSTEM */ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == - (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) 
+ (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) return false; #if MALI_USE_CSF @@ -3422,11 +2032,11 @@ bool kbase_check_import_flags(unsigned long flags) return true; } -int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 large_extension) +int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, u64 va_pages, + u64 commit_pages, u64 large_extension) { struct device *dev = kctx->kbdev->dev; - int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + int gpu_pc_bits = kctx->kbdev->gpu_props.log2_program_counter_size; u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; struct kbase_va_region test_reg; @@ -3442,7 +2052,7 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", - (unsigned long long)va_pages); + (unsigned long long)va_pages); return -ENOMEM; } @@ -3452,23 +2062,22 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, /* Limit GPU executable allocs to GPU PC size */ if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", - (unsigned long long)va_pages, - (unsigned long long)gpu_pc_pages_max); + dev_warn(dev, + KBASE_MSG_PRE + "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", + (unsigned long long)va_pages, (unsigned long long)gpu_pc_pages_max); return -EINVAL; } if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) { - dev_warn(dev, KBASE_MSG_PRE - "BASE_MEM_GROW_ON_GPF but extension == 0\n"); + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extension == 0\n"); return -EINVAL; } #if !MALI_USE_CSF if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) { - dev_warn(dev, KBASE_MSG_PRE - 
"BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); return -EINVAL; } @@ -3481,8 +2090,7 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, } #else if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) { - dev_warn(dev, KBASE_MSG_PRE - "BASE_MEM_GROW_ON_GPF not set but extension != 0\n"); + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF not set but extension != 0\n"); return -EINVAL; } #endif /* !MALI_USE_CSF */ @@ -3493,11 +2101,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, #define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " unsigned long small_extension; - if (large_extension > - BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { - dev_warn(dev, - KBASE_MSG_PRE_FLAG - "extension==%lld pages exceeds limit %lld", + if (large_extension > BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "extension==%lld pages exceeds limit %lld", (unsigned long long)large_extension, BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES); return -EINVAL; @@ -3508,29 +2113,28 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, small_extension = (unsigned long)large_extension; if (!is_power_of_2(small_extension)) { - dev_warn(dev, - KBASE_MSG_PRE_FLAG - "extension==%ld not a non-zero power of 2", + dev_warn(dev, KBASE_MSG_PRE_FLAG "extension==%ld not a non-zero power of 2", small_extension); return -EINVAL; } if (commit_pages > large_extension) { - dev_warn(dev, - KBASE_MSG_PRE_FLAG - "commit_pages==%ld exceeds extension==%ld", - (unsigned long)commit_pages, - (unsigned long)large_extension); + dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extension==%ld", + (unsigned long)commit_pages, (unsigned long)large_extension); return -EINVAL; } #undef KBASE_MSG_PRE_FLAG } +#else + CSTD_UNUSED(commit_pages); #endif /* !MALI_USE_CSF */ - if ((flags & 
BASE_MEM_GPU_VA_SAME_4GB_PAGE) && - (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", - (unsigned long long)va_pages); + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { + dev_warn( + dev, + KBASE_MSG_PRE + "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", + (unsigned long long)va_pages); return -EINVAL; } @@ -3565,8 +2169,8 @@ struct kbase_jit_debugfs_data { char buffer[50]; }; -static int kbase_jit_debugfs_common_open(struct inode *inode, - struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +static int kbase_jit_debugfs_common_open(struct inode *inode, struct file *file, + int (*func)(struct kbase_jit_debugfs_data *)) { struct kbase_jit_debugfs_data *data; @@ -3576,21 +2180,21 @@ static int kbase_jit_debugfs_common_open(struct inode *inode, data->func = func; mutex_init(&data->lock); - data->kctx = (struct kbase_context *) inode->i_private; + data->kctx = (struct kbase_context *)inode->i_private; file->private_data = data; return nonseekable_open(inode, file); } -static ssize_t kbase_jit_debugfs_common_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) +static ssize_t kbase_jit_debugfs_common_read(struct file *file, char __user *buf, size_t len, + loff_t *ppos) { struct kbase_jit_debugfs_data *data; size_t size; int ret; - data = (struct kbase_jit_debugfs_data *) file->private_data; + data = (struct kbase_jit_debugfs_data *)file->private_data; mutex_lock(&data->lock); if (*ppos) { @@ -3606,9 +2210,8 @@ static ssize_t kbase_jit_debugfs_common_read(struct file *file, goto out_unlock; } - size = scnprintf(data->buffer, sizeof(data->buffer), - "%llu,%llu,%llu\n", data->active_value, - data->pool_value, data->destroy_value); + size = scnprintf(data->buffer, sizeof(data->buffer), "%llu,%llu,%llu\n", + data->active_value, data->pool_value, 
data->destroy_value); } ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); @@ -3618,26 +2221,27 @@ out_unlock: return ret; } -static int kbase_jit_debugfs_common_release(struct inode *inode, - struct file *file) +static int kbase_jit_debugfs_common_release(struct inode *inode, struct file *file) { + CSTD_UNUSED(inode); + kfree(file->private_data); return 0; } -#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - return kbase_jit_debugfs_common_open(inode, file, __func); \ -} \ -static const struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = kbase_jit_debugfs_common_release, \ - .read = kbase_jit_debugfs_common_read, \ - .write = NULL, \ - .llseek = generic_file_llseek, \ -} +#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ + static int __fops##_open(struct inode *inode, struct file *file) \ + { \ + return kbase_jit_debugfs_common_open(inode, file, __func); \ + } \ + static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops##_open, \ + .release = kbase_jit_debugfs_common_release, \ + .read = kbase_jit_debugfs_common_read, \ + .write = NULL, \ + .llseek = generic_file_llseek, \ + } static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) { @@ -3660,8 +2264,7 @@ static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) return 0; } -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, - kbase_jit_debugfs_count_get); +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, kbase_jit_debugfs_count_get); static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) { @@ -3684,8 +2287,7 @@ static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) return 0; } -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, - kbase_jit_debugfs_vm_get); +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, kbase_jit_debugfs_vm_get); static int 
kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) { @@ -3708,8 +2310,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) return 0; } -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, - kbase_jit_debugfs_phys_get); +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); #if MALI_JIT_PRESSURE_LIMIT_BASE static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) @@ -3732,12 +2333,11 @@ static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) return 0; } -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, - kbase_jit_debugfs_used_get); +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, kbase_jit_debugfs_used_get); static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, - struct kbase_va_region *reg, size_t pages_needed, - size_t *freed, bool shrink); + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink); static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) { @@ -3753,8 +2353,7 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) int err; size_t freed = 0u; - err = kbase_mem_jit_trim_pages_from_region(kctx, reg, - SIZE_MAX, &freed, false); + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, SIZE_MAX, &freed, false); if (err) { /* Failed to calculate, try the next region */ @@ -3772,8 +2371,7 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) return 0; } -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, - kbase_jit_debugfs_trim_get); +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, kbase_jit_debugfs_trim_get); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ void kbase_jit_debugfs_init(struct kbase_context *kctx) @@ -3786,44 +2384,41 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) /* Caller already ensures this, but we keep the pattern for * maintenance safety. 
*/ - if (WARN_ON(!kctx) || - WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) return; - - /* Debugfs entry for getting the number of JIT allocations. */ - debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_count_fops); + debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, kctx, + &kbase_jit_debugfs_count_fops); /* * Debugfs entry for getting the total number of virtual pages * used by JIT allocations. */ - debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_vm_fops); + debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, kctx, + &kbase_jit_debugfs_vm_fops); /* * Debugfs entry for getting the number of physical pages used * by JIT allocations. */ - debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_phys_fops); + debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, kctx, + &kbase_jit_debugfs_phys_fops); #if MALI_JIT_PRESSURE_LIMIT_BASE /* * Debugfs entry for getting the number of pages used * by JIT allocations for estimating the physical pressure * limit. */ - debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_used_fops); + debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, kctx, + &kbase_jit_debugfs_used_fops); /* * Debugfs entry for getting the number of pages that could * be trimmed to free space for more JIT allocations. 
*/ - debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_trim_fops); + debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, kctx, + &kbase_jit_debugfs_trim_fops); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } #endif /* CONFIG_DEBUG_FS */ @@ -3848,8 +2443,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work) break; } - reg = list_first_entry(&kctx->jit_destroy_head, - struct kbase_va_region, jit_node); + reg = list_first_entry(&kctx->jit_destroy_head, struct kbase_va_region, jit_node); list_del(&reg->jit_node); mutex_unlock(&kctx->jit_evict_lock); @@ -3887,10 +2481,6 @@ int kbase_jit_init(struct kbase_context *kctx) #endif /* MALI_USE_CSF */ mutex_unlock(&kctx->jit_evict_lock); - kctx->jit_max_allocations = 0; - kctx->jit_current_allocations = 0; - kctx->trim_level = 0; - return 0; } @@ -3898,9 +2488,8 @@ int kbase_jit_init(struct kbase_context *kctx) * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets * the alignment requirements. */ -static bool meet_size_and_tiler_align_top_requirements( - const struct kbase_va_region *walker, - const struct base_jit_alloc_info *info) +static bool meet_size_and_tiler_align_top_requirements(const struct kbase_va_region *walker, + const struct base_jit_alloc_info *info) { bool meet_reqs = true; @@ -3924,8 +2513,8 @@ static bool meet_size_and_tiler_align_top_requirements( /* Function will guarantee *@freed will not exceed @pages_needed */ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, - struct kbase_va_region *reg, size_t pages_needed, - size_t *freed, bool shrink) + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink) { int err = 0; size_t available_pages = 0u; @@ -3959,9 +2548,8 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, * (rounded up to page sized units). Note, this is allowed to * exceed reg->nr_pages. 
*/ - max_allowed_pages += PFN_UP( - KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - - KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + max_allowed_pages += PFN_UP(KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* The GPU could report being ready to write to the next * 'extension' sized chunk, but didn't actually write to it, so we @@ -3999,21 +2587,17 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, */ if (max_allowed_pages < reg->used_pages) { if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) - dev_warn(kctx->kbdev->dev, - "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", - __func__, - old_pages, reg->used_pages, - max_allowed_pages, - reg->start_pfn << PAGE_SHIFT, - reg->nr_pages); + dev_warn( + kctx->kbdev->dev, + "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", + __func__, old_pages, reg->used_pages, max_allowed_pages, + reg->start_pfn << PAGE_SHIFT, reg->nr_pages); else dev_dbg(kctx->kbdev->dev, - "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", - __func__, - old_pages, reg->used_pages, - reg->start_pfn << PAGE_SHIFT, - reg->nr_pages); - } + "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", + __func__, old_pages, reg->used_pages, + reg->start_pfn << PAGE_SHIFT, reg->nr_pages); + } /* In any case, no error condition to report here, caller can * try other regions */ @@ -4029,13 +2613,11 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, err = kbase_mem_shrink(kctx, reg, new_pages); } out: - trace_mali_jit_trim_from_region(reg, to_free, old_pages, - available_pages, new_pages); + trace_mali_jit_trim_from_region(reg, to_free, old_pages, available_pages, new_pages); *freed = 
to_free; return err; } - /** * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been * freed @@ -4053,8 +2635,7 @@ out: * * Return: Total number of successfully freed pages */ -static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, - size_t pages_needed) +static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, size_t pages_needed) { struct kbase_va_region *reg, *tmp; size_t total_freed = 0; @@ -4069,8 +2650,7 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, int err; size_t freed = 0u; - err = kbase_mem_jit_trim_pages_from_region(kctx, reg, - pages_needed, &freed, true); + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, pages_needed, &freed, true); if (err) { /* Failed to trim, try the next region */ @@ -4090,10 +2670,8 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -static int kbase_jit_grow(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - struct kbase_va_region *reg, - struct kbase_sub_alloc **prealloc_sas, +static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_alloc_info *info, + struct kbase_va_region *reg, struct kbase_sub_alloc **prealloc_sas, enum kbase_caller_mmu_sync_info mmu_sync_info) { size_t delta; @@ -4124,11 +2702,11 @@ static int kbase_jit_grow(struct kbase_context *kctx, delta = info->commit_pages - reg->gpu_alloc->nents; pages_required = delta; - if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) { + if (kctx->kbdev->pagesize_2mb && pages_required >= NUM_4K_PAGES_IN_2MB_PAGE) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ - pages_required += ((SZ_2M / SZ_4K) - 1); - pages_required /= (SZ_2M / SZ_4K); + pages_required += (NUM_4K_PAGES_IN_2MB_PAGE - 1); + pages_required /= NUM_4K_PAGES_IN_2MB_PAGE; } else { pool = &kctx->mem_pools.small[kctx->jit_group_id]; } @@ -4162,8 +2740,8 @@ static int 
kbase_jit_grow(struct kbase_context *kctx, kbase_mem_pool_lock(pool); } - gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, - delta, &prealloc_sas[0]); + gpu_pages = + kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, delta, &prealloc_sas[0]); if (!gpu_pages) { kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); @@ -4173,11 +2751,10 @@ static int kbase_jit_grow(struct kbase_context *kctx, if (reg->cpu_alloc != reg->gpu_alloc) { struct tagged_addr *cpu_pages; - cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, - pool, delta, &prealloc_sas[1]); + cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, pool, delta, + &prealloc_sas[1]); if (!cpu_pages) { - kbase_free_phy_pages_helper_locked(reg->gpu_alloc, - pool, gpu_pages, delta); + kbase_free_phy_pages_helper_locked(reg->gpu_alloc, pool, gpu_pages, delta); kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); goto update_failed; @@ -4186,8 +2763,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); - ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, - old_size, mmu_sync_info); + ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, old_size, mmu_sync_info); /* * The grow failed so put the allocation back in the * pool and return failure. 
@@ -4206,11 +2782,9 @@ update_failed: return ret; } -static void trace_jit_stats(struct kbase_context *kctx, - u32 bin_id, u32 max_allocations) +static void trace_jit_stats(struct kbase_context *kctx, u32 bin_id, u32 max_allocations) { - const u32 alloc_count = - kctx->jit_current_allocations_per_bin[bin_id]; + const u32 alloc_count = kctx->jit_current_allocations_per_bin[bin_id]; struct kbase_device *kbdev = kctx->kbdev; struct kbase_va_region *walker; @@ -4227,8 +2801,8 @@ static void trace_jit_stats(struct kbase_context *kctx, } mutex_unlock(&kctx->jit_evict_lock); - KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, - max_allocations, alloc_count, va_pages, ph_pages); + KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, max_allocations, alloc_count, + va_pages, ph_pages); } #if MALI_JIT_PRESSURE_LIMIT_BASE @@ -4255,8 +2829,7 @@ static size_t get_jit_phys_backing(struct kbase_context *kctx) return backing; } -void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, - size_t needed_pages) +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, size_t needed_pages) { size_t jit_backing = 0; size_t pages_to_trim = 0; @@ -4273,8 +2846,7 @@ void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, * allocation after "ignore_pressure_limit" allocation. 
*/ if (jit_backing > kctx->jit_phys_pages_limit) { - pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + - needed_pages; + pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + needed_pages; } else { size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; @@ -4283,8 +2855,7 @@ void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, } if (pages_to_trim) { - size_t trimmed_pages = - kbase_mem_jit_trim_pages(kctx, pages_to_trim); + size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, pages_to_trim); /* This should never happen - we already asserted that * we are not violating JIT pressure limit in earlier @@ -4308,9 +2879,8 @@ void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, * * Return: true if allocation can be executed, false otherwise */ -static bool jit_allow_allocate(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - bool ignore_pressure_limit) +static bool jit_allow_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) { #if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); @@ -4320,33 +2890,32 @@ static bool jit_allow_allocate(struct kbase_context *kctx, #if MALI_JIT_PRESSURE_LIMIT_BASE if (!ignore_pressure_limit && - ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || - (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { + ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || + (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { dev_dbg(kctx->kbdev->dev, "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", kctx->jit_current_phys_pressure + info->va_pages, kctx->jit_phys_pages_limit); return false; } +#else + CSTD_UNUSED(ignore_pressure_limit); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ dev_dbg(kctx->kbdev->dev, "Max 
JIT allocations limit reached: active allocations %d, max allocations %d\n", - kctx->jit_current_allocations, - kctx->jit_max_allocations); + kctx->jit_current_allocations, kctx->jit_max_allocations); return false; } if (info->max_allocations > 0 && - kctx->jit_current_allocations_per_bin[info->bin_id] >= - info->max_allocations) { + kctx->jit_current_allocations_per_bin[info->bin_id] >= info->max_allocations) { /* Too many current allocations in this bin */ dev_dbg(kctx->kbdev->dev, "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n", - info->bin_id, - kctx->jit_current_allocations_per_bin[info->bin_id], + info->bin_id, kctx->jit_current_allocations_per_bin[info->bin_id], info->max_allocations); return false; } @@ -4354,17 +2923,16 @@ static bool jit_allow_allocate(struct kbase_context *kctx, return true; } -static struct kbase_va_region * -find_reasonable_region(const struct base_jit_alloc_info *info, - struct list_head *pool_head, bool ignore_usage_id) +static struct kbase_va_region *find_reasonable_region(const struct base_jit_alloc_info *info, + struct list_head *pool_head, + bool ignore_usage_id) { struct kbase_va_region *closest_reg = NULL; struct kbase_va_region *walker; size_t current_diff = SIZE_MAX; list_for_each_entry(walker, pool_head, jit_node) { - if ((ignore_usage_id || - walker->jit_usage_id == info->usage_id) && + if ((ignore_usage_id || walker->jit_usage_id == info->usage_id) && walker->jit_bin_id == info->bin_id && meet_size_and_tiler_align_top_requirements(walker, info)) { size_t min_size, max_size, diff; @@ -4374,10 +2942,8 @@ find_reasonable_region(const struct base_jit_alloc_info *info, * it's suitable but other allocations might be a * better fit. 
*/ - min_size = min_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); + min_size = min_t(size_t, walker->gpu_alloc->nents, info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, info->commit_pages); diff = max_size - min_size; if (current_diff > diff) { @@ -4395,8 +2961,8 @@ find_reasonable_region(const struct base_jit_alloc_info *info, } struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - bool ignore_pressure_limit) + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) { struct kbase_va_region *reg = NULL; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; @@ -4466,8 +3032,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, #if MALI_JIT_PRESSURE_LIMIT_BASE if (!ignore_pressure_limit) { if (info->commit_pages > reg->gpu_alloc->nents) - needed_pages = info->commit_pages - - reg->gpu_alloc->nents; + needed_pages = info->commit_pages - reg->gpu_alloc->nents; /* Update early the recycled JIT region's estimate of * used_pages to ensure it doesn't get trimmed @@ -4485,12 +3050,10 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * The second call to update pressure at the end of * this function would effectively be a nop. */ - kbase_jit_report_update_pressure( - kctx, reg, info->va_pages, - KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); - kbase_jit_request_phys_increase_locked(kctx, - needed_pages); + kbase_jit_request_phys_increase_locked(kctx, needed_pages); } #endif mutex_unlock(&kctx->jit_evict_lock); @@ -4499,8 +3062,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * so any state protected by that lock might need to be * re-evaluated if more code is added here in future. 
*/ - ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, - mmu_sync_info); + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, mmu_sync_info); #if MALI_JIT_PRESSURE_LIMIT_BASE if (!ignore_pressure_limit) @@ -4509,7 +3071,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_gpu_vm_unlock(kctx); - if (ret < 0) { + if (ret) { /* * An update to an allocation from the pool failed, * chances are slim a new allocation would fare any @@ -4524,9 +3086,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * region's estimate of used_pages. */ if (!ignore_pressure_limit) { - kbase_jit_report_update_pressure( - kctx, reg, 0, - KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ mutex_lock(&kctx->jit_evict_lock); @@ -4538,7 +3099,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, /* A suitable JIT allocation existed on the evict list, so we need * to make sure that the NOT_MOVABLE property is cleared. 
*/ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); @@ -4548,10 +3109,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } else { /* No suitable JIT allocation was found so create a new one */ - u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | - BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_COHERENT_LOCAL | - BASEP_MEM_NO_USER_FREE; + u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_GROW_ON_GPF | BASE_MEM_COHERENT_LOCAL | BASEP_MEM_NO_USER_FREE; u64 gpu_addr; #if !MALI_USE_CSF @@ -4566,8 +3125,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, /* The corresponding call to 'done_phys_increase' would * be made inside the kbase_mem_alloc(). */ - kbase_jit_request_phys_increase_locked( - kctx, info->commit_pages); + kbase_jit_request_phys_increase_locked(kctx, info->commit_pages); } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ @@ -4639,7 +3197,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; reg->heap_info_gpu_addr = info->heap_info_gpu_addr; kbase_jit_report_update_pressure(kctx, reg, info->va_pages, - KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kbase_gpu_vm_unlock(kctx); @@ -4670,7 +3228,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) * commit size */ u64 new_size = MAX(reg->initial_commit, - div_u64(old_pages * (100 - kctx->trim_level), 100)); + div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; if (delta) { @@ -4682,8 +3240,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) #if MALI_JIT_PRESSURE_LIMIT_BASE reg->heap_info_gpu_addr = 0; - 
kbase_jit_report_update_pressure(kctx, reg, 0, - KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + kbase_jit_report_update_pressure(kctx, reg, 0, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kctx->jit_current_allocations--; @@ -4716,14 +3273,14 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) * by page migration. Once freed, they will enter into the page migration * state machine via the mempools. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } void kbase_jit_backing_lost(struct kbase_va_region *reg) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -4756,8 +3313,7 @@ bool kbase_jit_evict(struct kbase_context *kctx) /* Free the oldest allocation from the pool */ mutex_lock(&kctx->jit_evict_lock); if (!list_empty(&kctx->jit_pool_head)) { - reg = list_entry(kctx->jit_pool_head.prev, - struct kbase_va_region, jit_node); + reg = list_entry(kctx->jit_pool_head.prev, struct kbase_va_region, jit_node); list_del(®->jit_node); list_del_init(®->gpu_alloc->evict_node); } @@ -4788,8 +3344,7 @@ void kbase_jit_term(struct kbase_context *kctx) mutex_lock(&kctx->jit_evict_lock); /* Free all allocations from the pool */ while (!list_empty(&kctx->jit_pool_head)) { - walker = list_first_entry(&kctx->jit_pool_head, - struct kbase_va_region, jit_node); + walker = list_first_entry(&kctx->jit_pool_head, struct kbase_va_region, jit_node); list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); @@ -4807,8 +3362,7 @@ void kbase_jit_term(struct kbase_context *kctx) /* Free all allocations from active list */ while (!list_empty(&kctx->jit_active_head)) { - walker = list_first_entry(&kctx->jit_active_head, - struct kbase_va_region, jit_node); + walker = 
list_first_entry(&kctx->jit_active_head, struct kbase_va_region, jit_node); list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); @@ -4838,14 +3392,14 @@ void kbase_jit_term(struct kbase_context *kctx) #if MALI_JIT_PRESSURE_LIMIT_BASE void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, - struct kbase_va_region *reg, unsigned int flags) + struct kbase_va_region *reg, unsigned int flags) { /* Offset to the location used for a JIT report within the GPU memory * * This constants only used for this debugging function - not useful * anywhere else in kbase */ - const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + const u64 jit_report_gpu_mem_offset = sizeof(u64) * 2; u64 addr_start; struct kbase_vmap_struct mapping; @@ -4862,18 +3416,16 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; - ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, - KBASE_REG_CPU_RD, &mapping); + ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, KBASE_REG_CPU_RD, + &mapping); if (!ptr) { dev_warn(kctx->kbdev->dev, - "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", - __func__, reg->start_pfn << PAGE_SHIFT, - addr_start); + "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", + __func__, reg->start_pfn << PAGE_SHIFT, addr_start); goto out; } - trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, - ptr, flags); + trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, ptr, flags); kbase_vunmap(kctx, &mapping); out: @@ -4882,9 +3434,8 @@ out: #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #if MALI_JIT_PRESSURE_LIMIT_BASE -void kbase_jit_report_update_pressure(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 new_used_pages, - unsigned int flags) +void kbase_jit_report_update_pressure(struct kbase_context 
*kctx, struct kbase_va_region *reg, + u64 new_used_pages, unsigned int flags) { u64 diff; @@ -4892,10 +3443,9 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, lockdep_assert_held(&kctx->jctx.lock); #endif /* !MALI_USE_CSF */ - trace_mali_jit_report_pressure(reg, new_used_pages, - kctx->jit_current_phys_pressure + new_used_pages - - reg->used_pages, - flags); + trace_mali_jit_report_pressure( + reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - reg->used_pages, flags); if (WARN_ON(new_used_pages > reg->nr_pages)) return; @@ -4917,39 +3467,68 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, reg->used_pages = new_used_pages; } - } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -void kbase_unpin_user_buf_page(struct page *page) +int kbase_user_buf_pin_pages(struct kbase_context *kctx, struct kbase_va_region *reg) { -#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE - put_page(page); -#else - unpin_user_page(page); -#endif + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + struct page **pages = alloc->imported.user_buf.pages; + unsigned long address = alloc->imported.user_buf.address; + struct mm_struct *mm = alloc->imported.user_buf.mm; + struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); + long pinned_pages; + long i; + int write; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return -EINVAL; + + if (WARN_ON(alloc->nents)) + return -EINVAL; + + if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) + return -EINVAL; + + if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) + return -EINVAL; + + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + + pinned_pages = kbase_pin_user_pages_remote(NULL, mm, address, + alloc->imported.user_buf.nr_pages, + write ? 
FOLL_WRITE : 0, pages, NULL, NULL); + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != (long)alloc->imported.user_buf.nr_pages) { + /* Above code already ensures there will not have been a CPU + * mapping by ensuring alloc->nents is 0 + */ + for (i = 0; i < pinned_pages; i++) + kbase_unpin_user_buf_page(pages[i]); + return -ENOMEM; + } + + /* The driver is allowed to create CPU mappings now that physical pages + * have been pinned. Update physical allocation in a consistent way: + * update the number of available physical pages and at the same time + * fill the array of physical pages with tagged addresses. + */ + for (i = 0; i < pinned_pages; i++) + pa[i] = as_tagged(page_to_phys(pages[i])); + alloc->nents = pinned_pages; + + return 0; } -#if MALI_USE_CSF -static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) +void kbase_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) { - /* In CSF builds, we keep pages pinned until the last reference is - * released on the alloc. A refcount of 0 also means we can be sure - * that all CPU mappings have been closed on this alloc, and no more - * mappings of it will be created. - * - * Further, the WARN() below captures the restriction that this - * function will not handle anything other than the alloc termination - * path, because the caller of kbase_mem_phy_alloc_put() is not - * required to hold the kctx's reg_lock, and so we could not handle - * removing an existing CPU mapping here. - * - * Refer to this function's kernel-doc comments for alternatives for - * unpinning a User buffer. 
- */ + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return; - if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, - "must only be called on terminating an allocation")) { + if (alloc->nents) { struct page **pages = alloc->imported.user_buf.pages; long i; @@ -4961,92 +3540,22 @@ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) alloc->nents = 0; } } -#endif -int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, - struct kbase_va_region *reg) +int kbase_user_buf_dma_map_pages(struct kbase_context *kctx, struct kbase_va_region *reg) { struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; struct page **pages = alloc->imported.user_buf.pages; - unsigned long address = alloc->imported.user_buf.address; - struct mm_struct *mm = alloc->imported.user_buf.mm; - long pinned_pages; - long i; + struct device *dev = kctx->kbdev->dev; int write; - - lockdep_assert_held(&kctx->reg_lock); + long i, pinned_pages, dma_mapped_pages; + enum dma_data_direction dma_dir; if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) return -EINVAL; - if (alloc->nents) { - if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages)) - return -EINVAL; - else - return 0; - } - - if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) - return -EINVAL; - write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); - -#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE - pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - write ? FOLL_WRITE : 0, pages, NULL); -#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE - pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - write ? FOLL_WRITE : 0, pages, NULL, NULL); -#else - pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, - write ? 
FOLL_WRITE : 0, pages, NULL, NULL); -#endif - - if (pinned_pages <= 0) - return pinned_pages; - - if (pinned_pages != alloc->imported.user_buf.nr_pages) { - /* Above code already ensures there will not have been a CPU - * mapping by ensuring alloc->nents is 0 - */ - for (i = 0; i < pinned_pages; i++) - kbase_unpin_user_buf_page(pages[i]); - return -ENOMEM; - } - - alloc->nents = pinned_pages; - - return 0; -} - -static int kbase_jd_user_buf_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - int err; - long pinned_pages = 0; - struct kbase_mem_phy_alloc *alloc; - struct page **pages; - struct tagged_addr *pa; - long i, dma_mapped_pages; - struct device *dev; - unsigned long gwt_mask = ~0; - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - lockdep_assert_held(&kctx->reg_lock); - - err = kbase_jd_user_buf_pin_pages(kctx, reg); - - if (err) - return err; - - alloc = reg->gpu_alloc; - pa = kbase_get_gpu_phy_pages(reg); - pinned_pages = alloc->nents; - pages = alloc->imported.user_buf.pages; - dev = kctx->kbdev->dev; + dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + pinned_pages = reg->gpu_alloc->nents; /* Manual CPU cache synchronization. 
* @@ -5067,98 +3576,206 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, for (i = 0; i < pinned_pages; i++) { dma_addr_t dma_addr; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir); #else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif - err = dma_mapping_error(dev, dma_addr); - if (err) - goto unwind; + if (dma_mapping_error(dev, dma_addr)) + goto unwind_dma_map; alloc->imported.user_buf.dma_addrs[i] = dma_addr; - pa[i] = as_tagged(page_to_phys(pages[i])); - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); } + return 0; + +unwind_dma_map: + dma_mapped_pages = i; + + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory as a precautionary measure. + */ + for (i = 0; i < dma_mapped_pages; i++) { + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir); +#else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); +#endif + } + + return -ENOMEM; +} + +/** + * kbase_user_buf_map - Create GPU mapping for a user buffer. + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * The caller must have ensured that physical pages have been pinned and that + * DMA mappings have been obtained prior to calling this function. + * + * Return: zero on success or negative number on failure. 
+ */ +static int kbase_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + long pinned_pages = 0; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + struct tagged_addr *pa; + long i; + unsigned long gwt_mask = ~0; + int ret; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + lockdep_assert_held(&kctx->reg_lock); + + alloc = reg->gpu_alloc; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return -EINVAL; + + pa = kbase_get_gpu_phy_pages(reg); + pinned_pages = alloc->nents; + pages = alloc->imported.user_buf.pages; + + for (i = 0; i < pinned_pages; i++) + pa[i] = as_tagged(page_to_phys(pages[i])); + #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); - if (err == 0) - return 0; + ret = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); - /* fall down */ -unwind: - alloc->nents = 0; - dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop, and perform again - * CPU cache synchronization to re-write the content of dirty CPU caches - * to memory. This is precautionary measure in case a GPU job has taken - * advantage of a partially GPU-mapped range to write and corrupt the - * content of memory, either inside or outside the imported region. - * - * Notice that this error recovery path doesn't try to be optimal and just - * flushes the entire page range. 
- */ - for (i = 0; i < dma_mapped_pages; i++) { - dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -#else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); -#endif - } - - /* The user buffer could already have been previously pinned before - * entering this function, and hence there could potentially be CPU - * mappings of it - */ - kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); - - for (i = 0; i < pinned_pages; i++) { - kbase_unpin_user_buf_page(pages[i]); - pages[i] = NULL; - } - - return err; + return ret; } -/* This function would also perform the work of unpinning pages on Job Manager - * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT - * have a corresponding call to kbase_jd_user_buf_unpin_pages(). +/* user_buf_sync_read_only_page - This function handles syncing a single page that has read access, + * only, on both the CPU and * GPU, so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to sync + * @dma_addr: DMA address of the bytes to be sync'd + * @offset_within_page: (unused) offset of the bytes within the page. Passed so that the calling + * signature is identical to user_buf_sync_writable_page(). */ -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable) +static void user_buf_sync_read_only_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual cache synchronization. + * + * Writes from neither the CPU nor GPU are possible via this mapping, + * so we just sync the entire page to the device. 
+ */ + CSTD_UNUSED(offset_within_page); + + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, imported_size, DMA_TO_DEVICE); +} + +/* user_buf_sync_writable_page - This function handles syncing a single page that has read + * and writable access, from either (or both of) the CPU and GPU, + * so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to unmap + * @dma_addr: DMA address of the bytes to be unmapped + * @offset_within_page: offset of the bytes within the page. This is the offset to the subrange of + * the memory that is "imported" and so is intended for GPU access. Areas of + * the page outside of this - whilst still GPU accessible - are not intended + * for use by GPU work, and should also not be modified as the userspace CPU + * threads may be modifying them. + */ +static void user_buf_sync_writable_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. 
+ * + * The following diagram shows the expected value of the variables + * used in this function in the corner case of an imported region enclosed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - + * | |/ offset_within_page + * | | + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end.
+ */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } +} + +void kbase_user_buf_dma_unmap_pages(struct kbase_context *kctx, struct kbase_va_region *reg) { long i; - struct page **pages; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; unsigned long remaining_size = alloc->imported.user_buf.size; - lockdep_assert_held(&kctx->reg_lock); - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - pages = alloc->imported.user_buf.pages; - -#if !MALI_USE_CSF - kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); -#else - CSTD_UNUSED(reg); -#endif + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return; for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); - /* Notice: this is a temporary variable that is used for DMA sync + /* The DMA unmapping operation affects the whole of every page, + * but cache maintenance shall be limited only to the imported + * address range. + * + * Notice: this is a temporary variable that is used for DMA sync * operations, and that could be incremented by an offset if the * current page contains both imported and non-imported memory * sub-regions. @@ -5171,96 +3788,69 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem * operation, that shall always use the original DMA address of the * whole memory page. */ + unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + struct page **pages = alloc->imported.user_buf.pages; + bool writable = (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)); + enum dma_data_direction dma_dir = writable ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - /* Manual CPU cache synchronization. - * - * When the GPU returns ownership of the buffer to the CPU, the driver - * needs to treat imported and non-imported memory differently. - * - * The first case to consider is non-imported sub-regions at the - * beginning of the first page and at the end of last page. For these - * sub-regions: CPU cache shall be committed with a clean+invalidate, - * in order to keep the last CPU write. - * - * Imported region prefers the opposite treatment: this memory has been - * legitimately mapped and used by the GPU, hence GPU writes shall be - * committed to memory, while CPU cache shall be invalidated to make - * sure that CPU reads the correct memory content. - * - * The following diagram shows the expect value of the variables - * used in this loop in the corner case of an imported region encloed - * by a single memory page: - * - * page boundary ->|---------- | <- dma_addr (initial value) - * | | - * | - - - - - | <- offset_within_page - * |XXXXXXXXXXX|\ - * |XXXXXXXXXXX| \ - * |XXXXXXXXXXX| }- imported_size - * |XXXXXXXXXXX| / - * |XXXXXXXXXXX|/ - * | - - - - - | <- offset_within_page + imported_size - * | |\ - * | | }- PAGE_SIZE - imported_size - offset_within_page - * | |/ - * page boundary ->|-----------| - * - * If the imported region is enclosed by more than one page, then - * offset_within_page = 0 for any page after the first. - */ + if (writable) + user_buf_sync_writable_page(kctx, imported_size, dma_addr, + offset_within_page); + else + user_buf_sync_read_only_page(kctx, imported_size, dma_addr, + offset_within_page); - /* Only for first page: handle non-imported range at the beginning. */ - if (offset_within_page > 0) { - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, - DMA_BIDIRECTIONAL); - dma_addr += offset_within_page; - } - - /* For every page: handle imported range. 
*/ - if (imported_size > 0) - dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, - DMA_BIDIRECTIONAL); - - /* Only for last page (that may coincide with first page): - * handle non-imported range at the end. - */ - if ((imported_size + offset_within_page) < PAGE_SIZE) { - dma_addr += imported_size; - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, - PAGE_SIZE - imported_size - offset_within_page, - DMA_BIDIRECTIONAL); - } - - /* Notice: use the original DMA address to unmap the whole memory page. */ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_dir); #else dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif - if (writeable) + + if (writable) set_page_dirty_lock(pages[i]); -#if !MALI_USE_CSF - kbase_unpin_user_buf_page(pages[i]); - pages[i] = NULL; -#endif remaining_size -= imported_size; offset_within_page = 0; } -#if !MALI_USE_CSF - alloc->nents = 0; -#endif } -int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, - void *src_page, size_t *to_copy, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset) +/** + * kbase_user_buf_unmap - Destroy GPU mapping for a user buffer. + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * Destroy the GPU mapping for an imported user buffer. Notice that this + * function doesn't release DMA mappings and doesn't unpin physical pages. 
+ */ +static void kbase_user_buf_unmap(struct kbase_context *kctx, struct kbase_va_region *reg) { - void *target_page = kmap(dest_pages[*target_page_nr]); - size_t chunk = PAGE_SIZE-offset; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + lockdep_assert_held(&kctx->reg_lock); + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return; + + if (WARN_ON(alloc->imported.user_buf.current_mapping_usage_count > 0)) + return; + + if (!kbase_is_region_invalid_or_free(reg)) { + kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kbase_reg_current_backed_size(reg), kctx->as_nr); + } +} + +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, void *src_page, size_t *to_copy, + unsigned int nr_pages, unsigned int *target_page_nr, + size_t offset) +{ + void *target_page = kbase_kmap(dest_pages[*target_page_nr]); + + size_t chunk = PAGE_SIZE - offset; if (!target_page) { pr_err("%s: kmap failure", __func__); @@ -5272,13 +3862,13 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page + offset, src_page, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); *target_page_nr += 1; if (*target_page_nr >= nr_pages || *to_copy == 0) return 0; - target_page = kmap(dest_pages[*target_page_nr]); + target_page = kbase_kmap(dest_pages[*target_page_nr]); if (!target_page) { pr_err("%s: kmap failure", __func__); return -ENOMEM; @@ -5287,10 +3877,10 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, KBASE_DEBUG_ASSERT(target_page); chunk = min(offset, *to_copy); - memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); + memcpy(target_page, src_page + PAGE_SIZE - offset, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); return 0; } @@ -5300,27 +3890,61 @@ int 
kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi { int err = 0; struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + enum kbase_user_buf_state user_buf_original_state; lockdep_assert_held(&kctx->reg_lock); /* decide what needs to happen for this resource */ switch (reg->gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && - (!reg->gpu_alloc->nents)) + user_buf_original_state = reg->gpu_alloc->imported.user_buf.state; + + if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) return -EINVAL; - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (reg->gpu_alloc->imported.user_buf - .current_mapping_usage_count == 1) { - err = kbase_jd_user_buf_map(kctx, reg); - if (err) { - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - return err; - } + /* This function is reachable through many code paths, and the imported + * memory handle could be in any of the possible states: consider all + * of them as a valid starting point, and progress through all stages + * until creating a GPU mapping or increasing the reference count if + * the handle is already mapped. + * + * Error recovery restores the original state and goes no further. 
+ */ + switch (user_buf_original_state) { + case KBASE_USER_BUF_STATE_EMPTY: + case KBASE_USER_BUF_STATE_PINNED: + case KBASE_USER_BUF_STATE_DMA_MAPPED: { + if (user_buf_original_state == KBASE_USER_BUF_STATE_EMPTY) + err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg); + else if (user_buf_original_state == KBASE_USER_BUF_STATE_PINNED) + err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg); + else + err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg); + + if (err) + goto user_buf_to_gpu_mapped_fail; + + break; } + case KBASE_USER_BUF_STATE_GPU_MAPPED: { + if (reg->gpu_alloc->imported.user_buf.current_mapping_usage_count == 0) + return -EINVAL; + break; + } + default: + dev_dbg(kctx->kbdev->dev, + "Invalid external resource GPU allocation state (%x) on mapping", + reg->gpu_alloc->imported.user_buf.state); + return -EINVAL; + } + + /* If the state was valid and the transition is happening, then the handle + * must be in GPU_MAPPED state now and the reference counter of GPU mappings + * can be safely incremented. 
+ */ + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + break; } - break; case KBASE_MEM_TYPE_IMPORTED_UMM: { err = kbase_mem_umm_map(kctx, reg); if (err) @@ -5336,6 +3960,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi kbase_va_region_alloc_get(kctx, reg); kbase_mem_phy_alloc_get(alloc); + return 0; + +user_buf_to_gpu_mapped_fail: return err; } @@ -5351,29 +3978,26 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r switch (alloc->type) { case KBASE_MEM_TYPE_IMPORTED_UMM: { kbase_mem_umm_unmap(kctx, reg, alloc); - } - break; + } break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - alloc->imported.user_buf.current_mapping_usage_count--; - - if (alloc->imported.user_buf.current_mapping_usage_count == 0) { - bool writeable = true; - - if (!kbase_is_region_invalid_or_free(reg)) { - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, - kbase_reg_current_backed_size(reg), - kbase_reg_current_backed_size(reg), - kctx->as_nr, true); - } - - if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) - writeable = false; - - kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); + switch (alloc->imported.user_buf.state) { + case KBASE_USER_BUF_STATE_GPU_MAPPED: { + alloc->imported.user_buf.current_mapping_usage_count--; + if (alloc->imported.user_buf.current_mapping_usage_count == 0) + kbase_user_buf_from_gpu_mapped_to_pinned(kctx, reg); + break; } + case KBASE_USER_BUF_STATE_DMA_MAPPED: { + kbase_user_buf_from_dma_mapped_to_pinned(kctx, reg); + break; } - break; + case KBASE_USER_BUF_STATE_PINNED: + case KBASE_USER_BUF_STATE_EMPTY: + default: { + /* nothing to do */ + } break; + } + } break; default: WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", alloc->type); @@ -5388,11 +4012,12 @@ static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) return reg->start_pfn << PAGE_SHIFT; } -struct kbase_ctx_ext_res_meta 
*kbase_sticky_resource_acquire( - struct kbase_context *kctx, u64 gpu_addr) +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx, + u64 gpu_addr) { struct kbase_ctx_ext_res_meta *meta = NULL; struct kbase_ctx_ext_res_meta *walker; + struct kbase_va_region *reg; lockdep_assert_held(&kctx->reg_lock); @@ -5400,23 +4025,20 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * Walk the per context external resource metadata list for the * metadata which matches the region which is being acquired. */ + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto failed; + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { + if (walker->reg == reg) { meta = walker; meta->ref++; break; } } - /* No metadata exists so create one. */ + /* If no metadata exists in the list, create one. */ if (!meta) { - struct kbase_va_region *reg; - - /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); - if (kbase_is_region_invalid_or_free(reg)) - goto failed; - /* Allocate the metadata object */ meta = kzalloc(sizeof(*meta), GFP_KERNEL); if (!meta) @@ -5445,34 +4067,39 @@ failed: return NULL; } -static struct kbase_ctx_ext_res_meta * -find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) +static struct kbase_ctx_ext_res_meta *find_sticky_resource_meta(struct kbase_context *kctx, + u64 gpu_addr) { struct kbase_ctx_ext_res_meta *walker; - + struct kbase_va_region *reg; lockdep_assert_held(&kctx->reg_lock); /* * Walk the per context external resource metadata list for the * metadata which matches the region which is being released. 
*/ - list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) - if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + if (!reg) + return NULL; + + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { + if (walker->reg == reg) return walker; + } return NULL; } static void release_sticky_resource_meta(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta) + struct kbase_ctx_ext_res_meta *meta) { kbase_unmap_external_resource(kctx, meta->reg); list_del(&meta->ext_res_node); kfree(meta); } -bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +bool kbase_sticky_resource_release(struct kbase_context *kctx, struct kbase_ctx_ext_res_meta *meta, + u64 gpu_addr) { lockdep_assert_held(&kctx->reg_lock); @@ -5493,7 +4120,7 @@ bool kbase_sticky_resource_release(struct kbase_context *kctx, } bool kbase_sticky_resource_release_force(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) { lockdep_assert_held(&kctx->reg_lock); @@ -5533,9 +4160,221 @@ void kbase_sticky_resource_term(struct kbase_context *kctx) * here, but it's more efficient if we do the clean up here. */ while (!list_empty(&kctx->ext_res_meta_head)) { - walker = list_first_entry(&kctx->ext_res_meta_head, - struct kbase_ctx_ext_res_meta, ext_res_node); + walker = list_first_entry(&kctx->ext_res_meta_head, struct kbase_ctx_ext_res_meta, + ext_res_node); kbase_sticky_resource_release_force(kctx, walker, 0); } } + +void kbase_user_buf_empty_init(struct kbase_va_region *reg) +{ + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_EMPTY; + /* Code currently manages transitions among 4 states. + * This is a reminder that code needs to be updated if a new state + * is introduced. 
+ */ + BUILD_BUG_ON(KBASE_USER_BUF_STATE_COUNT != 4); +} + +int kbase_user_buf_from_empty_to_pinned(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int ret; + + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_EMPTY) + return -EINVAL; + + ret = kbase_user_buf_pin_pages(kctx, reg); + + if (!ret) + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_PINNED; + + return ret; +} + +int kbase_user_buf_from_empty_to_dma_mapped(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int ret; + + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_EMPTY) + return -EINVAL; + + ret = kbase_user_buf_pin_pages(kctx, reg); + + if (ret) + goto pin_pages_fail; + + ret = kbase_user_buf_dma_map_pages(kctx, reg); + + if (!ret) + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_DMA_MAPPED; + else + goto dma_map_pages_fail; + + return ret; + +dma_map_pages_fail: + /* The user buffer could already have been previously pinned before + * entering this function, and hence there could potentially be CPU + * mappings of it. 
+ */ + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); + kbase_user_buf_unpin_pages(reg->gpu_alloc); +pin_pages_fail: + return ret; +} + +int kbase_user_buf_from_empty_to_gpu_mapped(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int ret; + + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_EMPTY) + return -EINVAL; + + ret = kbase_user_buf_pin_pages(kctx, reg); + + if (ret) + goto pin_pages_fail; + + ret = kbase_user_buf_dma_map_pages(kctx, reg); + + if (ret) + goto dma_map_pages_fail; + + ret = kbase_user_buf_map(kctx, reg); + + if (!ret) + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_GPU_MAPPED; + else + goto user_buf_map_fail; + + return ret; + +user_buf_map_fail: + kbase_user_buf_dma_unmap_pages(kctx, reg); +dma_map_pages_fail: + /* The user buffer could already have been previously pinned before + * entering this function, and hence there could potentially be CPU + * mappings of it. 
+ */ + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); + kbase_user_buf_unpin_pages(reg->gpu_alloc); +pin_pages_fail: + return ret; +} + +void kbase_user_buf_from_pinned_to_empty(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + if (WARN_ON(reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_PINNED)) + return; + kbase_user_buf_unpin_pages(reg->gpu_alloc); + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_EMPTY; +} + +int kbase_user_buf_from_pinned_to_gpu_mapped(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + int ret; + + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + lockdep_assert_held(&kctx->reg_lock); + + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_PINNED) + return -EINVAL; + + ret = kbase_user_buf_dma_map_pages(kctx, reg); + + if (ret) + goto dma_map_pages_fail; + + ret = kbase_user_buf_map(kctx, reg); + + if (!ret) + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_GPU_MAPPED; + else + goto user_buf_map_fail; + + return ret; + +user_buf_map_fail: + kbase_user_buf_dma_unmap_pages(kctx, reg); +dma_map_pages_fail: + return ret; +} + +void kbase_user_buf_from_dma_mapped_to_pinned(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + if (WARN_ON(reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_DMA_MAPPED)) + return; +#if !MALI_USE_CSF + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); +#endif + kbase_user_buf_dma_unmap_pages(kctx, reg); + + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_PINNED; +} + +void kbase_user_buf_from_dma_mapped_to_empty(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, 
(void *)kctx); + if (WARN_ON(reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_DMA_MAPPED)) + return; +#if !MALI_USE_CSF + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); +#endif + kbase_user_buf_dma_unmap_pages(kctx, reg); + + /* Termination code path: fall through to next state transition. */ + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_PINNED; + kbase_user_buf_from_pinned_to_empty(kctx, reg); +} + +int kbase_user_buf_from_dma_mapped_to_gpu_mapped(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + int ret; + + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + + if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_DMA_MAPPED) + return -EINVAL; + + ret = kbase_user_buf_map(kctx, reg); + + if (!ret) + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_GPU_MAPPED; + + return ret; +} + +void kbase_user_buf_from_gpu_mapped_to_pinned(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + if (WARN_ON(reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_GPU_MAPPED)) + return; + kbase_user_buf_unmap(kctx, reg); + kbase_user_buf_dma_unmap_pages(kctx, reg); + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_PINNED; +} + +void kbase_user_buf_from_gpu_mapped_to_empty(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); + kbase_user_buf_unmap(kctx, reg); + + /* Termination code path: fall through to next state transition. 
*/ + reg->gpu_alloc->imported.user_buf.state = KBASE_USER_BUF_STATE_DMA_MAPPED; + kbase_user_buf_from_dma_mapped_to_empty(kctx, reg); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index 490ad3c9c2e2..a8e59b65a73d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -30,6 +30,7 @@ #error "Don't include this file directly, use mali_kbase.h instead" #endif +#include #include #include #include @@ -37,14 +38,14 @@ #include "mali_kbase_defs.h" /* Required for kbase_mem_evictable_unmake */ #include "mali_kbase_mem_linux.h" +#include "mali_kbase_reg_track.h" #include "mali_kbase_mem_migrate.h" #include "mali_kbase_refcount_defs.h" -static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, - int pages); +static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages); /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ /* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by * 8 pages. The MMU reads in 8 page table entries from memory at a time, if we @@ -53,25 +54,220 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, * memory for the subsequent page table updates and generates duplicate page * faults as the page table information used by the MMU is not valid. 
*/ -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ /* This must always be a power of 2 */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 \ + (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 \ + (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? + * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. + */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? */ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ +#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +/* Bits 11-13 (inclusive) are reserved for indicating the zone. 
*/ + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul << 14) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul << 15) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x)&7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x)&KBASE_REG_MEMATTR_MASK) >> 16) + +/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ +/* Use GPU implementation-defined caching policy. */ +#define KBASE_MEMATTR_IMPL_DEF_CACHE_POLICY \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_IMPL) | \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_WRITE_BACK))) +/* The attribute set to force all resources to be cached. */ +#define KBASE_MEMATTR_FORCE_TO_CACHE_ALL \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK | \ + AS_MEMATTR_ATTRIBUTE0_ALLOC_R_MASK | \ + AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_ALLOC) | \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_WRITE_BACK))) +/* Inner write-alloc cache setup, no outer caching */ +#define KBASE_MEMATTR_WRITE_ALLOC \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK | \ + AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_ALLOC) | \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_WRITE_BACK))) +/* Set to implementation defined, outer caching */ +#define KBASE_MEMATTR_AARCH64_OUTER_IMPL_DEF \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_IMPL) | \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_WRITE_BACK))) +/* Set to write back memory, outer caching */ +#define KBASE_MEMATTR_AARCH64_OUTER_WA \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_W_MASK | \ + AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_ALLOC) | \ + 
AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_WRITE_BACK))) +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define KBASE_MEMATTR_AARCH64_NON_CACHEABLE \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_ALLOC) | \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_NON_CACHEABLE))) + +/* Symbols for default MEMATTR to use + * Default is - HW implementation defined caching + */ +#define KBASE_MEMATTR_INDEX_DEFAULT 0 +#define KBASE_MEMATTR_INDEX_DEFAULT_ACE 3 + +/* HW implementation defined caching */ +#define KBASE_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define KBASE_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define KBASE_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define KBASE_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define KBASE_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define KBASE_MEMATTR_INDEX_NON_CACHEABLE 5 + +#if MALI_USE_CSF +/* Set to shared memory, that is inner cacheable on ACE and inner or outer + * shared, otherwise inner non-cacheable. + * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. + */ +#define KBASE_MEMATTR_AARCH64_SHARED \ + ((unsigned long long)(AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_ALLOC_SEL_IMPL) | \ + AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SET( \ + 0, AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHARED))) + +/* Normal memory, shared between MCU and Host */ +#define KBASE_MEMATTR_INDEX_SHARED 6 +#endif + +#define KBASE_REG_PROTECTED (1ul << 19) + +/* Region belongs to a shrinker. + * + * This can either mean that it is part of the JIT/Ephemeral or tiler heap + * shrinker paths. 
Should be removed only after making sure that there are + * no references remaining to it in these paths, as it may cause the physical + * backing of the region to disappear during use. + */ +#define KBASE_REG_DONT_NEED (1ul << 20) + +/* Imported buffer is padded? */ +#define KBASE_REG_IMPORT_PAD (1ul << 21) + +#if MALI_USE_CSF +/* CSF event memory */ +#define KBASE_REG_CSF_EVENT (1ul << 22) +/* Bit 23 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) +#else +/* Bit 22 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) +/* The top of the initial commit is aligned to extension pages. + * Extent must be a power of 2 + */ +#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) +#endif /* MALI_USE_CSF */ + +/* Bit 24 is currently unused and is available for use for a new flag */ + +/* Memory has permanent kernel side mapping */ +#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) + +/* GPU VA region has been freed by the userspace, but still remains allocated + * due to the reference held by CPU mappings created on the GPU VA region. + * + * A region with this flag set has had kbase_gpu_munmap() called on it, but can + * still be looked-up in the region tracker as a non-free region. Hence must + * not create or update any more GPU mappings on such regions because they will + * not be unmapped when the region is finally destroyed. + * + * Since such regions are still present in the region tracker, new allocations + * attempted with BASE_MEM_SAME_VA might fail if their address intersects with + * a region with this flag set. + * + * In addition, this flag indicates the gpu_alloc member might no longer be valid + * e.g. in infinite cache simulation.
+ */ +#define KBASE_REG_VA_FREED (1ul << 26) + +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. + */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + +#if MALI_USE_CSF +/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA + * memory zones, and it determines whether they were created with a fixed + * GPU VA address requested by the user. + */ +#define KBASE_REG_FIXED_ADDRESS (1ul << 29) +#else +#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) +#endif /* * A CPU mapping */ struct kbase_cpu_mapping { - struct list_head mappings_list; - struct kbase_mem_phy_alloc *alloc; - struct kbase_context *kctx; - struct kbase_va_region *region; - int count; - int free_on_close; + struct list_head mappings_list; + struct kbase_mem_phy_alloc *alloc; + struct kbase_context *kctx; + struct kbase_va_region *region; + int count; + int free_on_close; }; enum kbase_memory_type { @@ -92,8 +288,23 @@ struct kbase_aliased { }; /* Physical pages tracking object properties */ -#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) -#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) +#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) +#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) + +/* enum kbase_user_buf_state - State of a USER_BUF handle. + * @KBASE_USER_BUF_STATE_EMPTY: Empty handle with no resources. + * @KBASE_USER_BUF_STATE_PINNED: Physical pages have been pinned. + * @KBASE_USER_BUF_STATE_DMA_MAPPED: DMA addresses for cache maintenance + * operations have been mapped. + * @KBASE_USER_BUF_STATE_GPU_MAPPED: Mapped on GPU address space. 
+ */ +enum kbase_user_buf_state { + KBASE_USER_BUF_STATE_EMPTY, + KBASE_USER_BUF_STATE_PINNED, + KBASE_USER_BUF_STATE_DMA_MAPPED, + KBASE_USER_BUF_STATE_GPU_MAPPED, + KBASE_USER_BUF_STATE_COUNT = 4 +}; /* struct kbase_mem_phy_alloc - Physical pages tracking object. * @@ -114,7 +325,7 @@ struct kbase_aliased { * to the physical pages to prevent flag changes or shrink * while maps are still held. * @nents: 0..N - * @pages: N elements, only 0..nents are valid + * @pages: N elements, only 0..(nents - 1) are valid * @mappings: List of CPU mappings of this physical memory allocation. * @evict_node: Node used to store this allocation on the eviction list * @evicted: Physical backing size when the pages where evicted @@ -133,14 +344,14 @@ struct kbase_aliased { * @imported: member in union valid based on @a type */ struct kbase_mem_phy_alloc { - struct kref kref; - atomic_t gpu_mappings; - atomic_t kernel_mappings; - size_t nents; - struct tagged_addr *pages; - struct list_head mappings; - struct list_head evict_node; - size_t evicted; + struct kref kref; + atomic_t gpu_mappings; + atomic_t kernel_mappings; + size_t nents; + struct tagged_addr *pages; + struct list_head mappings; + struct list_head evict_node; + size_t evicted; struct kbase_va_region *reg; enum kbase_memory_type type; struct kbase_vmap_struct *permanent_map; @@ -173,13 +384,10 @@ struct kbase_mem_phy_alloc { unsigned long size; unsigned long nr_pages; struct page **pages; - /* top bit (1<<31) of current_mapping_usage_count - * specifies that this import was pinned on import - * See PINNED_ON_IMPORT - */ u32 current_mapping_usage_count; struct mm_struct *mm; dma_addr_t *dma_addrs; + enum kbase_user_buf_state state; } user_buf; } imported; }; @@ -230,7 +438,7 @@ enum kbase_page_status { #define PGD_VPFN_LEVEL_MASK ((u64)0x3) #define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) #define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) -#define 
PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ +#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) /** @@ -284,15 +492,6 @@ struct kbase_page_metadata { u8 group_id; }; -/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is - * used to signify that a buffer was pinned when it was imported. Since the - * reference count is limited by the number of atoms that can be submitted at - * once there should be no danger of overflowing into this bit. - * Stealing the top bit also has the benefit that - * current_mapping_usage_count != 0 if and only if the buffer is mapped. - */ -#define PINNED_ON_IMPORT (1<<31) - /** * enum kbase_jit_report_flags - Flags for just-in-time memory allocation * pressure limit functions @@ -302,9 +501,7 @@ struct kbase_page_metadata { * Used to control flow within pressure limit related functions, or to provide * extra debugging information */ -enum kbase_jit_report_flags { - KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) -}; +enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; /** * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying @@ -346,8 +543,7 @@ static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc * * * @alloc: Pointer to physical pages tracking object */ -static inline void -kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc) +static inline void kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc) { atomic_inc(&alloc->kernel_mappings); } @@ -358,8 +554,7 @@ kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc) * * @alloc: Pointer to physical pages tracking object */ -static inline void -kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) +static inline void kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) { WARN_ON(atomic_dec_return(&alloc->kernel_mappings) < 0); } @@ -373,12 +568,19 @@ 
kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) */ static inline bool kbase_mem_is_imported(enum kbase_memory_type type) { - return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || - (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); } void kbase_mem_kref_free(struct kref *kref); +/** + * kbase_mem_init - Initialize kbase device for memory operation. + * @kbdev: Pointer to the kbase device + * + * This function must be called only when a kbase device is initialized. + * + * Return: 0 on success + */ int kbase_mem_init(struct kbase_device *kbdev); void kbase_mem_halt(struct kbase_device *kbdev); void kbase_mem_term(struct kbase_device *kbdev); @@ -449,204 +651,6 @@ struct kbase_va_region { size_t nr_pages; size_t initial_commit; size_t threshold_pages; - -/* Free region */ -#define KBASE_REG_FREE (1ul << 0) -/* CPU write access */ -#define KBASE_REG_CPU_WR (1ul << 1) -/* GPU write access */ -#define KBASE_REG_GPU_WR (1ul << 2) -/* No eXecute flag */ -#define KBASE_REG_GPU_NX (1ul << 3) -/* Is CPU cached? */ -#define KBASE_REG_CPU_CACHED (1ul << 4) -/* Is GPU cached? - * Some components within the GPU might only be able to access memory that is - * GPU cacheable. Refer to the specific GPU implementation for more details. - */ -#define KBASE_REG_GPU_CACHED (1ul << 5) - -#define KBASE_REG_GROWABLE (1ul << 6) -/* Can grow on pf? 
*/ -#define KBASE_REG_PF_GROW (1ul << 7) - -/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ -#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) - -/* inner shareable coherency */ -#define KBASE_REG_SHARE_IN (1ul << 9) -/* inner & outer shareable coherency */ -#define KBASE_REG_SHARE_BOTH (1ul << 10) - -#if MALI_USE_CSF -/* Space for 8 different zones */ -#define KBASE_REG_ZONE_BITS 3 -#else -/* Space for 4 different zones */ -#define KBASE_REG_ZONE_BITS 2 -#endif - -#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11) -#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11) -#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11) - -#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS) -#error "Too many zones for the number of zone bits defined" -#endif - -/* GPU read access */ -#define KBASE_REG_GPU_RD (1ul << 14) -/* CPU read access */ -#define KBASE_REG_CPU_RD (1ul << 15) - -/* Index of chosen MEMATTR for this region (0..7) */ -#define KBASE_REG_MEMATTR_MASK (7ul << 16) -#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) -#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) - -#define KBASE_REG_PROTECTED (1ul << 19) - -/* Region belongs to a shrinker. - * - * This can either mean that it is part of the JIT/Ephemeral or tiler heap - * shrinker paths. Should be removed only after making sure that there are - * no references remaining to it in these paths, as it may cause the physical - * backing of the region to disappear during use. - */ -#define KBASE_REG_DONT_NEED (1ul << 20) - -/* Imported buffer is padded? */ -#define KBASE_REG_IMPORT_PAD (1ul << 21) - -#if MALI_USE_CSF -/* CSF event memory */ -#define KBASE_REG_CSF_EVENT (1ul << 22) -#else -/* Bit 22 is reserved. 
- * - * Do not remove, use the next unreserved bit for new flags - */ -#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) -#endif - -#if !MALI_USE_CSF -/* The top of the initial commit is aligned to extension pages. - * Extent must be a power of 2 - */ -#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) -#else -/* Bit 23 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) -#endif /* !MALI_USE_CSF */ - -/* Bit 24 is currently unused and is available for use for a new flag */ - -/* Memory has permanent kernel side mapping */ -#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) - -/* GPU VA region has been freed by the userspace, but still remains allocated - * due to the reference held by CPU mappings created on the GPU VA region. - * - * A region with this flag set has had kbase_gpu_munmap() called on it, but can - * still be looked-up in the region tracker as a non-free region. Hence must - * not create or update any more GPU mappings on such regions because they will - * not be unmapped when the region is finally destroyed. - * - * Since such regions are still present in the region tracker, new allocations - * attempted with BASE_MEM_SAME_VA might fail if their address intersects with - * a region with this flag set. - * - * In addition, this flag indicates the gpu_alloc member might no longer valid - * e.g. in infinite cache simulation. - */ -#define KBASE_REG_VA_FREED (1ul << 26) - -/* If set, the heap info address points to a u32 holding the used size in bytes; - * otherwise it points to a u64 holding the lowest address of unused memory. 
- */ -#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) - -/* Allocation is actively used for JIT memory */ -#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) - -#if MALI_USE_CSF -/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA - * memory zones, and it determines whether they were created with a fixed - * GPU VA address requested by the user. - */ -#define KBASE_REG_FIXED_ADDRESS (1ul << 29) -#else -#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) -#endif - -#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) - -#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) -#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) - -#if MALI_USE_CSF -/* only used with 32-bit clients */ -/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). - */ -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ - (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -#else -/* only used with 32-bit clients */ -/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the - * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 - * bytes, see mmap64 man page for reference). So we put the default limit to the - * maximum possible on Linux and shrink it down, if required by the GPU, during - * initialization. - */ -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ - (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -/* end 32-bit clients only */ -#endif - -/* The starting address and size of the GPU-executable zone are dynamic - * and depend on the platform and the number of pages requested by the - * user process, with an upper limit of 4 GB. 
- */ -#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) -#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ - -#if MALI_USE_CSF -#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) -#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) -#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ - KBASE_REG_ZONE_MCU_SHARED_BASE) - -/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit - * clients, and 2^43 for 32-bit clients. - */ -#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES - -/* Executable zone supporting FIXED/FIXABLE allocations. - * It is always 4GB in size. - */ - -#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4) -#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES - -/* Non-executable zone supporting FIXED/FIXABLE allocations. - * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from - * (2^43) up to (2^44)-1 for 32-bit userspace clients. - */ -#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5) - -/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so - * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. 
- */ -#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) - -#endif - unsigned long flags; size_t extension; struct kbase_mem_phy_alloc *cpu_alloc; @@ -686,23 +690,6 @@ struct kbase_va_region { atomic_t no_user_free_count; }; -/** - * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a - * context or for a device - * @zone_bits: A KBASE_REG_ZONE_<...> to query - * - * Return: True if the zone for @zone_bits is a context zone, False otherwise - */ -static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits) -{ - WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); - return (zone_bits == KBASE_REG_ZONE_SAME_VA || -#if MALI_USE_CSF - zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA || -#endif - zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA); -} - /* Special marker for failed JIT allocations that still must be marked as * in-use */ @@ -723,7 +710,7 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) /* Possibly not all functions that find regions would be using this * helper, so they need to be checked when maintaining this function. 
*/ - return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); + return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); } /** @@ -743,10 +730,8 @@ static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg) return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC); } -void kbase_remove_va_region(struct kbase_device *kbdev, - struct kbase_va_region *reg); -static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, - struct kbase_va_region *reg) +void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg); +static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, struct kbase_va_region *reg) { /* If region was mapped then remove va region*/ if (reg->start_pfn) @@ -757,8 +742,8 @@ static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, kfree(reg); } -static inline struct kbase_va_region *kbase_va_region_alloc_get( - struct kbase_context *kctx, struct kbase_va_region *region) +static inline struct kbase_va_region *kbase_va_region_alloc_get(struct kbase_context *kctx, + struct kbase_va_region *region) { WARN_ON(!kbase_refcount_read(®ion->va_refcnt)); WARN_ON(kbase_refcount_read(®ion->va_refcnt) == INT_MAX); @@ -770,8 +755,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( return region; } -static inline struct kbase_va_region *kbase_va_region_alloc_put( - struct kbase_context *kctx, struct kbase_va_region *region) +static inline struct kbase_va_region *kbase_va_region_alloc_put(struct kbase_context *kctx, + struct kbase_va_region *region) { WARN_ON(kbase_refcount_read(®ion->va_refcnt) <= 0); WARN_ON(region->flags & KBASE_REG_FREE); @@ -835,8 +820,7 @@ static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *regi } /* Common functions */ -static inline struct tagged_addr *kbase_get_cpu_phy_pages( - struct kbase_va_region *reg) +static inline struct tagged_addr *kbase_get_cpu_phy_pages(struct 
kbase_va_region *reg) { KBASE_DEBUG_ASSERT(reg); KBASE_DEBUG_ASSERT(reg->cpu_alloc); @@ -846,8 +830,7 @@ static inline struct tagged_addr *kbase_get_cpu_phy_pages( return reg->cpu_alloc->pages; } -static inline struct tagged_addr *kbase_get_gpu_phy_pages( - struct kbase_va_region *reg) +static inline struct tagged_addr *kbase_get_gpu_phy_pages(struct kbase_va_region *reg) { KBASE_DEBUG_ASSERT(reg); KBASE_DEBUG_ASSERT(reg->cpu_alloc); @@ -871,20 +854,22 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) return reg->cpu_alloc->nents; } -#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ +#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD \ + ((size_t)(4 * 1024)) /* size above which vmalloc is used over kmalloc */ -static inline struct kbase_mem_phy_alloc *kbase_alloc_create( - struct kbase_context *kctx, size_t nr_pages, - enum kbase_memory_type type, int group_id) +static inline struct kbase_mem_phy_alloc *kbase_alloc_create(struct kbase_context *kctx, + size_t nr_pages, + enum kbase_memory_type type, + int group_id) { struct kbase_mem_phy_alloc *alloc; size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; size_t per_page_size = sizeof(*alloc->pages); + size_t i; /* Imported pages may have page private data already in use */ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - alloc_size += nr_pages * - sizeof(*alloc->imported.user_buf.dma_addrs); + alloc_size += nr_pages * sizeof(*alloc->imported.user_buf.dma_addrs); per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); } @@ -892,24 +877,24 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create( * Prevent nr_pages*per_page_size + sizeof(*alloc) from * wrapping around. 
*/ - if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) - / per_page_size)) + if (nr_pages > ((((size_t)-1) - sizeof(*alloc)) / per_page_size)) return ERR_PTR(-ENOMEM); /* Allocate based on the size to reduce internal fragmentation of vmem */ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) - alloc = vzalloc(alloc_size); + alloc = vmalloc(alloc_size); else - alloc = kzalloc(alloc_size, GFP_KERNEL); + alloc = kmalloc(alloc_size, GFP_KERNEL); if (!alloc) return ERR_PTR(-ENOMEM); + memset(alloc, 0, sizeof(struct kbase_mem_phy_alloc)); + if (type == KBASE_MEM_TYPE_NATIVE) { - alloc->imported.native.nr_struct_pages = - (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - kbase_process_page_usage_inc(kctx, - alloc->imported.native.nr_struct_pages); + alloc->imported.native.nr_struct_pages = (alloc_size + (PAGE_SIZE - 1)) >> + PAGE_SHIFT; + kbase_process_page_usage_inc(kctx, alloc->imported.native.nr_struct_pages); } /* Store allocation method */ @@ -921,37 +906,37 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create( atomic_set(&alloc->kernel_mappings, 0); alloc->nents = 0; alloc->pages = (void *)(alloc + 1); + /* fill pages with invalid address value */ + for (i = 0; i < nr_pages; i++) + alloc->pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; alloc->group_id = group_id; if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) - alloc->imported.user_buf.dma_addrs = - (void *) (alloc->pages + nr_pages); + alloc->imported.user_buf.dma_addrs = (void *)(alloc->pages + nr_pages); return alloc; } -static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, - struct kbase_context *kctx, int group_id) +static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, struct kbase_context *kctx, + int group_id) { KBASE_DEBUG_ASSERT(reg); KBASE_DEBUG_ASSERT(!reg->cpu_alloc); KBASE_DEBUG_ASSERT(!reg->gpu_alloc); KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); - reg->cpu_alloc = kbase_alloc_create(kctx, 
reg->nr_pages, - KBASE_MEM_TYPE_NATIVE, group_id); + reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, KBASE_MEM_TYPE_NATIVE, group_id); if (IS_ERR(reg->cpu_alloc)) return PTR_ERR(reg->cpu_alloc); else if (!reg->cpu_alloc) return -ENOMEM; reg->cpu_alloc->imported.native.kctx = kctx; - if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) - && (reg->flags & KBASE_REG_CPU_CACHED)) { - reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, - KBASE_MEM_TYPE_NATIVE, group_id); + if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) && (reg->flags & KBASE_REG_CPU_CACHED)) { + reg->gpu_alloc = + kbase_alloc_create(kctx, reg->nr_pages, KBASE_MEM_TYPE_NATIVE, group_id); if (IS_ERR_OR_NULL(reg->gpu_alloc)) { kbase_mem_phy_alloc_put(reg->cpu_alloc); return -ENOMEM; @@ -979,17 +964,17 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, /* * Max size for kctx memory pool (in pages) */ -#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) +#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) /* * The order required for a 2MB page allocation (2^order * 4KB = 2MB) */ -#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER 9 +#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER 9 /* * The order required for a 4KB page allocation */ -#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 +#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 /** * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in @@ -999,8 +984,8 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, * @max_size: Maximum number of free pages that a pool created from * @config can hold */ -static inline void kbase_mem_pool_config_set_max_size( - struct kbase_mem_pool_config *const config, size_t const max_size) +static inline void kbase_mem_pool_config_set_max_size(struct kbase_mem_pool_config *const config, + size_t const max_size) { WRITE_ONCE(config->max_size, max_size); } @@ -1014,8 +999,8 @@ static inline void kbase_mem_pool_config_set_max_size( * Return: Maximum number of 
free pages that a pool created from @config * can hold */ -static inline size_t kbase_mem_pool_config_get_max_size( - const struct kbase_mem_pool_config *const config) +static inline size_t +kbase_mem_pool_config_get_max_size(const struct kbase_mem_pool_config *const config) { return READ_ONCE(config->max_size); } @@ -1105,8 +1090,7 @@ struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); * Note : This function should not be used if the pool lock is held. Use * kbase_mem_pool_free_locked() instead. */ -void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, - bool dirty); +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, bool dirty); /** * kbase_mem_pool_free_locked - Free a page to memory pool @@ -1119,8 +1103,7 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, * * Note : Caller must hold the pool lock. */ -void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, - bool dirty); +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, bool dirty); /** * kbase_mem_pool_alloc_pages - Allocate pages from memory pool @@ -1188,8 +1171,8 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, * * Note : Caller must hold the pool lock. */ -int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, - size_t nr_4k_pages, struct tagged_addr *pages); +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, size_t nr_4k_pages, + struct tagged_addr *pages); /** * kbase_mem_pool_free_pages - Free pages to memory pool @@ -1204,7 +1187,7 @@ int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, * Like kbase_mem_pool_free() but optimized for freeing many pages. 
*/ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - struct tagged_addr *pages, bool dirty, bool reclaimed); + struct tagged_addr *pages, bool dirty, bool reclaimed); /** * kbase_mem_pool_free_pages_locked - Free pages to memory pool @@ -1218,9 +1201,8 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, * * Like kbase_mem_pool_free() but optimized for freeing many pages. */ -void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, bool dirty, - bool reclaimed); +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool dirty, bool reclaimed); /** * kbase_mem_pool_size - Get number of free pages in memory pool @@ -1246,7 +1228,6 @@ static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) return pool->max_size; } - /** * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool * @pool: Memory pool to inspect @@ -1296,13 +1277,14 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); /** * kbase_mem_alloc_page - Allocate a new page for a device * @pool: Memory pool to allocate a page from + * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. * * Most uses should use kbase_mem_pool_alloc to allocate a page. However that * function can fail in the event the pool is empty. * * Return: A new page or NULL if no memory */ -struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); +struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool, const bool alloc_from_kthread); /** * kbase_mem_pool_free_page - Free a page from a memory pool. 
@@ -1314,96 +1296,13 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); */ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p); -/** - * kbase_region_tracker_init - Initialize the region tracker data structure - * @kctx: kbase context - * - * Return: 0 if success, negative error code otherwise. - */ -int kbase_region_tracker_init(struct kbase_context *kctx); - -/** - * kbase_region_tracker_init_jit - Initialize the just-in-time memory - * allocation region - * @kctx: Kbase context. - * @jit_va_pages: Size of the JIT region in pages. - * @max_allocations: Maximum number of allocations allowed for the JIT region. - * Valid range is 0..%BASE_JIT_ALLOC_COUNT. - * @trim_level: Trim level for the JIT region. - * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. - * @group_id: The physical group ID from which to allocate JIT memory. - * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * @phys_pages_limit: Maximum number of physical pages to use to back the JIT - * region. Must not exceed @jit_va_pages. - * - * Return: 0 if success, negative error code otherwise. - */ -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - int max_allocations, int trim_level, int group_id, - u64 phys_pages_limit); - -/** - * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region - * @kctx: kbase context - * @exec_va_pages: Size of the JIT region in pages. - * It must not be greater than 4 GB. - * - * Return: 0 if success, negative error code otherwise. 
- */ -int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); - -/** - * kbase_region_tracker_term - Terminate the JIT region - * @kctx: kbase context - */ -void kbase_region_tracker_term(struct kbase_context *kctx); - -/** - * kbase_region_tracker_term_rbtree - Free memory for a region tracker - * - * @rbtree: Region tracker tree root - * - * This will free all the regions within the region tracker - */ -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); - -struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( - struct kbase_context *kctx, u64 gpu_addr); -struct kbase_va_region *kbase_find_region_enclosing_address( - struct rb_root *rbtree, u64 gpu_addr); - -/** - * kbase_region_tracker_find_region_base_address - Check that a pointer is - * actually a valid region. - * @kctx: kbase context containing the region - * @gpu_addr: pointer to check - * - * Must be called with context lock held. - * - * Return: pointer to the valid region on success, NULL otherwise - */ -struct kbase_va_region *kbase_region_tracker_find_region_base_address( - struct kbase_context *kctx, u64 gpu_addr); -struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, - u64 gpu_addr); - -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone); -void kbase_free_alloced_region(struct kbase_va_region *reg); -int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, - u64 addr, size_t nr_pages, size_t align); -int kbase_add_va_region_rbtree(struct kbase_device *kbdev, - struct kbase_va_region *reg, u64 addr, size_t nr_pages, - size_t align); - bool kbase_check_alloc_flags(unsigned long flags); bool kbase_check_import_flags(unsigned long flags); static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages) { if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { - dev_dbg( - kbdev->dev, + 
dev_dbg(kbdev->dev, "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", (unsigned long long)va_pages); return false; @@ -1415,8 +1314,7 @@ static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages) { if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { - dev_dbg( - kbdev->dev, + dev_dbg(kbdev->dev, "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", (unsigned long long)va_pages); return false; @@ -1441,8 +1339,8 @@ static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_ * * Return: 0 if sizes are valid for these flags, negative error code otherwise */ -int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 extension); +int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, u64 va_pages, + u64 commit_pages, u64 extension); /** * kbase_update_region_flags - Convert user space flags to kernel region flags @@ -1456,8 +1354,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, * * Return: 0 if successful, -EINVAL if the flags are not supported */ -int kbase_update_region_flags(struct kbase_context *kctx, - struct kbase_va_region *reg, unsigned long flags); +int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region *reg, + unsigned long flags); /** * kbase_gpu_vm_lock() - Acquire the per-context region list lock @@ -1526,9 +1424,8 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size * * Return: 0 on success, error code otherwise. 
*/ -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, - u64 addr, size_t nr_pages, size_t align, - enum kbase_caller_mmu_sync_info mmu_sync_info); +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align, enum kbase_caller_mmu_sync_info mmu_sync_info); /** * kbase_gpu_munmap - Remove the region from the GPU and unregister it. @@ -1554,8 +1451,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); * - It must hold kbase_device->mmu_hw_mutex * - It must hold the hwaccess_lock */ -void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - int as_nr); +void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr); /** * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. @@ -1618,8 +1514,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); */ int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, - struct tagged_addr gpu_pa, off_t offset, size_t size, - enum kbase_sync_type sync_fn); + struct tagged_addr gpu_pa, off_t offset, size_t size, + enum kbase_sync_type sync_fn); /* OS specific functions */ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); @@ -1688,9 +1584,8 @@ static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int * * Return: 0 if offset was obtained successfully. Error code otherwise. 
*/ -int kbasep_find_enclosing_cpu_mapping_offset( - struct kbase_context *kctx, - unsigned long uaddr, size_t size, u64 *offset); +int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx, unsigned long uaddr, + size_t size, u64 *offset); /** * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of @@ -1716,9 +1611,8 @@ int kbasep_find_enclosing_cpu_mapping_offset( * * Return: 0 on success, error code otherwise. */ -int kbasep_find_enclosing_gpu_mapping_start_and_offset( - struct kbase_context *kctx, - u64 gpu_addr, size_t size, u64 *start, u64 *offset); +int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, u64 gpu_addr, + size_t size, u64 *start, u64 *offset); /** * kbase_alloc_phy_pages_helper - Allocates physical pages. @@ -1741,8 +1635,7 @@ int kbasep_find_enclosing_gpu_mapping_start_and_offset( * * Return: 0 if all pages have been successfully allocated. Error code otherwise */ -int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_requested); +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); /** * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. @@ -1793,10 +1686,10 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * * Return: Pointer to array of allocated pages. NULL on failure. */ -struct tagged_addr *kbase_alloc_phy_pages_helper_locked( - struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, - size_t nr_pages_requested, - struct kbase_sub_alloc **prealloc_sa); +struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, + size_t nr_pages_requested, + struct kbase_sub_alloc **prealloc_sa); /** * kbase_free_phy_pages_helper() - Free physical pages. @@ -1827,8 +1720,8 @@ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pag * kbase_free_phy_pages_helper() should be used instead. 
*/ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, - struct kbase_mem_pool *pool, struct tagged_addr *pages, - size_t nr_pages_to_free); + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free); static inline void kbase_set_dma_addr_as_priv(struct page *p, dma_addr_t dma_addr) { @@ -1866,7 +1759,7 @@ static inline struct kbase_page_metadata *kbase_page_private(struct page *p) static inline dma_addr_t kbase_dma_addr(struct page *p) { - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) return kbase_page_private(p)->dma_addr; return kbase_dma_addr_as_priv(p); @@ -1876,8 +1769,9 @@ static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa { phys_addr_t pa = as_phys_addr_t(tagged_pa); struct page *page = pfn_to_page(PFN_DOWN(pa)); - dma_addr_t dma_addr = - is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page); + dma_addr_t dma_addr = (is_huge(tagged_pa) || is_partial(tagged_pa)) ? 
+ kbase_dma_addr_as_priv(page) : + kbase_dma_addr(page); return dma_addr; } @@ -1899,8 +1793,8 @@ void kbase_flush_mmu_wqs(struct kbase_device *kbdev); * @dir: DMA data direction */ -void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir); +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, + enum dma_data_direction dir); /** * kbase_sync_single_for_cpu - update physical memory and give CPU ownership @@ -1910,8 +1804,8 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, * @dir: DMA data direction */ -void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir); +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, + enum dma_data_direction dir); #if IS_ENABLED(CONFIG_DEBUG_FS) /** @@ -1925,6 +1819,8 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx); * kbase_jit_init - Initialize the JIT memory pool management * @kctx: kbase context * + * This function must be called only when a kbase context is instantiated. + * * Return: zero on success or negative error number on failure. */ int kbase_jit_init(struct kbase_context *kctx); @@ -1938,8 +1834,8 @@ int kbase_jit_init(struct kbase_context *kctx); * Return: JIT allocation on success or NULL on failure. 
*/ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - bool ignore_pressure_limit); + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit); /** * kbase_jit_free - Free a JIT allocation @@ -1983,7 +1879,7 @@ void kbase_jit_term(struct kbase_context *kctx); * @flags: combination of values from enum kbase_jit_report_flags */ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, - struct kbase_va_region *reg, unsigned int flags); + struct kbase_va_region *reg, unsigned int flags); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** @@ -2007,15 +1903,13 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, * inlining it. */ #if MALI_JIT_PRESSURE_LIMIT_BASE -#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ - do { \ - if (trace_mali_jit_report_gpu_mem_enabled()) \ - kbase_trace_jit_report_gpu_mem_trace_enabled( \ - (kctx), (reg), (flags)); \ +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + do { \ + if (trace_mali_jit_report_gpu_mem_enabled()) \ + kbase_trace_jit_report_gpu_mem_trace_enabled((kctx), (reg), (flags)); \ } while (0) #else -#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ - CSTD_NOP(kctx, reg, flags) +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) CSTD_NOP(kctx, reg, flags) #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #if MALI_JIT_PRESSURE_LIMIT_BASE @@ -2035,9 +1929,8 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, * Precondition: * - new_used_pages <= reg->nr_pages */ -void kbase_jit_report_update_pressure(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 new_used_pages, - unsigned int flags); +void kbase_jit_report_update_pressure(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 new_used_pages, unsigned int flags); /** * kbase_jit_trim_necessary_pages() - calculate and trim the least pages @@ -2065,16 +1958,14 @@ void 
kbase_jit_report_update_pressure(struct kbase_context *kctx, * Any pages freed will go into the pool and be allocated from there in * kbase_mem_alloc(). */ -void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, - size_t needed_pages); +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, size_t needed_pages); /* * Same as kbase_jit_request_phys_increase(), except that Caller is supposed * to take jit_evict_lock also on @kctx before calling this function. */ -static inline void -kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, - size_t needed_pages) +static inline void kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, + size_t needed_pages) { #if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); @@ -2084,8 +1975,7 @@ kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, kctx->jit_phys_pages_to_be_allocated += needed_pages; - kbase_jit_trim_necessary_pages(kctx, - kctx->jit_phys_pages_to_be_allocated); + kbase_jit_trim_necessary_pages(kctx, kctx->jit_phys_pages_to_be_allocated); } /** @@ -2113,8 +2003,7 @@ kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, * * Caller is supposed to take reg_lock on @kctx before calling this function. */ -static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, - size_t needed_pages) +static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, size_t needed_pages) { #if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); @@ -2149,8 +2038,7 @@ static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, * * Caller is supposed to take reg_lock on @kctx before calling this function. 
*/ -static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, - size_t needed_pages) +static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, size_t needed_pages) { lockdep_assert_held(&kctx->reg_lock); @@ -2181,6 +2069,10 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); * On successful mapping, the VA region and the gpu_alloc refcounts will be * increased, making it safe to use and store both values directly. * + * For imported user buffers, this function will acquire the necessary + * resources if they've not already been acquired before, in order to + * create a valid GPU mapping. + * * Return: Zero on success, or negative error code. */ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, @@ -2195,6 +2087,10 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi * be decreased. If the refcount reaches zero, both @reg and the corresponding * allocation may be freed, so using them after returning from this function * requires the caller to explicitly check their state. + * + * For imported user buffers, in the case where the refcount reaches zero, + * the function shall release all the resources acquired by the user buffer, + * including DMA mappings and physical pages. */ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg); @@ -2209,18 +2105,217 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r void kbase_unpin_user_buf_page(struct page *page); /** - * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. + * kbase_user_buf_pin_pages - Pin the pages of a user buffer. * @kctx: kbase context. * @reg: The region associated with the imported user buffer. * * To successfully pin the pages for a user buffer the current mm_struct must - * be the same as the mm_struct of the user buffer. 
After successfully pinning - * the pages further calls to this function succeed without doing work. + * be the same as the mm_struct of the user buffer. Further calls to this + * function fail if pages have already been pinned successfully. * * Return: zero on success or negative number on failure. */ -int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, - struct kbase_va_region *reg); +int kbase_user_buf_pin_pages(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_user_buf_unpin_pages - Release the pinned pages of a user buffer. + * @alloc: The allocation for the imported user buffer. + * + * The caller must have ensured that previous stages of the termination of + * the physical allocation have already been completed, which implies that + * GPU mappings have been destroyed and DMA addresses have been unmapped. + * + * This function does not affect CPU mappings: if there are any, they should + * be unmapped by the caller prior to calling this function. + */ +void kbase_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_user_buf_dma_map_pages - DMA map pages of a user buffer. + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * Acquire DMA addresses for the pages of the user buffer. Automatic CPU cache + * synchronization will be disabled because, in the general case, DMA mappings + * might be larger than the region to import. Further calls to this function + * fail if DMA addresses have already been obtained successfully. + * + * The caller must have ensured that physical pages have already been pinned + * prior to calling this function. + * + * Return: zero on success or negative number on failure. + */ +int kbase_user_buf_dma_map_pages(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_user_buf_dma_unmap_pages - DMA unmap pages of a user buffer. + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. 
+ * + * The caller must have ensured that GPU mappings have been destroyed prior to + * calling this function. + */ +void kbase_user_buf_dma_unmap_pages(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_user_buf_empty_init - Initialize a user buffer as "empty". + * @reg: The region associated with the imported user buffer. + * + * This function initializes a user buffer as "empty". + */ +void kbase_user_buf_empty_init(struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_empty_to_pinned - Transition user buffer from "empty" to "pinned". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "empty" state, in which no resources are + * attached to it, to the "pinned" state, in which physical pages have been acquired and pinned. + * + * Return: zero on success or negative number on failure. + */ +int kbase_user_buf_from_empty_to_pinned(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_empty_to_dma_mapped - Transition user buffer from "empty" to "DMA mapped". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "empty" state, in which no resources are + * attached to it, to the "DMA mapped" state, in which physical pages have been acquired, pinned + * and DMA mappings for cache synchronization have been obtained. + * + * Notice that the "empty" state is preserved in case of failure. + * + * Return: zero on success or negative number on failure. + */ +int kbase_user_buf_from_empty_to_dma_mapped(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_empty_to_gpu_mapped - Transition user buffer from "empty" to "GPU mapped". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. 
+ * + * This function transitions a user buffer from the "empty" state, in which no resources are + * attached to it, to the "GPU mapped" state, in which DMA mappings for cache synchronization + * have been obtained and GPU mappings have been created. + * + * However, the function does not update the counter of GPU mappings in usage, because different + * policies may be applied in different points of the driver. + * + * Notice that the "empty" state is preserved in case of failure. + * + * Return: zero on success or negative number on failure. + */ +int kbase_user_buf_from_empty_to_gpu_mapped(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_pinned_to_empty - Transition user buffer from "pinned" to "empty". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "pinned" state, in which physical pages + * have been acquired and pinned but no mappings are present, to the "empty" state, in which + * physical pages have been unpinned. + */ +void kbase_user_buf_from_pinned_to_empty(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_pinned_to_gpu_mapped - Transition user buffer from "pinned" to "GPU mapped". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "pinned" state, in which physical pages + * have been acquired and pinned but no mappings are present, to the "GPU mapped" state, in which + * DMA mappings for cache synchronization have been obtained and GPU mappings have been created. + * + * However, the function does not update the counter of GPU mappings in use, because different + * policies may be applied in different points of the driver. + * + * Notice that the "pinned" state is preserved in case of failure. + * + * Return: zero on success or negative number on failure. 
+ */ +int kbase_user_buf_from_pinned_to_gpu_mapped(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_dma_mapped_to_pinned - Transition user buffer from "DMA mapped" to "pinned". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "DMA mapped" state, in which physical pages + * have been acquired and pinned and DMA mappings have been obtained, to the "pinned" state, + * in which DMA mappings have been released but physical pages are still pinned. + */ +void kbase_user_buf_from_dma_mapped_to_pinned(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_dma_mapped_to_empty - Transition user buffer from "DMA mapped" to "empty". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "DMA mapped" state, in which physical pages + * have been acquired and pinned and DMA mappings have been obtained, to the "empty" state, + * in which DMA mappings have been released and physical pages have been unpinned. + */ +void kbase_user_buf_from_dma_mapped_to_empty(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_dma_mapped_to_gpu_mapped - Transition user buffer from "DMA mapped" to "GPU mapped". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "DMA mapped" state, in which physical pages + * have been acquired and pinned and DMA mappings have been obtained, to the "GPU mapped" state, + * in which GPU mappings have been created. + * + * However, the function does not update the counter of GPU mappings in usage, because different + * policies may be applied in different points of the driver. + * + * Notice that the "DMA mapped" state is preserved in case of failure. 
+ * + * Return: zero on success or negative number on failure. + */ +int kbase_user_buf_from_dma_mapped_to_gpu_mapped(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_gpu_mapped_to_pinned - Transition user buffer from "GPU mapped" to "pinned". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "GPU mapped" state, in which physical pages + * have been acquired and pinned, DMA mappings have been obtained, and GPU mappings have been + * created, to the "pinned" state, in which all mappings have been torn down but physical pages + * are still pinned. + * + * However, the function does not update the counter of GPU mappings in usage, because different + * policies may be applied in different points of the driver. + */ +void kbase_user_buf_from_gpu_mapped_to_pinned(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_user_buf_from_gpu_mapped_to_empty - Transition user buffer from "GPU mapped" to "empty". + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * This function transitions a user buffer from the "GPU mapped" state, in which physical pages + * have been acquired and pinned, DMA mappings have been obtained, and GPU mappings have been + * created, to the "empty" state, in which all mappings have been torn down and physical pages + * have been unpinned. + * + * However, the function does not update the counter of GPU mappings in usage, because different + * policies may be applied in different points of the driver. + */ +void kbase_user_buf_from_gpu_mapped_to_empty(struct kbase_context *kctx, + struct kbase_va_region *reg); /** * kbase_sticky_resource_init - Initialize sticky resource management. 
@@ -2238,8 +2333,8 @@ int kbase_sticky_resource_init(struct kbase_context *kctx); * Return: The metadata object which represents the binding between the * external resource and the kbase context on success or NULL on failure. */ -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( - struct kbase_context *kctx, u64 gpu_addr); +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx, + u64 gpu_addr); /** * kbase_sticky_resource_release - Release a reference on a sticky resource. @@ -2253,8 +2348,8 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * * Return: True if the release found the metadata and the reference was dropped. */ -bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); +bool kbase_sticky_resource_release(struct kbase_context *kctx, struct kbase_ctx_ext_res_meta *meta, + u64 gpu_addr); /** * kbase_sticky_resource_release_force - Release a sticky resource. @@ -2270,7 +2365,7 @@ bool kbase_sticky_resource_release(struct kbase_context *kctx, * released. */ bool kbase_sticky_resource_release_force(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); /** * kbase_sticky_resource_term - Terminate sticky resource management. @@ -2313,7 +2408,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); * have KBASE_REG_CSF_EVENT flag set for it. */ static inline void kbase_link_event_mem_page(struct kbase_context *kctx, - struct kbase_va_region *reg) + struct kbase_va_region *reg) { lockdep_assert_held(&kctx->reg_lock); @@ -2333,7 +2428,7 @@ static inline void kbase_link_event_mem_page(struct kbase_context *kctx, * have KBASE_REG_CSF_EVENT flag set for it. 
*/ static inline void kbase_unlink_event_mem_page(struct kbase_context *kctx, - struct kbase_va_region *reg) + struct kbase_va_region *reg) { lockdep_assert_held(&kctx->reg_lock); @@ -2369,8 +2464,7 @@ void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev); * * Return: 0 on success, or a negative error code. */ -int kbase_mem_umm_map(struct kbase_context *kctx, - struct kbase_va_region *reg); +int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg); /** * kbase_mem_umm_unmap - Unmap dma-buf @@ -2390,8 +2484,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, * release it's mapping reference, and if the refcount reaches 0, also be * unmapped, regardless of the value of @reg. */ -void kbase_mem_umm_unmap(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); +void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg, + struct kbase_mem_phy_alloc *alloc); /** * kbase_mem_do_sync_imported - Sync caches for imported memory @@ -2405,8 +2499,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, * * Return: 0 on success, or a negative error code. */ -int kbase_mem_do_sync_imported(struct kbase_context *kctx, - struct kbase_va_region *reg, enum kbase_sync_type sync_fn); +int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, + enum kbase_sync_type sync_fn); /** * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to @@ -2429,81 +2523,9 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, * * Return: 0 on success, or a negative error code. 
*/ -int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, - void *src_page, size_t *to_copy, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset); - -/** - * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone - * @zone: zone to query - * - * Return: The end of the zone corresponding to @zone - */ -static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) -{ - return zone->base_pfn + zone->va_size_pages; -} - -/** - * kbase_ctx_reg_zone_init - initialize a zone in @kctx - * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to initialize - * @base_pfn: Page Frame Number in GPU virtual address space for the start of - * the Zone - * @va_size_pages: Size of the Zone in pages - */ -static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx, - unsigned long zone_bits, - u64 base_pfn, u64 va_size_pages) -{ - struct kbase_reg_zone *zone; - - lockdep_assert_held(&kctx->reg_lock); - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; - *zone = (struct kbase_reg_zone){ - .base_pfn = base_pfn, .va_size_pages = va_size_pages, - }; -} - -/** - * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does - * not have @kctx 's region lock - * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve - * - * This should only be used in performance-critical paths where the code is - * resilient to a race with the zone changing. 
- * - * Return: The zone corresponding to @zone_bits - */ -static inline struct kbase_reg_zone * -kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, - unsigned long zone_bits) -{ - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; -} - -/** - * kbase_ctx_reg_zone_get - get a zone from @kctx - * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve - * - * The get is not refcounted - there is no corresponding 'put' operation - * - * Return: The zone corresponding to @zone_bits - */ -static inline struct kbase_reg_zone * -kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) -{ - lockdep_assert_held(&kctx->reg_lock); - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; -} +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, void *src_page, size_t *to_copy, + unsigned int nr_pages, unsigned int *target_page_nr, + size_t offset); /** * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed @@ -2567,4 +2589,4 @@ static inline base_mem_alloc_flags kbase_mem_group_id_set(int id) { return BASE_MEM_GROUP_ID_SET(id); } -#endif /* _KBASE_MEM_H_ */ +#endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index f1251a4ed575..0c58205807cb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -48,7 +48,7 @@ #include #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ - (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) + (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) /* Enable workaround for ion for kernels prior to v5.0.0 and from v5.3.0 * onwards. 
* @@ -92,8 +92,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct v static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, kbase_vmap_flag vmap_flags); -static void kbase_vunmap_phy_pages(struct kbase_context *kctx, - struct kbase_vmap_struct *map); +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, struct kbase_vmap_struct *map); static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); @@ -118,8 +117,7 @@ static bool is_process_exiting(struct vm_area_struct *vma) * one of the event memory pages. The enclosing region, if found, shouldn't * have been marked as free. */ -static struct kbase_va_region *kbase_find_event_mem_region( - struct kbase_context *kctx, u64 gpu_addr) +static struct kbase_va_region *kbase_find_event_mem_region(struct kbase_context *kctx, u64 gpu_addr) { #if MALI_USE_CSF u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; @@ -128,8 +126,7 @@ static struct kbase_va_region *kbase_find_event_mem_region( lockdep_assert_held(&kctx->reg_lock); list_for_each_entry(reg, &kctx->csf.event_pages_head, link) { - if ((reg->start_pfn <= gpu_pfn) && - (gpu_pfn < (reg->start_pfn + reg->nr_pages))) { + if ((reg->start_pfn <= gpu_pfn) && (gpu_pfn < (reg->start_pfn + reg->nr_pages))) { if (WARN_ON(reg->flags & KBASE_REG_FREE)) return NULL; @@ -139,6 +136,9 @@ static struct kbase_va_region *kbase_find_event_mem_region( return reg; } } +#else + CSTD_UNUSED(kctx); + CSTD_UNUSED(gpu_addr); #endif return NULL; @@ -179,8 +179,8 @@ static struct kbase_va_region *kbase_find_event_mem_region( * On success, the region will also be forced into a certain kind: * - It will no longer be growable */ -static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, - struct kbase_va_region *reg, size_t vsize, size_t size) +static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, struct kbase_va_region 
*reg, + size_t vsize, size_t size) { size_t size_bytes = (size << PAGE_SHIFT); struct kbase_vmap_struct *kern_mapping; @@ -191,7 +191,7 @@ static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, * Only support KBASE_MEM_TYPE_NATIVE allocations */ if (vsize != size || reg->cpu_alloc->permanent_map != NULL || - reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) return -EINVAL; kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); @@ -214,8 +214,7 @@ vmap_fail: return err; } -void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc) +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) { WARN_ON(!alloc->permanent_map); kbase_vunmap_phy_pages(kctx, alloc->permanent_map); @@ -224,9 +223,8 @@ void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, alloc->permanent_map = NULL; } -void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, - u64 gpu_addr, - struct kbase_vmap_struct **out_kern_mapping) +void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, + struct kbase_vmap_struct **out_kern_mapping) { struct kbase_va_region *reg; void *kern_mem_ptr = NULL; @@ -242,8 +240,7 @@ void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, reg = kbase_find_event_mem_region(kctx, gpu_addr); if (!reg) { - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); } if (kbase_is_region_invalid_or_free(reg)) @@ -268,8 +265,7 @@ out_unlock: return kern_mem_ptr; } -void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, - struct kbase_vmap_struct *kern_mapping) +void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, struct kbase_vmap_struct *kern_mapping) { WARN_ON(!kctx); WARN_ON(!kern_mapping); @@ -289,9 +285,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages u64 
extension, u64 *flags, u64 *gpu_va, enum kbase_caller_mmu_sync_info mmu_sync_info) { - int zone; struct kbase_va_region *reg; - struct rb_root *rbtree; + enum kbase_memory_zone zone; struct device *dev; KBASE_DEBUG_ASSERT(kctx); @@ -299,8 +294,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages KBASE_DEBUG_ASSERT(gpu_va); dev = kctx->kbdev->dev; - dev_dbg(dev, - "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n", + dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n", va_pages, commit_pages, extension, *flags); #if MALI_USE_CSF @@ -311,15 +305,11 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages *gpu_va = 0; /* return 0 on failure */ #endif else - dev_dbg(dev, - "Keeping requested GPU VA of 0x%llx\n", - (unsigned long long)*gpu_va); + dev_dbg(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va); if (!kbase_check_alloc_flags(*flags)) { - dev_warn(dev, - "%s called with bad flags (%llx)", - __func__, - (unsigned long long)*flags); + dev_warn(dev, "%s called with bad flags (%llx)", __func__, + (unsigned long long)*flags); goto bad_flags; } @@ -328,30 +318,27 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages /* Mask coherency flags if infinite cache is enabled to prevent * the skipping of syncs from BASE side. 
*/ - *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | - BASE_MEM_COHERENT_SYSTEM); + *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_COHERENT_SYSTEM); } #endif if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && - (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { - dev_warn(dev, "%s call required coherent mem when unavailable", - __func__); + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + dev_warn(dev, "%s call required coherent mem when unavailable", __func__); goto bad_flags; } if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { + !kbase_device_is_cpu_coherent(kctx->kbdev)) { /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, - extension)) + if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extension)) goto bad_sizes; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -361,31 +348,25 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #endif /* find out which VA zone to use */ - if (*flags & BASE_MEM_SAME_VA) { - rbtree = &kctx->reg_rbtree_same; - zone = KBASE_REG_ZONE_SAME_VA; - } + if (*flags & BASE_MEM_SAME_VA) + zone = SAME_VA_ZONE; #if MALI_USE_CSF /* fixed va_zone always exists */ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) { if (*flags & BASE_MEM_PROT_GPU_EX) { - rbtree = &kctx->reg_rbtree_exec_fixed; - zone = KBASE_REG_ZONE_EXEC_FIXED_VA; + zone = EXEC_FIXED_VA_ZONE; } else { - rbtree = &kctx->reg_rbtree_fixed; - zone = KBASE_REG_ZONE_FIXED_VA; + zone = FIXED_VA_ZONE; } } #endif else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { - 
rbtree = &kctx->reg_rbtree_exec; - zone = KBASE_REG_ZONE_EXEC_VA; + zone = EXEC_VA_ZONE; } else { - rbtree = &kctx->reg_rbtree_custom; - zone = KBASE_REG_ZONE_CUSTOM_VA; + zone = CUSTOM_VA_ZONE; } - reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); + reg = kbase_ctx_alloc_free_region(kctx, zone, PFN_DOWN(*gpu_va), va_pages); if (!reg) { dev_err(dev, "Failed to allocate free region"); @@ -395,8 +376,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages if (kbase_update_region_flags(kctx, reg, *flags) != 0) goto invalid_flags; - if (kbase_reg_prepare_native(reg, kctx, - kbase_mem_group_id_get(*flags)) != 0) { + if (kbase_reg_prepare_native(reg, kctx, kbase_mem_group_id_get(*flags)) != 0) { dev_err(dev, "Failed to prepare region"); goto prepare_failed; } @@ -411,11 +391,10 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages *flags &= ~BASE_MEM_CACHED_CPU; if (*flags & BASE_MEM_GROW_ON_GPF) { - unsigned int const ir_threshold = atomic_read( - &kctx->kbdev->memdev.ir_threshold); + unsigned int const ir_threshold = atomic_read(&kctx->kbdev->memdev.ir_threshold); - reg->threshold_pages = ((va_pages * ir_threshold) + - (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; + reg->threshold_pages = + ((va_pages * ir_threshold) + (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; } else reg->threshold_pages = 0; @@ -434,8 +413,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", - (unsigned long long)commit_pages, - (unsigned long long)va_pages); + (unsigned long long)commit_pages, (unsigned long long)va_pages); goto no_mem; } reg->initial_commit = commit_pages; @@ -448,8 +426,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages * kbase_alloc_phy_pages(). 
If we move that to setup pages * earlier, also move this call too */ - int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, - commit_pages); + int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, commit_pages); if (err < 0) { kbase_gpu_vm_unlock(kctx); goto no_kern_mapping; @@ -476,7 +453,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); cookie <<= PAGE_SHIFT; - *gpu_va = (u64) cookie; + *gpu_va = (u64)cookie; } else /* we control the VA */ { size_t align = 1; @@ -496,8 +473,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages align = 1; #endif /* !MALI_USE_CSF */ } - if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, - mmu_sync_info) != 0) { + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); kbase_gpu_vm_unlock(kctx); goto no_mmap; @@ -551,8 +527,7 @@ bad_flags: } KBASE_EXPORT_TEST_API(kbase_mem_alloc); -int kbase_mem_query(struct kbase_context *kctx, - u64 gpu_addr, u64 query, u64 * const out) +int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *const out) { struct kbase_va_region *reg; int ret = -EINVAL; @@ -588,8 +563,7 @@ int kbase_mem_query(struct kbase_context *kctx, case KBASE_MEM_QUERY_VA_SIZE: *out = reg->nr_pages; break; - case KBASE_MEM_QUERY_FLAGS: - { + case KBASE_MEM_QUERY_FLAGS: { *out = 0; if (KBASE_REG_CPU_WR & reg->flags) *out |= BASE_MEM_PROT_CPU_WR; @@ -607,6 +581,8 @@ int kbase_mem_query(struct kbase_context *kctx, *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; + if (KBASE_REG_DONT_NEED & reg->flags) + *out |= BASE_MEM_DONT_NEED; if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { /* Prior to this version, this was known about by * user-side but we did not return them. 
Returning @@ -636,8 +612,8 @@ int kbase_mem_query(struct kbase_context *kctx, #if MALI_USE_CSF if (KBASE_REG_CSF_EVENT & reg->flags) *out |= BASE_MEM_CSF_EVENT; - if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) || - ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if ((kbase_bits_to_zone(reg->flags) == FIXED_VA_ZONE) || + (kbase_bits_to_zone(reg->flags) == EXEC_FIXED_VA_ZONE)) { if (KBASE_REG_FIXED_ADDRESS & reg->flags) *out |= BASE_MEM_FIXED; else @@ -649,8 +625,7 @@ int kbase_mem_query(struct kbase_context *kctx, *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); - WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, - "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); + WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); *out &= BASE_MEM_FLAGS_QUERYABLE; break; } @@ -674,17 +649,13 @@ out_unlock: * * Return: Number of pages which can be freed or SHRINK_EMPTY if no page remains. */ -static -unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, - struct shrink_control *sc) +static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) { struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); int evict_nents = atomic_read(&kctx->evict_nents); unsigned long nr_freeable_items; - WARN((sc->gfp_mask & __GFP_ATOMIC), - "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", - sc->gfp_mask); WARN(in_atomic(), "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n", sc->gfp_mask); @@ -723,9 +694,8 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, * The eviction list itself is guarded by the eviction lock and the MMU updates * are protected by their own lock. 
*/ -static -unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, - struct shrink_control *sc) +static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) { struct kbase_context *kctx; struct kbase_mem_phy_alloc *alloc; @@ -742,8 +712,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, if (!alloc->reg) continue; - err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, - 0, alloc->nents); + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, 0, alloc->nents); /* Failed to remove GPU mapping, proceed to next one. */ if (err != 0) @@ -782,15 +751,12 @@ int kbase_mem_evictable_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->evict_list); mutex_init(&kctx->jit_evict_lock); - atomic_set(&kctx->evict_nents, 0); - kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; kctx->reclaim.seeks = DEFAULT_SEEKS; /* Kernel versions prior to 3.1 : * struct shrinker does not define batch */ - kctx->reclaim.batch = 0; #if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); #else @@ -815,14 +781,10 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) int __maybe_unused new_page_count; kbase_process_page_usage_dec(kctx, alloc->nents); - new_page_count = atomic_sub_return(alloc->nents, - &kctx->used_pages); + new_page_count = atomic_sub_return(alloc->nents, &kctx->used_pages); atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages); - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - kctx->id, - (u64)new_page_count); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); } @@ -830,15 +792,13 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. 
* @alloc: The physical allocation */ -static -void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +static void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) { struct kbase_context *kctx = alloc->imported.native.kctx; struct kbase_device *kbdev = kctx->kbdev; int __maybe_unused new_page_count; - new_page_count = atomic_add_return(alloc->nents, - &kctx->used_pages); + new_page_count = atomic_add_return(alloc->nents, &kctx->used_pages); atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages); /* Increase mm counters so that the allocation is accounted for @@ -846,14 +806,11 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) */ kbase_process_page_usage_inc(kctx, alloc->nents); - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - kctx->id, - (u64)new_page_count); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, kctx->id, (u64)new_page_count); kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); } -int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +void kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) { struct kbase_context *kctx = gpu_alloc->imported.native.kctx; @@ -862,8 +819,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* Memory is in the process of transitioning to the shrinker, and * should ignore migration attempts */ - kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, - 0, gpu_alloc->nents); + kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); mutex_lock(&kctx->jit_evict_lock); /* This allocation can't already be on a list. */ @@ -877,14 +833,13 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* Indicate to page migration that the memory can be reclaimed by the shrinker. 
*/ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; - return 0; } bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) @@ -918,8 +873,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) /* If the region is still alive ... */ if (gpu_alloc->reg) { /* ... allocate replacement backing ... */ - err = kbase_alloc_phy_pages_helper(gpu_alloc, - gpu_alloc->evicted); + err = kbase_alloc_phy_pages_helper(gpu_alloc, gpu_alloc->evicted); /* * ... and grow the mapping back to its @@ -927,8 +881,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) */ if (!err) err = kbase_mem_grow_gpu_mapping( - kctx, gpu_alloc->reg, - gpu_alloc->evicted, 0, mmu_sync_info); + kctx, gpu_alloc->reg, gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; @@ -938,7 +891,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * in which a physical allocation could transition to NOT_MOVABLE * from. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -950,32 +903,31 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) return (err == 0); } -int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) +/** kbase_mem_flags_change_imported_umm - Change memory flags for imported UMM region + * @kctx: Pointer to kbase context. + * @flags: Base memory flags to modify. + * @reg: Pointer to region whose flags shall be modified. + * + * Return: 0 on success, error code otherwise. 
+ */ +static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx, unsigned int flags, + struct kbase_va_region *reg) { - struct kbase_va_region *reg; - int ret = -EINVAL; unsigned int real_flags = 0; unsigned int new_flags = 0; - bool prev_needed, new_needed; + int ret = -EINVAL; - KBASE_DEBUG_ASSERT(kctx); + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) + return ret; - if (!gpu_addr) - return -EINVAL; + if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + return ret; - if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) - return -EINVAL; - - /* nuke other bits */ - flags &= mask; - - /* check for only supported flags */ - if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) - goto out; - - /* mask covers bits we don't support? */ - if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) - goto out; + /* shareability flags are ignored for GPU uncached memory + * instead of causing an error. + */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) + return 0; /* convert flags */ if (BASE_MEM_COHERENT_SYSTEM & flags) @@ -983,79 +935,13 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in else if (BASE_MEM_COHERENT_LOCAL & flags) real_flags |= KBASE_REG_SHARE_IN; - /* now we can lock down the context, and find the region */ - down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); - - /* Validate the region */ - reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (kbase_is_region_invalid_or_free(reg)) - goto out_unlock; - - /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations - * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. - * This would usually include JIT allocations, Tiler heap related allocations - * & GPU queue ringbuffer and none of them needs to be explicitly marked - * as evictable by Userspace. 
- */ - if (kbase_va_region_is_no_user_free(reg)) - goto out_unlock; - - /* Is the region being transitioning between not needed and needed? */ - prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; - new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; - if (prev_needed != new_needed) { - /* Aliased allocations can't be shrunk as the code doesn't - * support looking up: - * - all physical pages assigned to different GPU VAs - * - CPU mappings for the physical pages at different vm_pgoff - * (==GPU VA) locations. - */ - if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1) - goto out_unlock; - - if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0) - goto out_unlock; - - if (new_needed) { - /* Only native allocations can be marked not needed */ - if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { - ret = -EINVAL; - goto out_unlock; - } - ret = kbase_mem_evictable_make(reg->gpu_alloc); - if (ret) - goto out_unlock; - } else { - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - } - - /* limit to imported memory */ - if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) - goto out_unlock; - - /* shareability flags are ignored for GPU uncached memory */ - if (!(reg->flags & KBASE_REG_GPU_CACHED)) { - ret = 0; - goto out_unlock; - } - /* no change? 
*/ - if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { - ret = 0; - goto out_unlock; - } + if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) + return 0; new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); new_flags |= real_flags; - /* Currently supporting only imported memory */ - if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) { - ret = -EINVAL; - goto out_unlock; - } - if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { /* Future use will use the new flags, existing mapping * will NOT be updated as memory should not be in use @@ -1079,17 +965,13 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * addresses from buffer with different shareability * properties. */ - dev_dbg(kctx->kbdev->dev, - "Updating page tables on mem flag change\n"); - ret = kbase_mmu_update_pages(kctx, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - new_flags, - reg->gpu_alloc->group_id); + dev_dbg(kctx->kbdev->dev, "Updating page tables on mem flag change\n"); + ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), new_flags, + reg->gpu_alloc->group_id); if (ret) dev_warn(kctx->kbdev->dev, - "Failed to update GPU page tables on flag change: %d\n", - ret); + "Failed to update GPU page tables on flag change: %d\n", ret); } else WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count); @@ -1097,17 +979,128 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (!ret) reg->flags = new_flags; + return ret; +} + +/** kbase_mem_flags_change_native - Change memory flags for native region + * @kctx: Pointer to kbase context. + * @flags: Base memory flags to modify. + * @reg: Pointer to region whose flags shall be modified. + * + * Return: 0 on success, error code otherwise. 
+ */ +static int kbase_mem_flags_change_native(struct kbase_context *kctx, unsigned int flags, + struct kbase_va_region *reg) +{ + bool kbase_reg_dont_need_flag = (KBASE_REG_DONT_NEED & reg->flags); + bool requested_dont_need_flag = (BASE_MEM_DONT_NEED & flags); + int ret = 0; + + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + return -EINVAL; + + /* Nothing to do if flag doesn't change. */ + if (kbase_reg_dont_need_flag ^ requested_dont_need_flag) { + /* Changes of the "evictable" property cannot be applied to regions + * which are members of memory aliases, as the code doesn't support + * looking up all physical pages assigned to different GPU VAs + * and all CPU mappings associated with those physical pages. + */ + if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) + return -EINVAL; + + if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0) + return -EINVAL; + + if (requested_dont_need_flag) + kbase_mem_evictable_make(reg->gpu_alloc); + else + ret = kbase_mem_evictable_unmake(reg->gpu_alloc) ? 0 : -ENOMEM; + } + + return ret; +} + +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, + unsigned int mask) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx); + + if (!gpu_addr) + return ret; + + if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) + return ret; + + flags &= mask; + + /* Lock down the context, and find the region */ + down_write(kbase_mem_get_process_mmap_lock()); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations + * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. 
+ * This would usually include JIT allocations, Tiler heap related allocations + * & GPU queue ringbuffer and none of them needs to be explicitly marked + * as evictable by Userspace. + */ + if (kbase_va_region_is_no_user_free(reg)) + goto out_unlock; + + /* Different memory flags are allowed for different memory types, hence + * this step of the validation process must be done at this point and + * cannot be done earlier. + * + * Once the region has been found and validated, the actions to take depend + * on the memory type of the region. + */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_NATIVE: { + if ((flags & ~(BASE_MEM_FLAGS_MODIFIABLE_NATIVE)) || + (mask & ~(BASE_MEM_FLAGS_MODIFIABLE_NATIVE))) { + ret = -EINVAL; + break; + } + + ret = kbase_mem_flags_change_native(kctx, flags, reg); + break; + } + case KBASE_MEM_TYPE_IMPORTED_UMM: { + if ((flags & ~(BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM)) || + (mask & ~(BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM))) { + ret = -EINVAL; + break; + } + + ret = kbase_mem_flags_change_imported_umm(kctx, flags, reg); + break; + } + default: { + /* Other memory types are not supported. Return error. 
*/ + ret = -EINVAL; + break; + } + } + out_unlock: kbase_gpu_vm_unlock(kctx); up_write(kbase_mem_get_process_mmap_lock()); -out: + return ret; } #define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) -int kbase_mem_do_sync_imported(struct kbase_context *kctx, - struct kbase_va_region *reg, enum kbase_sync_type sync_fn) +int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, + enum kbase_sync_type sync_fn) { int ret = -EINVAL; struct dma_buf __maybe_unused *dma_buf; @@ -1141,16 +1134,15 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, switch (sync_fn) { case KBASE_SYNC_TO_DEVICE: - dev_dbg(kctx->kbdev->dev, - "Syncing imported buffer at GPU VA %llx to GPU\n", + dev_dbg(kctx->kbdev->dev, "Syncing imported buffer at GPU VA %llx to GPU\n", reg->start_pfn); #ifdef KBASE_MEM_ION_SYNC_WORKAROUND if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { - struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; + struct dma_buf_attachment *attachment = + reg->gpu_alloc->imported.umm.dma_attachment; struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; - dma_sync_sg_for_device(attachment->dev, sgt->sgl, - sgt->nents, dir); + dma_sync_sg_for_device(attachment->dev, sgt->sgl, sgt->nents, dir); ret = 0; } #else @@ -1158,16 +1150,15 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; case KBASE_SYNC_TO_CPU: - dev_dbg(kctx->kbdev->dev, - "Syncing imported buffer at GPU VA %llx to CPU\n", + dev_dbg(kctx->kbdev->dev, "Syncing imported buffer at GPU VA %llx to CPU\n", reg->start_pfn); #ifdef KBASE_MEM_ION_SYNC_WORKAROUND if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { - struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; + struct dma_buf_attachment *attachment = + reg->gpu_alloc->imported.umm.dma_attachment; struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; - 
dma_sync_sg_for_cpu(attachment->dev, sgt->sgl, - sgt->nents, dir); + dma_sync_sg_for_cpu(attachment->dev, sgt->sgl, sgt->nents, dir); ret = 0; } #else @@ -1177,8 +1168,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, } if (unlikely(ret)) - dev_warn(kctx->kbdev->dev, - "Failed to sync mem region %pK at GPU VA %llx: %d\n", + dev_warn(kctx->kbdev->dev, "Failed to sync mem region %pK at GPU VA %llx: %d\n", reg, reg->start_pfn, ret); return ret; @@ -1197,8 +1187,8 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, { struct tagged_addr *pa = alloc->pages; - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, + DMA_BIDIRECTIONAL); alloc->imported.umm.sgt = NULL; kbase_remove_dma_buf_usage(kctx, alloc); @@ -1220,8 +1210,7 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, * * Return: 0 on success, or negative error code */ -static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, - struct kbase_va_region *reg) +static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, struct kbase_va_region *reg) { struct sg_table *sgt; struct scatterlist *s; @@ -1234,8 +1223,7 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM); WARN_ON_ONCE(alloc->imported.umm.sgt); - sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, - DMA_BIDIRECTIONAL); + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); if (IS_ERR_OR_NULL(sgt)) return -EINVAL; @@ -1247,26 +1235,23 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, for_each_sg(sgt->sgl, s, sgt->nents, i) { size_t j, pages = PFN_UP(MALI_SG_DMA_LEN(s)); - WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), - "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", - MALI_SG_DMA_LEN(s)); + 
WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE - 1), + "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", MALI_SG_DMA_LEN(s)); - WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), - "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", - (unsigned long long) sg_dma_address(s)); + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE - 1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long)sg_dma_address(s)); for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) - *pa++ = as_tagged(sg_dma_address(s) + - (j << PAGE_SHIFT)); - WARN_ONCE(j < pages, - "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size); + *pa++ = as_tagged(sg_dma_address(s) + (j << PAGE_SHIFT)); + WARN_ONCE(j < pages, "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); } if (!(reg->flags & KBASE_REG_IMPORT_PAD) && - WARN_ONCE(count < reg->nr_pages, - "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size)) { + WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { err = -EINVAL; goto err_unmap_attachment; } @@ -1283,8 +1268,7 @@ err_unmap_attachment: return err; } -int kbase_mem_umm_map(struct kbase_context *kctx, - struct kbase_va_region *reg) +int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) { int err; struct kbase_mem_phy_alloc *alloc; @@ -1302,10 +1286,9 @@ int kbase_mem_umm_map(struct kbase_context *kctx, alloc->imported.umm.current_mapping_usage_count++; if (alloc->imported.umm.current_mapping_usage_count != 1) { if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || - alloc->imported.umm.need_sync) { + alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { - err = kbase_mem_do_sync_imported(kctx, reg, - KBASE_SYNC_TO_DEVICE); + err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_DEVICE); 
WARN_ON_ONCE(err); } } @@ -1321,16 +1304,15 @@ int kbase_mem_umm_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); + err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); if (err) goto bad_insert; - if (reg->flags & KBASE_REG_IMPORT_PAD && - !WARN_ON(reg->nr_pages < alloc->nents)) { + if (reg->flags & KBASE_REG_IMPORT_PAD && !WARN_ON(reg->nr_pages < alloc->nents)) { /* For padded imported dma-buf memory, map the dummy aliasing * page from the end of the dma-buf pages, to the end of the * region using a read only mapping. @@ -1350,8 +1332,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, alloc->nents, kctx->as_nr, true); + kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, alloc->nents, kctx->as_nr); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1360,16 +1342,15 @@ bad_map_attachment: return err; } -void kbase_mem_umm_unmap(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg, + struct kbase_mem_phy_alloc *alloc) { alloc->imported.umm.current_mapping_usage_count--; if (alloc->imported.umm.current_mapping_usage_count) { if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || - alloc->imported.umm.need_sync) { + alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { - int err = 
kbase_mem_do_sync_imported(kctx, reg, - KBASE_SYNC_TO_CPU); + int err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_CPU); WARN_ON_ONCE(err); } } @@ -1379,29 +1360,27 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, reg->nr_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, reg->nr_pages, + kctx->as_nr); WARN_ON(err); } kbase_mem_umm_unmap_attachment(kctx, alloc); } -static int get_umm_memory_group_id(struct kbase_context *kctx, - struct dma_buf *dma_buf) +static int get_umm_memory_group_id(struct kbase_context *kctx, struct dma_buf *dma_buf) { int group_id = BASE_MEM_GROUP_DEFAULT; if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) { struct memory_group_manager_import_data mgm_import_data; - mgm_import_data.type = - MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF; + mgm_import_data.type = MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF; mgm_import_data.u.dma_buf = dma_buf; - group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id( - kctx->kbdev->mgm_dev, &mgm_import_data); + group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id(kctx->kbdev->mgm_dev, + &mgm_import_data); } return group_id; @@ -1422,12 +1401,13 @@ static int get_umm_memory_group_id(struct kbase_context *kctx, * This function imports a dma-buf into kctx, and created a kbase_va_region * object that wraps the dma-buf. 
*/ -static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, - int fd, u64 *va_pages, u64 *flags, u32 padding) +static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, + u64 *flags, u32 padding) { struct kbase_va_region *reg; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; + enum kbase_memory_zone zone; bool shared_zone = false; bool need_sync = false; int group_id; @@ -1486,12 +1466,11 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, - KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } + zone = SAME_VA_ZONE; + } else + zone = CUSTOM_VA_ZONE; + + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages); if (!reg) { dma_buf_detach(dma_buf, dma_attachment); @@ -1501,8 +1480,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, group_id = get_umm_memory_group_id(kctx, dma_buf); - reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, - KBASE_MEM_TYPE_IMPORTED_UMM, group_id); + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_UMM, group_id); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc; @@ -1515,8 +1493,8 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, reg->gpu_alloc->nents = 0; reg->flags &= ~KBASE_REG_FREE; - reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ - reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ + reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ if (*flags & BASE_MEM_PROTECTED) reg->flags |= KBASE_REG_PROTECTED; @@ -1540,8 +1518,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, err 
= kbase_mem_umm_map_attachment(kctx, reg); if (err) { - dev_warn(kctx->kbdev->dev, - "Failed to map dma-buf %pK on GPU: %d\n", + dev_warn(kctx->kbdev->dev, "Failed to map dma-buf %pK on GPU: %d\n", dma_buf, err); goto error_out; } @@ -1563,59 +1540,52 @@ no_alloc: u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev) { u32 cpu_cache_line_size = cache_line_size(); - u32 gpu_cache_line_size = - (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); + u32 gpu_cache_line_size = (1UL << kbdev->gpu_props.log2_line_size); - return ((cpu_cache_line_size > gpu_cache_line_size) ? - cpu_cache_line_size : - gpu_cache_line_size); + return ((cpu_cache_line_size > gpu_cache_line_size) ? cpu_cache_line_size : + gpu_cache_line_size); } -static struct kbase_va_region *kbase_mem_from_user_buffer( - struct kbase_context *kctx, unsigned long address, - unsigned long size, u64 *va_pages, u64 *flags) +static struct kbase_va_region *kbase_mem_from_user_buffer(struct kbase_context *kctx, + unsigned long address, unsigned long size, + u64 *va_pages, u64 *flags) { - long i, dma_mapped_pages; struct kbase_va_region *reg; - struct rb_root *rbtree; - long faulted_pages; - int zone = KBASE_REG_ZONE_CUSTOM_VA; + enum kbase_memory_zone zone = CUSTOM_VA_ZONE; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; - struct page **pages = NULL; - struct tagged_addr *pa; - struct device *dev; int write; + long faulted_pages; /* Flag supported only for dma-buf imported memory */ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) return NULL; if ((address & (cache_line_alignment - 1)) != 0 || - (size & (cache_line_alignment - 1)) != 0) { + (size & (cache_line_alignment - 1)) != 0) { if (*flags & BASE_MEM_UNCACHED_GPU) { - dev_warn(kctx->kbdev->dev, - "User buffer is not cache line aligned and marked as GPU uncached\n"); + dev_warn( + kctx->kbdev->dev, + "User buffer is not cache line aligned and marked as 
GPU uncached\n"); goto bad_size; } /* Coherency must be enabled to handle partial cache lines */ - if (*flags & (BASE_MEM_COHERENT_SYSTEM | - BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { + if (*flags & (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { /* Force coherent system required flag, import will * then fail if coherency isn't available */ *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; } else { - dev_warn(kctx->kbdev->dev, - "User buffer is not cache line aligned and no coherency enabled\n"); + dev_warn( + kctx->kbdev->dev, + "User buffer is not cache line aligned and no coherency enabled\n"); goto bad_size; } } - *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - - PFN_DOWN(address); + *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - PFN_DOWN(address); if (!*va_pages) goto bad_size; @@ -1642,19 +1612,15 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - zone = KBASE_REG_ZONE_SAME_VA; - rbtree = &kctx->reg_rbtree_same; - } else - rbtree = &kctx->reg_rbtree_custom; - - reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); + zone = SAME_VA_ZONE; + } + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages); if (!reg) goto no_region; - reg->gpu_alloc = kbase_alloc_create( - kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF, - BASE_MEM_GROUP_DEFAULT); + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF, + BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -1673,130 +1639,36 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->address = address; user_buf->nr_pages = *va_pages; user_buf->mm = current->mm; + user_buf->current_mapping_usage_count = 0; + kbase_user_buf_empty_init(reg); kbase_mem_mmgrab(); if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); else - user_buf->pages = kmalloc_array(*va_pages, - 
sizeof(struct page *), GFP_KERNEL); + user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *), GFP_KERNEL); if (!user_buf->pages) goto no_page_array; - /* If the region is coherent with the CPU then the memory is imported - * and mapped onto the GPU immediately. - * Otherwise get_user_pages is called as a sanity check, but with - * NULL as the pages argument which will fault the pages, but not - * pin them. The memory will then be pinned only around the jobs that - * specify the region as an external resource. - */ - if (reg->flags & KBASE_REG_SHARE_BOTH) { - pages = user_buf->pages; - *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; - } - - down_read(kbase_mem_get_process_mmap_lock()); + reg->gpu_alloc->nents = 0; + reg->extension = 0; write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); -#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE - faulted_pages = get_user_pages(address, *va_pages, - write ? FOLL_WRITE : 0, pages, NULL); -#else - /* pin_user_pages function cannot be called with pages param NULL. - * get_user_pages function will be used instead because it is safe to be - * used with NULL pages param as long as it doesn't have FOLL_GET flag. - */ - if (pages != NULL) { - faulted_pages = - pin_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); - } else { - faulted_pages = - get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); - } -#endif - + down_read(kbase_mem_get_process_mmap_lock()); + faulted_pages = + kbase_get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, NULL, NULL); up_read(kbase_mem_get_process_mmap_lock()); if (faulted_pages != *va_pages) goto fault_mismatch; - reg->gpu_alloc->nents = 0; - reg->extension = 0; - - pa = kbase_get_gpu_phy_pages(reg); - dev = kctx->kbdev->dev; - - if (pages) { - /* Top bit signifies that this was pinned on import */ - user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; - - /* Manual CPU cache synchronization. 
- * - * The driver disables automatic CPU cache synchronization because the - * memory pages that enclose the imported region may also contain - * sub-regions which are not imported and that are allocated and used - * by the user process. This may be the case of memory at the beginning - * of the first page and at the end of the last page. Automatic CPU cache - * synchronization would force some operations on those memory allocations, - * unbeknown to the user process: in particular, a CPU cache invalidate - * upon unmapping would destroy the content of dirty CPU caches and cause - * the user process to lose CPU writes to the non-imported sub-regions. - * - * When the GPU claims ownership of the imported memory buffer, it shall - * commit CPU writes for the whole of all pages that enclose the imported - * region, otherwise the initial content of memory would be wrong. - */ - for (i = 0; i < faulted_pages; i++) { - dma_addr_t dma_addr; -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -#else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); -#endif - if (dma_mapping_error(dev, dma_addr)) - goto unwind_dma_map; - - user_buf->dma_addrs[i] = dma_addr; - pa[i] = as_tagged(page_to_phys(pages[i])); - - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - } - - reg->gpu_alloc->nents = faulted_pages; - } + if (reg->flags & KBASE_REG_SHARE_BOTH) + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; return reg; -unwind_dma_map: - dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop, and perform again - * CPU cache synchronization to re-write the content of dirty CPU caches - * to memory. This precautionary measure is kept here to keep this code - * aligned with kbase_jd_user_buf_map() to allow for a potential refactor - * in the future. 
- */ - for (i = 0; i < dma_mapped_pages; i++) { - dma_addr_t dma_addr = user_buf->dma_addrs[i]; - - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -#else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); -#endif - } fault_mismatch: - if (pages) { - /* In this case, the region was not yet in the region tracker, - * and so there are no CPU mappings to remove before we unpin - * the page - */ - for (i = 0; i < faulted_pages; i++) - kbase_unpin_user_buf_page(pages[i]); - } no_page_array: invalid_flags: kbase_mem_phy_alloc_put(reg->cpu_alloc); @@ -1808,16 +1680,15 @@ bad_size: return NULL; } - -u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, - u64 nents, struct base_mem_aliasing_info *ai, - u64 *num_pages) +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, + struct base_mem_aliasing_info *ai, u64 *num_pages) { struct kbase_va_region *reg; u64 gpu_va; size_t i; bool coherent; uint64_t max_stride; + enum kbase_memory_zone zone; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. 
@@ -1830,19 +1701,17 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, KBASE_DEBUG_ASSERT(num_pages); /* mask to only allowed flags */ - *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | - BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); + *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_COHERENT_SYSTEM | + BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD | + BASE_MEM_COHERENT_SYSTEM_REQUIRED); if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { - dev_warn(kctx->kbdev->dev, - "%s called with bad flags (%llx)", - __func__, - (unsigned long long)*flags); + dev_warn(kctx->kbdev->dev, "%s called with bad flags (%llx)", __func__, + (unsigned long long)*flags); goto bad_flags; } coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || - (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; if (!stride) goto bad_stride; @@ -1869,20 +1738,18 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ + zone = SAME_VA_ZONE; *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, - KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } + } else + zone = CUSTOM_VA_ZONE; + + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *num_pages); if (!reg) goto no_reg; /* zero-sized page array, as we don't need one/can support one */ - reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS, - BASE_MEM_GROUP_DEFAULT); + reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS, BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -1893,7 +1760,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, 
reg->gpu_alloc->imported.alias.nents = nents; reg->gpu_alloc->imported.alias.stride = stride; - reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); + reg->gpu_alloc->imported.alias.aliased = + vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); if (!reg->gpu_alloc->imported.alias.aliased) goto no_aliased_array; @@ -1902,8 +1770,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate and add src handles */ for (i = 0; i < nents; i++) { if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { - if (ai[i].handle.basep.handle != - BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) + if (ai[i].handle.basep.handle != BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) goto bad_handle; /* unsupported magic handle */ if (!ai[i].length) goto bad_handle; /* must be > 0 */ @@ -1917,8 +1784,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, struct kbase_mem_phy_alloc *alloc; aliasing_reg = kbase_region_tracker_find_region_base_address( - kctx, - (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); + kctx, (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) @@ -1962,7 +1828,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, if (ai[i].offset + ai[i].length > alloc->nents) goto bad_handle; /* beyond end */ - reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); + reg->gpu_alloc->imported.alias.aliased[i].alloc = + kbase_mem_phy_alloc_get(alloc); reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; @@ -1997,8 +1864,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, gpu_va <<= PAGE_SHIFT; } else { /* we control the VA */ - if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, - mmu_sync_info) != 0) { + if (kbase_gpu_mmap(kctx, reg, 0, 
*num_pages, 1, mmu_sync_info) != 0) { dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); goto no_mmap; } @@ -2036,8 +1902,7 @@ bad_flags: } int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, - void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, - u64 *flags) + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, u64 *flags) { struct kbase_va_region *reg; @@ -2051,32 +1916,28 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, KBASE_DEBUG_ASSERT(va_pages); KBASE_DEBUG_ASSERT(flags); - if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && - kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) + if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) *flags |= BASE_MEM_SAME_VA; if (!kbase_check_import_flags(*flags)) { - dev_warn(kctx->kbdev->dev, - "%s called with bad flags (%llx)", - __func__, - (unsigned long long)*flags); + dev_warn(kctx->kbdev->dev, "%s called with bad flags (%llx)", __func__, + (unsigned long long)*flags); goto bad_flags; } if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && - (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { - dev_warn(kctx->kbdev->dev, - "%s call required coherent mem when unavailable", - __func__); + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + dev_warn(kctx->kbdev->dev, "%s call required coherent mem when unavailable", + __func__); goto bad_flags; } if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { + !kbase_device_is_cpu_coherent(kctx->kbdev)) { /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } @@ -2085,8 +1946,7 @@ int kbase_mem_import(struct 
kbase_context *kctx, enum base_mem_import_type type, goto bad_flags; } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { - dev_warn(kctx->kbdev->dev, - "padding is only supported for UMM"); + dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); goto bad_flags; } @@ -2097,16 +1957,13 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, if (get_user(fd, (int __user *)phandle)) reg = NULL; else - reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, - padding); - } - break; + reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, padding); + } break; case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { struct base_mem_import_user_buffer user_buffer; void __user *uptr; - if (copy_from_user(&user_buffer, phandle, - sizeof(user_buffer))) { + if (copy_from_user(&user_buffer, phandle, sizeof(user_buffer))) { reg = NULL; } else { #if IS_ENABLED(CONFIG_COMPAT) @@ -2116,9 +1973,8 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, #endif uptr = u64_to_user_ptr(user_buffer.ptr); - reg = kbase_mem_from_user_buffer(kctx, - (unsigned long)uptr, user_buffer.length, - va_pages, flags); + reg = kbase_mem_from_user_buffer(kctx, (unsigned long)uptr, + user_buffer.length, va_pages, flags); } break; } @@ -2148,10 +2004,9 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); *gpu_va <<= PAGE_SHIFT; - } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { + } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { /* we control the VA, mmap now to the GPU */ - if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != - 0) + if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != 0) goto no_gpu_va; /* return real GPU VA */ *gpu_va = reg->start_pfn << PAGE_SHIFT; @@ -2184,9 +2039,8 @@ bad_flags: return -ENOMEM; } -int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 new_pages, - u64 old_pages, +int 
kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 new_pages, u64 old_pages, enum kbase_caller_mmu_sync_info mmu_sync_info) { struct tagged_addr *phy_pages; @@ -2199,14 +2053,13 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, phy_pages = kbase_get_gpu_phy_pages(reg); ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, phy_pages + old_pages, delta, reg->flags, kctx->as_nr, - reg->gpu_alloc->group_id, mmu_sync_info, reg, false); + reg->gpu_alloc->group_id, mmu_sync_info, reg); return ret; } -void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) +void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) { u64 gpu_va_start = reg->start_pfn; @@ -2214,9 +2067,9 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, /* Nothing to do */ return; - unmap_mapping_range(kctx->filp->f_inode->i_mapping, - (gpu_va_start + new_pages)<<PAGE_SHIFT, - (old_pages - new_pages)<<PAGE_SHIFT, 1); + unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + (gpu_va_start + new_pages) << PAGE_SHIFT, + (old_pages - new_pages) << PAGE_SHIFT, 1); } int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, @@ -2228,7 +2081,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, delta, kctx->as_nr, false); + alloc->pages + new_pages, delta, delta, kctx->as_nr); return ret; } @@ -2247,10 +2100,15 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(gpu_addr != 0); + + if (unlikely(gpu_addr == 0)) { + dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr is 0"); + return -EINVAL; + } if (gpu_addr & ~PAGE_MASK) { - dev_warn(kctx->kbdev->dev,
"kbase:mem_commit: gpu_addr: passed parameter is invalid"); + dev_warn(kctx->kbdev->dev, + "kbase:mem_commit: gpu_addr: passed parameter is invalid"); return -EINVAL; } @@ -2328,11 +2186,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) goto out_unlock; } if (reg->cpu_alloc != reg->gpu_alloc) { - if (kbase_alloc_phy_pages_helper( - reg->gpu_alloc, delta) != 0) { + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) != 0) { res = -ENOMEM; - kbase_free_phy_pages_helper(reg->cpu_alloc, - delta); + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); goto out_unlock; } } @@ -2340,15 +2196,13 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) /* No update required for CPU mappings, that's done on fault. */ /* Update GPU mapping. */ - res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, - old_pages, mmu_sync_info); + res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, mmu_sync_info); /* On error free the new pages */ if (res) { kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, - delta); + kbase_free_phy_pages_helper(reg->gpu_alloc, delta); res = -ENOMEM; goto out_unlock; } @@ -2368,8 +2222,8 @@ out_unlock: return res; } -int kbase_mem_shrink(struct kbase_context *const kctx, - struct kbase_va_region *const reg, u64 new_pages) +int kbase_mem_shrink(struct kbase_context *const kctx, struct kbase_va_region *const reg, + u64 new_pages) { u64 delta, old_pages; int err; @@ -2387,37 +2241,36 @@ int kbase_mem_shrink(struct kbase_context *const kctx, return -EINVAL; delta = old_pages - new_pages; + if (kctx->kbdev->pagesize_2mb) { + struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages; + + /* Move the end of new commited range to a valid location. + * This mirrors the adjustment done inside kbase_free_phy_pages_helper(). 
+ */ + while (delta && is_huge(*start_free) && !is_huge_head(*start_free)) { + start_free++; + new_pages++; + delta--; + } + + if (!delta) + return 0; + } /* Update the GPU mapping */ - err = kbase_mem_shrink_gpu_mapping(kctx, reg, - new_pages, old_pages); + err = kbase_mem_shrink_gpu_mapping(kctx, reg, new_pages, old_pages); if (err >= 0) { /* Update all CPU mapping(s) */ - kbase_mem_shrink_cpu_mapping(kctx, reg, - new_pages, old_pages); + kbase_mem_shrink_cpu_mapping(kctx, reg, new_pages, old_pages); kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); - - if (kctx->kbdev->pagesize_2mb) { - if (kbase_reg_current_backed_size(reg) > new_pages) { - old_pages = new_pages; - new_pages = kbase_reg_current_backed_size(reg); - - /* Update GPU mapping. */ - err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, - CALLER_MMU_ASYNC); - } - } else { - WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); - } } return err; } - static void kbase_cpu_vm_open(struct vm_area_struct *vma) { struct kbase_cpu_mapping *map = vma->vm_private_data; @@ -2445,8 +2298,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kbase_gpu_vm_lock(map->kctx); if (map->free_on_close) { - KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_SAME_VA); + KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE); /* Avoid freeing memory on the process death which results in * GPU Page Fault. 
Memory will be freed in kbase_destroy_context */ @@ -2460,21 +2312,20 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kbase_gpu_vm_unlock(map->kctx); kbase_mem_phy_alloc_put(map->alloc); + kbase_file_dec_cpu_mapping_count(map->kctx->kfile); kfree(map); } static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, - struct kbase_va_region *reg, - pgoff_t *start_off, - size_t nr_pages) + struct kbase_va_region *reg, pgoff_t *start_off, + size_t nr_pages) { - struct kbase_aliased *aliased = - reg->cpu_alloc->imported.alias.aliased; + struct kbase_aliased *aliased = reg->cpu_alloc->imported.alias.aliased; - if (!reg->cpu_alloc->imported.alias.stride || - reg->nr_pages < (*start_off + nr_pages)) { + CSTD_UNUSED(vma); + + if (!reg->cpu_alloc->imported.alias.stride || reg->nr_pages < (*start_off + nr_pages)) return NULL; - } while (*start_off >= reg->cpu_alloc->imported.alias.stride) { aliased++; @@ -2495,8 +2346,7 @@ static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { #else static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) @@ -2518,19 +2368,27 @@ static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); + kbase_gpu_vm_lock(map->kctx); + + /* Reject faults for SAME_VA mapping of UMM allocations */ + if ((map->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) && map->free_on_close) { + dev_warn(map->kctx->kbdev->dev, "Invalid CPU access to UMM memory for ctx %d_%d", + map->kctx->tgid, map->kctx->id); + goto exit; + } + map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; - kbase_gpu_vm_lock(map->kctx); if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { struct kbase_aliased *aliased = - 
get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); + get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); if (!aliased) goto exit; nents = aliased->length; pages = aliased->alloc->pages + aliased->offset; - } else { + } else { nents = map->alloc->nents; pages = map->alloc->pages; } @@ -2552,15 +2410,16 @@ static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT); mgm_dev = map->kctx->kbdev->mgm_dev; while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) { - - ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, - map->alloc->group_id, vma, addr << PAGE_SHIFT, - PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, map->alloc->group_id, vma, + addr << PAGE_SHIFT, + PFN_DOWN(as_phys_addr_t(pages[i])), + vma->vm_page_prot); if (ret != VM_FAULT_NOPAGE) goto exit; - i++; addr++; + i++; + addr++; } exit: @@ -2568,19 +2427,13 @@ exit: return ret; } -const struct vm_operations_struct kbase_vm_ops = { - .open = kbase_cpu_vm_open, - .close = kbase_cpu_vm_close, - .fault = kbase_cpu_vm_fault -}; +const struct vm_operations_struct kbase_vm_ops = { .open = kbase_cpu_vm_open, + .close = kbase_cpu_vm_close, + .fault = kbase_cpu_vm_fault }; -static int kbase_cpu_mmap(struct kbase_context *kctx, - struct kbase_va_region *reg, - struct vm_area_struct *vma, - void *kaddr, - size_t nr_pages, - unsigned long aligned_offset, - int free_on_close) +static int kbase_cpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, + struct vm_area_struct *vma, void *kaddr, size_t nr_pages, + unsigned long aligned_offset, int free_on_close) { struct kbase_cpu_mapping *map; int err = 0; @@ -2607,15 +2460,13 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, * See MIDBASE-1057 */ - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); vma->vm_ops = &kbase_vm_ops; vma->vm_private_data = 
map; if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { - pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + - (aligned_offset >> PAGE_SHIFT); - struct kbase_aliased *aliased = - get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); + pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + (aligned_offset >> PAGE_SHIFT); + struct kbase_aliased *aliased = get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); if (!aliased) { err = -EINVAL; @@ -2625,7 +2476,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, } if (!(reg->flags & KBASE_REG_CPU_CACHED) && - (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { + (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_CPU_RD))) { /* We can't map vmalloc'd memory uncached. * Other memory will have been returned from * kbase_mem_pool which would be @@ -2635,12 +2486,12 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); } - if (!kaddr) { - vma->vm_flags |= VM_PFNMAP; - } else { + if (!kaddr) + vm_flags_set(vma, VM_PFNMAP); + else { WARN_ON(aligned_offset); /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ - vma->vm_flags |= VM_MIXEDMAP; + vm_flags_set(vma, VM_MIXEDMAP); /* vmalloc remaping is easy... 
*/ err = remap_vmalloc_range(vma, kaddr, 0); WARN_ON(err); @@ -2661,8 +2512,9 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; list_add(&map->mappings_list, &map->alloc->mappings); + kbase_file_inc_cpu_mapping_count(kctx->kfile); - out: +out: return err; } @@ -2683,10 +2535,8 @@ static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) ; } -static int kbase_mmu_dump_mmap(struct kbase_context *kctx, - struct vm_area_struct *vma, - struct kbase_va_region **const reg, - void **const kmap_addr) +static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, + struct kbase_va_region **const reg, void **const kmap_addr) { struct kbase_va_region *new_reg; void *kaddr; @@ -2709,16 +2559,15 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, goto out; } - new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, - KBASE_REG_ZONE_SAME_VA); + new_reg = kbase_ctx_alloc_free_region(kctx, SAME_VA_ZONE, 0, nr_pages); if (!new_reg) { err = -ENOMEM; WARN_ON(1); goto out; } - new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, - BASE_MEM_GROUP_DEFAULT); + new_reg->cpu_alloc = + kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, BASE_MEM_GROUP_DEFAULT); if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { err = -ENOMEM; new_reg->cpu_alloc = NULL; @@ -2762,10 +2611,8 @@ void kbase_os_mem_map_unlock(struct kbase_context *kctx) up_read(kbase_mem_get_process_mmap_lock()); } -static int kbasep_reg_mmap(struct kbase_context *kctx, - struct vm_area_struct *vma, - struct kbase_va_region **regm, - size_t *nr_pages, size_t *aligned_offset) +static int kbasep_reg_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, + struct kbase_va_region **regm, size_t *nr_pages, size_t *aligned_offset) { unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); @@ -2802,18 +2649,16 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, 
(vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { /* VM flags inconsistent with region flags */ err = -EPERM; - dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", - __FILE__, __LINE__); + dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", __FILE__, __LINE__); goto out; } /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, - reg->nr_pages, 1, mmu_sync_info) != 0) { - dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); + if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, reg->nr_pages, 1, + mmu_sync_info) != 0) { /* Unable to map in GPU space. */ - WARN_ON(1); + dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); err = -ENOMEM; goto out; } @@ -2832,7 +2677,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, * offset so that when user space trims the mapping the beginning of * the trimmed VMA has the correct vm_pgoff; */ - vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); + vma->vm_pgoff = reg->start_pfn - ((*aligned_offset) >> PAGE_SHIFT); out: *regm = reg; dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); @@ -2840,8 +2685,7 @@ out: return err; } -int kbase_context_mmap(struct kbase_context *const kctx, - struct vm_area_struct *const vma) +int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct *const vma) { struct kbase_va_region *reg = NULL; void *kaddr = NULL; @@ -2854,9 +2698,9 @@ int kbase_context_mmap(struct kbase_context *const kctx, dev_dbg(dev, "kbase_mmap\n"); if (!(vma->vm_flags & VM_READ)) - vma->vm_flags &= ~VM_MAYREAD; + vm_flags_clear(vma, VM_MAYREAD); if (!(vma->vm_flags & VM_WRITE)) - vma->vm_flags &= ~VM_MAYWRITE; + vm_flags_clear(vma, VM_MAYWRITE); if (nr_pages == 0) { err = -EINVAL; @@ -2906,8 +2750,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, kbase_gpu_vm_unlock(kctx); err = kbase_csf_cpu_mmap_user_reg_page(kctx, 
vma); goto out; - case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) ... - PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { + case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE)... PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { kbase_gpu_vm_unlock(kctx); mutex_lock(&kctx->csf.lock); err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); @@ -2915,10 +2758,8 @@ int kbase_context_mmap(struct kbase_context *const kctx, goto out; } #endif - case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... - PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { - err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages, - &aligned_offset); + case PFN_DOWN(BASE_MEM_COOKIE_BASE)... PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { + err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages, &aligned_offset); if (err != 0) goto out_unlock; /* free the region on munmap */ @@ -2926,58 +2767,54 @@ int kbase_context_mmap(struct kbase_context *const kctx, break; } default: { - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - (u64)vma->vm_pgoff << PAGE_SHIFT); + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, (u64)vma->vm_pgoff << PAGE_SHIFT); if (!kbase_is_region_invalid_or_free(reg)) { /* will this mapping overflow the size of the region?
*/ - if (nr_pages > (reg->nr_pages - - (vma->vm_pgoff - reg->start_pfn))) { + if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn))) { err = -ENOMEM; goto out_unlock; } - if ((vma->vm_flags & VM_READ && - !(reg->flags & KBASE_REG_CPU_RD)) || - (vma->vm_flags & VM_WRITE && - !(reg->flags & KBASE_REG_CPU_WR))) { + if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { /* VM flags inconsistent with region flags */ err = -EPERM; - dev_err(dev, "%s:%d inconsistent VM flags\n", - __FILE__, __LINE__); + dev_err(dev, "%s:%d inconsistent VM flags\n", __FILE__, __LINE__); goto out_unlock; } - if (KBASE_MEM_TYPE_IMPORTED_UMM == - reg->cpu_alloc->type) { + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { if (0 != (vma->vm_pgoff - reg->start_pfn)) { err = -EINVAL; - dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", + dev_warn( + dev, + "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", __FILE__, __LINE__); goto out_unlock; } - err = dma_buf_mmap( - reg->cpu_alloc->imported.umm.dma_buf, - vma, vma->vm_pgoff - reg->start_pfn); + err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, + vma->vm_pgoff - reg->start_pfn); goto out_unlock; } if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { /* initial params check for aliased dumping map */ if (nr_pages > reg->gpu_alloc->imported.alias.stride || - !reg->gpu_alloc->imported.alias.stride || - !nr_pages) { + !reg->gpu_alloc->imported.alias.stride || !nr_pages) { err = -EINVAL; dev_warn(dev, "mmap aliased: invalid params!\n"); goto out_unlock; } } else if (reg->cpu_alloc->nents < - (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + (vma->vm_pgoff - reg->start_pfn + nr_pages)) { /* limit what we map to the amount currently backed */ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) nr_pages = 0; else - nr_pages = 
reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); + nr_pages = reg->cpu_alloc->nents - + (vma->vm_pgoff - reg->start_pfn); } } else { err = -ENOMEM; @@ -2986,8 +2823,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, } /* default */ } /* switch */ - err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, - free_on_close); + err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); #if defined(CONFIG_MALI_VECTOR_DUMP) if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on @@ -3019,15 +2855,15 @@ out: KBASE_EXPORT_TEST_API(kbase_context_mmap); -void kbase_sync_mem_regions(struct kbase_context *kctx, - struct kbase_vmap_struct *map, enum kbase_sync_type dest) +void kbase_sync_mem_regions(struct kbase_context *kctx, struct kbase_vmap_struct *map, + enum kbase_sync_type dest) { size_t i; off_t const offset = map->offset_in_page; size_t const page_count = PFN_UP(offset + map->size); /* Sync first page */ - size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size); + size_t sz = MIN(((size_t)PAGE_SIZE - offset), map->size); struct tagged_addr cpu_pa = map->cpu_pages[0]; struct tagged_addr gpu_pa = map->gpu_pages[0]; @@ -3069,6 +2905,9 @@ static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pag { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < page_count; i++) { struct page *p = as_page(pages[i]); struct kbase_page_metadata *page_md = kbase_page_private(p); @@ -3118,6 +2957,9 @@ static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *p { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < page_count; i++) { struct page *p = as_page(pages[i]); struct kbase_page_metadata *page_md = kbase_page_private(p); @@ -3218,7 +3060,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi * of all physical pages. 
In case of errors, e.g. too many mappings, * make the page not movable to prevent trouble. */ - if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(reg->gpu_alloc->type)) kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); kfree(pages); @@ -3307,7 +3149,7 @@ out_unlock: } void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map) + struct kbase_vmap_struct *map) { /* 0 is specified for prot_request to indicate no access checks should * be made. @@ -3319,8 +3161,7 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, } KBASE_EXPORT_TEST_API(kbase_vmap); -static void kbase_vunmap_phy_pages(struct kbase_context *kctx, - struct kbase_vmap_struct *map) +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, struct kbase_vmap_struct *map) { void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); @@ -3330,7 +3171,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, * for all physical pages. Now is a good time to do it because references * haven't been released yet. 
*/ - if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(map->gpu_alloc->type)) { const size_t page_count = PFN_UP(map->offset_in_page + map->size); struct tagged_addr *pages_array = map->cpu_pages; @@ -3342,7 +3183,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { size_t page_count = PFN_UP(map->offset_in_page + map->size); - WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages)); + WARN_ON(page_count > (size_t)atomic_read(&kctx->permanent_mapped_pages)); atomic_sub(page_count, &kctx->permanent_mapped_pages); } @@ -3366,11 +3207,14 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) { -#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) - /* To avoid the build breakage due to an unexported kernel symbol - * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, - * we inline here the equivalent of 'add_mm_counter()' from linux - * kernel V5.4.0~8. +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to the type change in rss_stat, + * we inline here the equivalent of 'add_mm_counter()' from linux kernel V6.2. + */ + percpu_counter_add(&mm->rss_stat[member], value); +#elif (KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to an unexported kernel symbol 'mm_trace_rss_stat', + * we inline here the equivalent of 'add_mm_counter()' from linux kernel V5.5. 
*/ atomic_long_add(value, &mm->rss_stat.count[member]); #else @@ -3395,21 +3239,42 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) #endif } +static void kbase_special_vm_open(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + kbase_file_inc_cpu_mapping_count(kctx->kfile); +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + kbase_file_dec_cpu_mapping_count(kctx->kfile); +} + +static const struct vm_operations_struct kbase_vm_special_ops = { + .open = kbase_special_vm_open, + .close = kbase_special_vm_close, +}; + static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { if (vma_pages(vma) != 1) return -EINVAL; /* no real access */ - vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vm_flags_clear(vma, VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } #if MALI_USE_CSF -static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, - struct kbase_queue *queue) +static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, struct kbase_queue *queue) { lockdep_assert_held(&kbdev->csf.reg_lock); @@ -3429,18 +3294,25 @@ static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, static int #if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \ - KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE) + KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE) kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma) +{ #else kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma, unsigned long flags) -#endif { + CSTD_UNUSED(flags); 
+#endif + CSTD_UNUSED(vma); + pr_debug("Unexpected call to mremap method for User IO pages mapping vma\n"); return -EINVAL; } static int kbase_csf_user_io_pages_vm_split(struct vm_area_struct *vma, unsigned long addr) { + CSTD_UNUSED(vma); + CSTD_UNUSED(addr); + pr_debug("Unexpected call to split method for User IO pages mapping vma\n"); return -EINVAL; } @@ -3458,6 +3330,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) struct kbase_device *kbdev; int err; bool reset_prevented = false; + struct kbase_file *kfile; if (!queue) { pr_debug("Close method called for the new User IO pages mapping vma\n"); @@ -3466,6 +3339,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) kctx = queue->kctx; kbdev = kctx->kbdev; + kfile = kctx->kfile; err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) @@ -3483,13 +3357,13 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) if (reset_prevented) kbase_reset_gpu_allow(kbdev); + kbase_file_dec_cpu_mapping_count(kfile); /* Now as the vma is closed, drop the reference on mali device file */ - fput(kctx->filp); + fput(kfile->filp); } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) +static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { #else static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) @@ -3537,25 +3411,25 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) if (vmf->address == doorbell_cpu_addr) { #endif doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue); - ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, - KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr, - doorbell_page_pfn, doorbell_pgprot); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_IO, vma, + doorbell_cpu_addr, doorbell_page_pfn, + doorbell_pgprot); } else { /* Map 
the Input page */ input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE; input_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[0])); - ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, - KBASE_MEM_GROUP_CSF_IO, vma, input_cpu_addr, - input_page_pfn, input_page_pgprot); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_IO, vma, + input_cpu_addr, input_page_pfn, + input_page_pgprot); if (ret != VM_FAULT_NOPAGE) goto exit; /* Map the Output page */ output_cpu_addr = input_cpu_addr + PAGE_SIZE; output_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[1])); - ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, - KBASE_MEM_GROUP_CSF_IO, vma, output_cpu_addr, - output_page_pfn, output_page_pgprot); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_IO, vma, + output_cpu_addr, output_page_pfn, + output_page_pgprot); } exit: @@ -3579,11 +3453,9 @@ static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { * input/output pages & Hw doorbell page. The caller should have validated that * vma->vm_pgoff maps to the range of csf cookies. */ -static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, - struct vm_area_struct *vma) +static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma) { - unsigned long cookie = - vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); + unsigned long cookie = vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); size_t nr_pages = vma_pages(vma); struct kbase_queue *queue; int err = 0; @@ -3617,13 +3489,13 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, if (err) goto map_failed; - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of * memory with and without "struct page" backing are being inserted here. 
* Hw Doorbell pages comes from the device register area so kernel does * not use "struct page" for them. */ - vma->vm_flags |= VM_PFNMAP; + vm_flags_set(vma, VM_PFNMAP); vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; vma->vm_private_data = queue; @@ -3637,6 +3509,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, /* Also adjust the vm_pgoff */ vma->vm_pgoff = queue->db_file_offset; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; map_failed: @@ -3676,6 +3549,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; struct kbase_device *kbdev; + struct kbase_file *kfile; if (unlikely(!kctx)) { pr_debug("Close function called for the unexpected mapping"); @@ -3683,6 +3557,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) } kbdev = kctx->kbdev; + kfile = kctx->kfile; if (unlikely(!kctx->csf.user_reg.vma)) dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", @@ -3694,8 +3569,9 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) kctx->csf.user_reg.vma = NULL; + kbase_file_dec_cpu_mapping_count(kfile); /* Now as the VMA is closed, drop the reference on mali device file */ - fput(kctx->filp); + fput(kfile->filp); } /** @@ -3712,19 +3588,22 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) */ static int #if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \ - (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) + (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma) +{ #else kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags) -#endif { + CSTD_UNUSED(flags); +#endif + CSTD_UNUSED(vma); + pr_debug("Unexpected call to mremap method for USER page mapping vma\n"); return -EINVAL; } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_area_struct *vma, - 
struct vm_fault *vmf) +static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { #else static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) @@ -3764,10 +3643,8 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list); - ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, - KBASE_MEM_GROUP_CSF_FW, vma, - vma->vm_start, pfn, - vma->vm_page_prot); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_FW, vma, + vma->vm_start, pfn, vma->vm_page_prot); mutex_unlock(&kbdev->csf.reg_lock); @@ -3781,8 +3658,7 @@ static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { .fault = kbase_csf_user_reg_vm_fault }; -static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, - struct vm_area_struct *vma) +static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma) { size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); struct kbase_device *kbdev = kctx->kbdev; @@ -3800,12 +3676,12 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, /* Map uncached */ vma->vm_page_prot = pgprot_device(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); /* User register page comes from the device register area so * "struct page" isn't available for it. 
*/ - vma->vm_flags |= VM_PFNMAP; + vm_flags_set(vma, VM_PFNMAP); kctx->csf.user_reg.vma = vma; @@ -3825,6 +3701,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 6dda44b9f128..28666037d8c6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010, 2012-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,9 +28,9 @@ /* A HWC dump mapping */ struct kbase_hwc_dma_mapping { - void *cpu_va; - dma_addr_t dma_pa; - size_t size; + void *cpu_va; + dma_addr_t dma_pa; + size_t size; }; /** @@ -67,8 +67,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages * * Return: 0 on success or error code */ -int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, - u64 *const out); +int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *const out); /** * kbase_mem_import - Import the external memory for use by the GPU @@ -85,8 +84,7 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, * Return: 0 on success or error code */ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, - void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, - u64 *flags); + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, u64 *flags); /** * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more @@ -101,7 +99,8 @@ int 
kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, * * Return: 0 on failure or otherwise the GPU VA for the alias */ -u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, + struct base_mem_aliasing_info *ai, u64 *num_pages); /** * kbase_mem_flags_change - Change the flags for a memory region @@ -113,7 +112,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent * * Return: 0 on success or error code */ -int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, + unsigned int mask); /** * kbase_mem_commit - Change the physical backing size of a region @@ -135,8 +135,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); * * Return: 0 on success or error code */ -int kbase_mem_shrink(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 new_pages); +int kbase_mem_shrink(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_pages); /** * kbase_context_mmap - Memory map method, gets invoked when mmap system call is @@ -150,10 +149,11 @@ int kbase_mem_shrink(struct kbase_context *kctx, int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma); /** - * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction - * mechanism. + * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction mechanism. * @kctx: The kbase context to initialize. * + * This function must be called only when a kbase context is instantiated. + * * Return: Zero on success or -errno on failure. 
*/ int kbase_mem_evictable_init(struct kbase_context *kctx); @@ -180,17 +180,14 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx); * * Note: Caller must be holding the region lock. */ -int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 new_pages, - u64 old_pages, +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 new_pages, u64 old_pages, enum kbase_caller_mmu_sync_info mmu_sync_info); /** * kbase_mem_evictable_make - Make a physical allocation eligible for eviction * @gpu_alloc: The physical allocation to make evictable * - * Return: 0 on success, -errno on error. - * * Take the provided region and make all the physical pages within it * reclaimable by the kernel, updating the per-process VM stats as well. * Remove any CPU mappings (as these can't be removed in the shrinker callback @@ -199,7 +196,7 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, * * Note: Must be called with the region lock of the containing context. */ -int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); +void kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); /** * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for @@ -362,7 +359,7 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, * has been imported. */ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map); + struct kbase_vmap_struct *map); /** * kbase_vunmap - Unmap a GPU VA range from the kernel @@ -394,8 +391,8 @@ extern const struct vm_operations_struct kbase_vm_ops; * Note: The caller shall ensure that CPU mapping is not revoked & remains * active whilst the maintenance is in progress. 
*/ -void kbase_sync_mem_regions(struct kbase_context *kctx, - struct kbase_vmap_struct *map, enum kbase_sync_type dest); +void kbase_sync_mem_regions(struct kbase_context *kctx, struct kbase_vmap_struct *map, + enum kbase_sync_type dest); /** * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation @@ -407,9 +404,8 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, * Shrink (or completely remove) all CPU mappings which reference the shrunk * part of the allocation. */ -void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); +void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); /** * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a @@ -420,8 +416,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, * This function will unmap the kernel mapping, and free any structures used to * track it. */ -void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc); +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc); /** * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent @@ -465,7 +460,7 @@ void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, * - the region containing @gpu_addr does not support a permanent kernel mapping */ void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, - struct kbase_vmap_struct **out_kern_mapping); + struct kbase_vmap_struct **out_kern_mapping); /** * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a @@ -487,7 +482,7 @@ void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, * the physical allocation is still present. 
*/ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, - struct kbase_vmap_struct *kern_mapping); + struct kbase_vmap_struct *kern_mapping); /** * kbase_get_cache_line_alignment - Return cache line alignment @@ -501,21 +496,6 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, */ u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); -#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) -static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, pgprot_t pgprot) -{ - int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); - - if (unlikely(err == -ENOMEM)) - return VM_FAULT_OOM; - if (unlikely(err < 0 && err != -EBUSY)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; -} -#endif - /** * kbase_mem_get_process_mmap_lock - Return the mmap lock for the current process * @@ -530,4 +510,4 @@ static inline struct rw_semaphore *kbase_mem_get_process_mmap_lock(void) #endif /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ } -#endif /* _KBASE_MEM_LINUX_H_ */ +#endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h index 5a1bb16cca01..cb3b5038554c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,34 +29,37 @@ #include /* Flags for kbase_phy_allocator_pages_alloc */ -#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ -#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ -#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ +#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ +#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ +#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ -#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) +#define KBASE_PHY_PAGES_SUPPORTED_FLAGS \ + (KBASE_PHY_PAGES_FLAG_DEFAULT | KBASE_PHY_PAGES_FLAG_CLEAR | KBASE_PHY_PAGES_FLAG_POISON) -#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ +#define KBASE_PHY_PAGES_POISON_VALUE \ + 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ -enum kbase_sync_type { - KBASE_SYNC_TO_CPU, - KBASE_SYNC_TO_DEVICE +enum kbase_sync_type { KBASE_SYNC_TO_CPU, KBASE_SYNC_TO_DEVICE }; + +struct tagged_addr { + phys_addr_t tagged_addr; }; -struct tagged_addr { phys_addr_t tagged_addr; }; - -#define HUGE_PAGE (1u << 0) -#define HUGE_HEAD (1u << 1) +#define HUGE_PAGE (1u << 0) +#define HUGE_HEAD (1u << 1) #define FROM_PARTIAL (1u << 2) #define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K) +#define KBASE_INVALID_PHYSICAL_ADDRESS (~(phys_addr_t)0 & PAGE_MASK) + /* * Note: if macro for converting physical address to page is not defined * in the kernel itself, it is defined hereby. This is to avoid build errors * which are reported during builds for some architectures. 
*/ #ifndef phys_to_page -#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) +#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) #endif /** @@ -142,7 +145,7 @@ static inline bool is_huge(struct tagged_addr t) */ static inline bool is_huge_head(struct tagged_addr t) { - int mask = HUGE_HEAD | HUGE_PAGE; + phys_addr_t mask = HUGE_HEAD | HUGE_PAGE; return mask == (t.tagged_addr & mask); } @@ -176,4 +179,16 @@ static inline unsigned int index_in_large_page(struct tagged_addr t) return (PFN_DOWN(as_phys_addr_t(t)) & (NUM_4K_PAGES_IN_2MB_PAGE - 1)); } +/** + * is_valid_addr() - Check if the physical page has a valid address + * + * @t: tagged address storing the tag in the lower order bits. + * + * Return: true if page has valid physical address, or false + */ +static inline bool is_valid_addr(struct tagged_addr t) +{ + return (as_phys_addr_t(t) != KBASE_INVALID_PHYSICAL_ADDRESS); +} + #endif /* _KBASE_LOWLEVEL_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c index 737f7da5595d..c59036201fbe 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -32,10 +32,33 @@ * provided and if page migration feature is enabled. * Feature is disabled on all platforms by default. */ -int kbase_page_migration_enabled; +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +/* If page migration support is explicitly compiled out, there should be no way to change + * this int. Its value is automatically 0 as a global. + */ +const int kbase_page_migration_enabled; +/* module_param is not called so this value cannot be changed at insmod when compiled + * without support for page migration. 
+ */ +#else +/* -1 as default, 0 when manually set as off and 1 when manually set as on */ +int kbase_page_migration_enabled = -1; module_param(kbase_page_migration_enabled, int, 0444); +MODULE_PARM_DESC(kbase_page_migration_enabled, + "Explicitly enable or disable page migration with 1 or 0 respectively."); +#endif /* !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) */ + KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); +bool kbase_is_page_migration_enabled(void) +{ + /* Handle uninitialised int case */ + if (kbase_page_migration_enabled < 0) + return false; + return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled; +} +KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled); + #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) static const struct movable_operations movable_ops; #endif @@ -43,9 +66,15 @@ static const struct movable_operations movable_ops; bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, u8 group_id) { - struct kbase_page_metadata *page_md = - kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); + struct kbase_page_metadata *page_md; + /* A check for kbase_page_migration_enabled would help here too but it's already being + * checked in the only caller of this function. + */ + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return false; + + page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); if (!page_md) return false; @@ -95,6 +124,8 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, struct kbase_page_metadata *page_md; dma_addr_t dma_addr; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; page_md = kbase_page_private(p); if (!page_md) return; @@ -109,6 +140,10 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, ClearPagePrivate(p); } +#if IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +/* This function is only called when page migration + * support is not explicitly compiled out. 
+ */ static void kbase_free_pages_worker(struct work_struct *work) { struct kbase_mem_migrate *mem_migrate = @@ -121,14 +156,13 @@ static void kbase_free_pages_worker(struct work_struct *work) spin_lock(&mem_migrate->free_pages_lock); list_splice_init(&mem_migrate->free_pages_list, &free_list); spin_unlock(&mem_migrate->free_pages_lock); - list_for_each_entry_safe(p, tmp, &free_list, lru) { u8 group_id = 0; list_del_init(&p->lru); lock_page(p); page_md = kbase_page_private(p); - if (IS_PAGE_MOVABLE(page_md->status)) { + if (page_md && IS_PAGE_MOVABLE(page_md->status)) { __ClearPageMovable(p); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); } @@ -138,11 +172,14 @@ static void kbase_free_pages_worker(struct work_struct *work) kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); } } +#endif void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; spin_lock(&mem_migrate->free_pages_lock); list_add(&p->lru, &mem_migrate->free_pages_list); spin_unlock(&mem_migrate->free_pages_lock); @@ -161,6 +198,9 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) * the movable property. The meta data attached to the PGD page is transferred to the * new (replacement) page. * + * This function returns early with an error if called when not compiled with + * CONFIG_PAGE_MIGRATION_SUPPORT. + * * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure * and the migration is aborted. 
*/ @@ -173,6 +213,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new dma_addr_t new_dma_addr; int ret; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; + /* Create a new dma map for the new page */ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(kbdev->dev, new_dma_addr)) @@ -229,6 +272,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new * allocation, which is used to create CPU mappings. Before returning, the new * page shall be set as movable and not isolated, while the old page shall lose * the movable property. + * + * This function returns early with an error if called when not compiled with + * CONFIG_PAGE_MIGRATION_SUPPORT. */ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) { @@ -237,6 +283,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa dma_addr_t old_dma_addr, new_dma_addr; int ret; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; old_dma_addr = page_md->dma_addr; new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) @@ -248,8 +296,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa kbase_gpu_vm_lock(kctx); /* Unmap the old physical range. */ - unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, - PAGE_SIZE, 1); + unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + page_md->data.mapped.vpfn << PAGE_SHIFT, PAGE_SIZE, 1); ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, @@ -294,6 +342,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa * @mode: LRU Isolation modes. 
* * Callback function for Linux to isolate a page and prepare it for migration. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. * * Return: true on success, false otherwise. */ @@ -303,6 +352,8 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) struct kbase_mem_pool *mem_pool = NULL; struct kbase_page_metadata *page_md = kbase_page_private(p); + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return false; CSTD_UNUSED(mode); if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) @@ -394,6 +445,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Callback function for Linux to migrate the content of the old page to the * new page provided. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. * * Return: 0 on success, error code otherwise. */ @@ -419,7 +471,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum #endif CSTD_UNUSED(mode); - if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + if (!kbase_is_page_migration_enabled() || !page_md || !IS_PAGE_MOVABLE(page_md->status)) return -EINVAL; if (!spin_trylock(&page_md->migrate_lock)) @@ -505,6 +557,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum * will only be called for a page that has been isolated but failed to * migrate. This function will put back the given page to the state it was * in before it was isolated. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. 
*/ static void kbase_page_putback(struct page *p) { @@ -514,6 +567,8 @@ static void kbase_page_putback(struct page *p) struct kbase_page_metadata *page_md = kbase_page_private(p); struct kbase_device *kbdev = NULL; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; /* If we don't have page metadata, the page may not belong to the * driver or may already have been freed, and there's nothing we can do */ @@ -591,6 +646,9 @@ static const struct address_space_operations kbase_address_space_ops = { #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + if (!kbase_is_page_migration_enabled()) + return; + mutex_lock(&kbdev->fw_load_lock); if (filp) { @@ -613,26 +671,41 @@ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct void kbase_mem_migrate_init(struct kbase_device *kbdev) { +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) + /* Page migration explicitly disabled at compile time - do nothing */ + return; +#else struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + /* Page migration support compiled in, either explicitly or + * by default, so the default behaviour is to follow the choice + * of large pages if not selected at insmod. Check insmod parameter + * integer for a negative value to see if insmod parameter was + * passed in at all (it will override the default negative value). + */ if (kbase_page_migration_enabled < 0) - kbase_page_migration_enabled = 0; + kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0; + else + dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.", + kbase_page_migration_enabled ? 
"enabled" : "disabled"); spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); -#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) - mem_migrate->inode = NULL; -#endif mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); +#endif } void kbase_mem_migrate_term(struct kbase_device *kbdev) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) + /* Page migration explicitly disabled at compile time - do nothing */ + return; +#endif if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h index 76bbc999e110..a51a6ce19754 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -18,6 +18,8 @@ * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#ifndef _KBASE_MEM_MIGRATE_H +#define _KBASE_MEM_MIGRATE_H /** * DOC: Base kernel page migration implementation. @@ -29,7 +31,7 @@ #define PAGE_ISOLATE_SHIFT (7) #define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT) -#define PAGE_ISOLATE_SET(status, value) \ +#define PAGE_ISOLATE_SET(status, value) \ ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT)) #define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK)) @@ -43,7 +45,11 @@ /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. 
*/ +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +extern const int kbase_page_migration_enabled; +#else extern int kbase_page_migration_enabled; +#endif /** * kbase_alloc_page_metadata - Allocate and initialize page metadata @@ -63,6 +69,8 @@ extern int kbase_page_migration_enabled; bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, u8 group_id); +bool kbase_is_page_migration_enabled(void); + /** * kbase_free_page_later - Defer freeing of given page. * @kbdev: Pointer to kbase device @@ -94,6 +102,7 @@ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct * * Enables page migration by default based on GPU and setup work queue to * defer freeing pages during page migration callbacks. + * This function must be called only when a kbase device is initialized. */ void kbase_mem_migrate_init(struct kbase_device *kbdev); @@ -106,3 +115,5 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev); * and destroy workqueue associated. */ void kbase_mem_migrate_term(struct kbase_device *kbdev); + +#endif /* _KBASE_MEM_MIGRATE_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index fa8f34d86c24..159d84042366 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,10 @@ #include #endif -#define pool_dbg(pool, format, ...) \ - dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ - (pool->next_pool) ? 
"kctx" : "kbdev", \ - kbase_mem_pool_size(pool), \ - kbase_mem_pool_max_size(pool), \ - ##__VA_ARGS__) +#define pool_dbg(pool, format, ...) \ + dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ + (pool->next_pool) ? "kctx" : "kbdev", kbase_mem_pool_size(pool), \ + kbase_mem_pool_max_size(pool), ##__VA_ARGS__) #define NOT_DIRTY false #define NOT_RECLAIMED false @@ -51,34 +49,28 @@ * @page_owner: Pointer to the task/process that created the Kbase context * for which a page needs to be allocated. It can be NULL if * the page won't be associated with Kbase context. - * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. * - * This function checks if the current thread is a kernel thread and can make a - * request to kernel to allocate a physical page. If the kernel thread is allocating - * a page for the Kbase context and the process that created the context is exiting - * or is being killed, then there is no point in doing a page allocation. + * This function checks if the current thread can make a request to kernel to + * allocate a physical page. If the process that created the context is exiting or + * is being killed, then there is no point in doing a page allocation. * * The check done by the function is particularly helpful when the system is running * low on memory. When a page is allocated from the context of a kernel thread, OoM * killer doesn't consider the kernel thread for killing and kernel keeps retrying * to allocate the page as long as the OoM killer is able to kill processes. - * The check allows kernel thread to quickly exit the page allocation loop once OoM - * killer has initiated the killing of @page_owner, thereby unblocking the context - * termination for @page_owner and freeing of GPU memory allocated by it. 
This helps - * in preventing the kernel panic and also limits the number of innocent processes + * The check allows the page allocation loop to be exited quickly once OoM killer has + * initiated the killing of @page_owner, thereby unblocking the context termination + * for @page_owner and freeing of GPU memory allocated by it. This helps in + * preventing the kernel panic and also limits the number of innocent processes * that get killed. * * Return: true if the page can be allocated otherwise false. */ -static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, - const bool alloc_from_kthread) +static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner) { - if (likely(!alloc_from_kthread || !page_owner)) - return true; - - if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { - dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", - __func__, page_owner->comm, task_pid_nr(page_owner)); + if (page_owner && ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner))) { + dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm, + task_pid_nr(page_owner)); return false; } @@ -115,17 +107,21 @@ static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page * Only update page status and add the page to the memory pool if * it is not isolated. 
*/ - spin_lock(&page_md->migrate_lock); - if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) not_movable = true; - } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { - page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); - page_md->data.mem_pool.pool = pool; - page_md->data.mem_pool.kbdev = pool->kbdev; - list_add(&p->lru, page_list); - (*list_size)++; + else { + spin_lock(&page_md->migrate_lock); + if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + not_movable = true; + } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); + page_md->data.mem_pool.pool = pool; + page_md->data.mem_pool.kbdev = pool->kbdev; + list_add(&p->lru, page_list); + (*list_size)++; + } + spin_unlock(&page_md->migrate_lock); } - spin_unlock(&page_md->migrate_lock); if (not_movable) { kbase_free_page_later(pool->kbdev, p); @@ -135,14 +131,13 @@ static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page return not_movable; } -static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, - struct page *p) +static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { bool queue_work_to_free = false; lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) queue_work_to_free = true; } else { @@ -166,14 +161,14 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) kbase_mem_pool_unlock(pool); } -static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, - struct list_head *page_list, size_t nr_pages) +static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, + size_t nr_pages) { bool queue_work_to_free = false; 
lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { struct page *p, *tmp; list_for_each_entry_safe(p, tmp, page_list, lru) { @@ -195,8 +190,8 @@ static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, pool_dbg(pool, "added %zu pages\n", nr_pages); } -static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, - struct list_head *page_list, size_t nr_pages) +static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, struct list_head *page_list, + size_t nr_pages) { kbase_mem_pool_lock(pool); kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); @@ -215,7 +210,7 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool, p = list_first_entry(&pool->page_list, struct page, lru); - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(p); spin_lock(&page_md->migrate_lock); @@ -244,8 +239,7 @@ static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool, return p; } -static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, - struct page *p) +static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, struct page *p) { struct device *dev = pool->kbdev->dev; dma_addr_t dma_addr = pool->order ? 
kbase_dma_addr_as_priv(p) : kbase_dma_addr(p); @@ -253,19 +247,17 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, dma_sync_single_for_device(dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); } -static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, - struct page *p) +static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, struct page *p) { - int i; + uint i; for (i = 0; i < (1U << pool->order); i++) - clear_highpage(p+i); + clear_highpage(p + i); kbase_mem_pool_sync_page(pool, p); } -static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, - struct page *p) +static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, struct page *p) { /* Zero page before spilling */ kbase_mem_pool_zero_page(next_pool, p); @@ -273,37 +265,42 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, kbase_mem_pool_add(next_pool, p); } -struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) +struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool, const bool alloc_from_kthread) { struct page *p; gfp_t gfp = __GFP_ZERO; struct kbase_device *const kbdev = pool->kbdev; struct device *const dev = kbdev->dev; dma_addr_t dma_addr; - int i; + uint i; /* don't warn on higher order failures */ if (pool->order) gfp |= GFP_HIGHUSER | __GFP_NOWARN; else - gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + gfp |= kbase_is_page_migration_enabled() ? 
GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + /* Do not invoke OOM killer if allocation is done from the context of kernel thread */ + if (alloc_from_kthread) { +#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) + gfp |= __GFP_RETRY_MAYFAIL; +#else + gfp |= __GFP_REPEAT; +#endif + } - p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, - pool->group_id, gfp, pool->order); + p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, pool->group_id, gfp, pool->order); if (!p) return NULL; - dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), - DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) { - kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, - pool->group_id, p, pool->order); + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); return NULL; } /* Setup page metadata for 4KB pages when page migration is enabled */ - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { INIT_LIST_HEAD(&p->lru); if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -324,7 +321,7 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) { struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; - if (!pool->order && kbase_page_migration_enabled) + if (!pool->order && kbase_is_page_migration_enabled()) queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); } @@ -339,11 +336,11 @@ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) kbdev = pool->kbdev; - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { kbase_free_page_later(kbdev, p); pool_dbg(pool, "page to be freed to kernel later\n"); } else { - int i; + uint i; dma_addr_t dma_addr = kbase_dma_addr_as_priv(p); for (i = 0; i < (1u << 
pool->order); i++) @@ -357,8 +354,7 @@ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) } } -static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, - size_t nr_to_shrink) +static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, size_t nr_to_shrink) { struct page *p; size_t i; @@ -376,8 +372,7 @@ static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, return i; } -static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, - size_t nr_to_shrink) +static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, size_t nr_to_shrink) { size_t nr_freed; @@ -408,10 +403,10 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, } kbase_mem_pool_unlock(pool); - if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + if (unlikely(!can_alloc_page(pool, page_owner))) return -ENOMEM; - p = kbase_mem_alloc_page(pool); + p = kbase_mem_alloc_page(pool, alloc_from_kthread); if (!p) { kbase_mem_pool_lock(pool); pool->dont_reclaim = false; @@ -448,9 +443,10 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) if (err) { size_t grown_size = kbase_mem_pool_size(pool); - dev_warn(pool->kbdev->dev, - "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", - (new_size - cur_size), (grown_size - cur_size)); + dev_warn( + pool->kbdev->dev, + "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", + (new_size - cur_size), (grown_size - cur_size)); } } @@ -474,11 +470,13 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, - struct shrink_control *sc) + struct shrink_control *sc) { struct kbase_mem_pool *pool; size_t pool_size; + CSTD_UNUSED(sc); + pool = container_of(s, struct kbase_mem_pool, reclaim); 
kbase_mem_pool_lock(pool); @@ -496,7 +494,7 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, } static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, - struct shrink_control *sc) + struct shrink_control *sc) { struct kbase_mem_pool *pool; unsigned long freed; @@ -527,8 +525,7 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool unsigned int order, int group_id, struct kbase_device *kbdev, struct kbase_mem_pool *next_pool) { - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { return -EINVAL; } @@ -577,7 +574,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) size_t nr_to_spill = 0; LIST_HEAD(spill_list); LIST_HEAD(free_list); - int i; + size_t i; pool_dbg(pool, "terminate()\n"); @@ -629,9 +626,10 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Before returning wait to make sure there are no pages undergoing page isolation * which will require reference to this pool. 
*/ - while (atomic_read(&pool->isolation_in_progress_cnt)) - cpu_relax(); - + if (kbase_is_page_migration_enabled()) { + while (atomic_read(&pool->isolation_in_progress_cnt)) + cpu_relax(); + } pool_dbg(pool, "terminated\n"); } KBASE_EXPORT_TEST_API(kbase_mem_pool_term); @@ -661,8 +659,7 @@ struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); } -void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, - bool dirty) +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, bool dirty) { struct kbase_mem_pool *next_pool = pool->next_pool; @@ -685,8 +682,7 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, } } -void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, - bool dirty) +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, bool dirty) { pool_dbg(pool, "free_locked()\n"); @@ -730,17 +726,15 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); while (nr_from_pool--) { - int j; + uint j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); if (pool->order) { - pages[i++] = as_tagged_tag(page_to_phys(p), - HUGE_HEAD | HUGE_PAGE); + pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); for (j = 1; j < (1u << pool->order); j++) - pages[i++] = as_tagged_tag(page_to_phys(p) + - PAGE_SIZE * j, - HUGE_PAGE); + pages[i++] = + as_tagged_tag(page_to_phys(p) + PAGE_SIZE * j, HUGE_PAGE); } else { pages[i++] = as_tagged(page_to_phys(p)); } @@ -759,10 +753,10 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, } else { /* Get any remaining pages from kernel */ while (i != nr_4k_pages) { - if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + if (unlikely(!can_alloc_page(pool, page_owner))) goto err_rollback; - p = 
kbase_mem_alloc_page(pool); + p = kbase_mem_alloc_page(pool, alloc_from_kthread); if (!p) { if (partial_allowed) goto done; @@ -771,17 +765,14 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, } if (pool->order) { - int j; + uint j; - pages[i++] = as_tagged_tag(page_to_phys(p), - HUGE_PAGE | - HUGE_HEAD); + pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_PAGE | HUGE_HEAD); for (j = 1; j < (1u << pool->order); j++) { phys_addr_t phys; phys = page_to_phys(p) + PAGE_SIZE * j; - pages[i++] = as_tagged_tag(phys, - HUGE_PAGE); + pages[i++] = as_tagged_tag(phys, HUGE_PAGE); } } else { pages[i++] = as_tagged(page_to_phys(p)); @@ -798,8 +789,8 @@ err_rollback: return err; } -int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, - size_t nr_4k_pages, struct tagged_addr *pages) +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, size_t nr_4k_pages, + struct tagged_addr *pages) { struct page *p; size_t i; @@ -813,8 +804,7 @@ int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, return -EINVAL; pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); - pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", - nr_pages_internal); + pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", nr_pages_internal); if (kbase_mem_pool_size(pool) < nr_pages_internal) { pool_dbg(pool, "Failed alloc\n"); @@ -822,16 +812,14 @@ int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, } for (i = 0; i < nr_pages_internal; i++) { - int j; + uint j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); if (pool->order) { - *pages++ = as_tagged_tag(page_to_phys(p), - HUGE_HEAD | HUGE_PAGE); + *pages++ = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); for (j = 1; j < (1u << pool->order); j++) { - *pages++ = as_tagged_tag(page_to_phys(p) + - PAGE_SIZE * j, - HUGE_PAGE); + *pages++ = + as_tagged_tag(page_to_phys(p) + PAGE_SIZE * j, HUGE_PAGE); } } else { *pages++ = as_tagged(page_to_phys(p)); @@ 
-841,9 +829,8 @@ int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, return nr_4k_pages; } -static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, - bool zero, bool sync) +static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool zero, bool sync) { struct page *p; size_t nr_to_pool = 0; @@ -853,12 +840,11 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, if (!nr_pages) return; - pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", - nr_pages, zero, sync); + pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", nr_pages, zero, sync); /* Zero/sync pages first without holding the pool lock */ for (i = 0; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) + if (unlikely(!is_valid_addr(pages[i]))) continue; if (is_huge_head(pages[i]) || !is_huge(pages[i])) { @@ -871,19 +857,17 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, list_add(&p->lru, &new_page_list); nr_to_pool++; } - pages[i] = as_tagged(0); + pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); } /* Add new page list to pool */ kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); - pool_dbg(pool, "add_array(%zu) added %zu pages\n", - nr_pages, nr_to_pool); + pool_dbg(pool, "add_array(%zu) added %zu pages\n", nr_pages, nr_to_pool); } -static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, - bool zero, bool sync) +static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool zero, bool sync) { struct page *p; size_t nr_to_pool = 0; @@ -895,12 +879,11 @@ static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, if (!nr_pages) return; - pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", - nr_pages, zero, sync); + pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", 
nr_pages, zero, sync); /* Zero/sync pages first */ for (i = 0; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) + if (unlikely(!is_valid_addr(pages[i]))) continue; if (is_huge_head(pages[i]) || !is_huge(pages[i])) { @@ -913,18 +896,17 @@ static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, list_add(&p->lru, &new_page_list); nr_to_pool++; } - pages[i] = as_tagged(0); + pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); } /* Add new page list to pool */ kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); - pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", - nr_pages, nr_to_pool); + pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", nr_pages, nr_to_pool); } void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - struct tagged_addr *pages, bool dirty, bool reclaimed) + struct tagged_addr *pages, bool dirty, bool reclaimed) { struct kbase_mem_pool *next_pool = pool->next_pool; struct page *p; @@ -949,25 +931,24 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, nr_to_pool = kbase_mem_pool_capacity(next_pool); nr_to_pool = min(nr_pages - i, nr_to_pool); - kbase_mem_pool_add_array(next_pool, nr_to_pool, - pages + i, true, dirty); + kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i, true, dirty); i += nr_to_pool; } } /* Free any remaining pages to kernel */ for (; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) + if (unlikely(!is_valid_addr(pages[i]))) continue; if (is_huge(pages[i]) && !is_huge_head(pages[i])) { - pages[i] = as_tagged(0); + pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); continue; } p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); - pages[i] = as_tagged(0); + pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); pages_released = true; } @@ -978,10 +959,8 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); } - -void 
kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, bool dirty, - bool reclaimed) +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool dirty, bool reclaimed) { struct page *p; size_t nr_to_pool; @@ -998,26 +977,25 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, nr_to_pool = kbase_mem_pool_capacity(pool); nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, - dirty); + kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, dirty); i += nr_to_pool; } /* Free any remaining pages to kernel */ for (; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) + if (unlikely(!is_valid_addr(pages[i]))) continue; if (is_huge(pages[i]) && !is_huge_head(pages[i])) { - pages[i] = as_tagged(0); + pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); continue; } p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); - pages[i] = as_tagged(0); + pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); pages_released = true; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c index 3b1b2bae15c8..b757a1941124 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,25 +25,21 @@ #include "mali_kbase_mem_pool_debugfs.h" #include "mali_kbase_debugfs_helper.h" -void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, - size_t const value) +void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, size_t const value) { struct kbase_mem_pool *const mem_pools = array; - if (WARN_ON(!mem_pools) || - WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(!mem_pools) || WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return; kbase_mem_pool_trim(&mem_pools[index], value); } -void kbase_mem_pool_debugfs_set_max_size(void *const array, - size_t const index, size_t const value) +void kbase_mem_pool_debugfs_set_max_size(void *const array, size_t const index, size_t const value) { struct kbase_mem_pool *const mem_pools = array; - if (WARN_ON(!mem_pools) || - WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(!mem_pools) || WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return; kbase_mem_pool_set_max_size(&mem_pools[index], value); @@ -53,8 +49,7 @@ size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index) { struct kbase_mem_pool *const mem_pools = array; - if (WARN_ON(!mem_pools) || - WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(!mem_pools) || WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return 0; return kbase_mem_pool_size(&mem_pools[index]); @@ -64,32 +59,28 @@ size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index) { struct kbase_mem_pool *const mem_pools = array; - if (WARN_ON(!mem_pools) || - WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(!mem_pools) || WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return 0; return kbase_mem_pool_max_size(&mem_pools[index]); } -void kbase_mem_pool_config_debugfs_set_max_size(void *const array, - size_t const 
index, size_t const value) +void kbase_mem_pool_config_debugfs_set_max_size(void *const array, size_t const index, + size_t const value) { struct kbase_mem_pool_config *const configs = array; - if (WARN_ON(!configs) || - WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(!configs) || WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return; kbase_mem_pool_config_set_max_size(&configs[index], value); } -size_t kbase_mem_pool_config_debugfs_max_size(void *const array, - size_t const index) +size_t kbase_mem_pool_config_debugfs_max_size(void *const array, size_t const index) { struct kbase_mem_pool_config *const configs = array; - if (WARN_ON(!configs) || - WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + if (WARN_ON(!configs) || WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) return 0; return kbase_mem_pool_config_get_max_size(&configs[index]); @@ -98,25 +89,24 @@ size_t kbase_mem_pool_config_debugfs_max_size(void *const array, static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data) { CSTD_UNUSED(data); - return kbase_debugfs_helper_seq_read(sfile, - MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size); + return kbase_debugfs_helper_seq_read(sfile, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); } -static ssize_t kbase_mem_pool_debugfs_write(struct file *file, - const char __user *ubuf, size_t count, loff_t *ppos) +static ssize_t kbase_mem_pool_debugfs_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *ppos) { - int err; + ssize_t err; CSTD_UNUSED(ppos); - err = kbase_debugfs_helper_seq_write(file, ubuf, count, - MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim); - return err ? err : count; + err = kbase_debugfs_helper_seq_write(file, ubuf, count, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); + return err ? 
err : (ssize_t)count; } static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbase_mem_pool_debugfs_size_show, - in->i_private); + return single_open(file, kbase_mem_pool_debugfs_size_show, in->i_private); } static const struct file_operations kbase_mem_pool_debugfs_fops = { @@ -128,32 +118,27 @@ static const struct file_operations kbase_mem_pool_debugfs_fops = { .release = single_release, }; -static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, - void *data) +static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, void *data) { CSTD_UNUSED(data); - return kbase_debugfs_helper_seq_read(sfile, - MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_max_size); + return kbase_debugfs_helper_seq_read(sfile, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); } -static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, - const char __user *ubuf, size_t count, loff_t *ppos) +static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *ppos) { - int err; + ssize_t err; CSTD_UNUSED(ppos); - err = kbase_debugfs_helper_seq_write(file, ubuf, count, - MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_set_max_size); - return err ? err : count; + err = kbase_debugfs_helper_seq_write(file, ubuf, count, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + return err ? 
err : (ssize_t)count; } -static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, - struct file *file) +static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, struct file *file) { - return single_open(file, kbase_mem_pool_debugfs_max_size_show, - in->i_private); + return single_open(file, kbase_mem_pool_debugfs_max_size_show, in->i_private); } static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { @@ -165,20 +150,19 @@ static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { .release = single_release, }; -void kbase_mem_pool_debugfs_init(struct dentry *parent, - struct kbase_context *kctx) +void kbase_mem_pool_debugfs_init(struct dentry *parent, struct kbase_context *kctx) { const mode_t mode = 0644; - debugfs_create_file("mem_pool_size", mode, parent, - &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); + debugfs_create_file("mem_pool_size", mode, parent, &kctx->mem_pools.small, + &kbase_mem_pool_debugfs_fops); - debugfs_create_file("mem_pool_max_size", mode, parent, - &kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops); + debugfs_create_file("mem_pool_max_size", mode, parent, &kctx->mem_pools.small, + &kbase_mem_pool_debugfs_max_size_fops); - debugfs_create_file("lp_mem_pool_size", mode, parent, - &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); + debugfs_create_file("lp_mem_pool_size", mode, parent, &kctx->mem_pools.large, + &kbase_mem_pool_debugfs_fops); - debugfs_create_file("lp_mem_pool_max_size", mode, parent, - &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); + debugfs_create_file("lp_mem_pool_max_size", mode, parent, &kctx->mem_pools.large, + &kbase_mem_pool_debugfs_max_size_fops); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h index 207b58536a58..9787bee40c0f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h +++ 
b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,8 +35,7 @@ * - lp_mem_pool_size: get/set the current sizes of @kctx: lp_mem_pool * - lp_mem_pool_max_size: get/set the max sizes of @kctx:lp_mem_pool */ -void kbase_mem_pool_debugfs_init(struct dentry *parent, - struct kbase_context *kctx); +void kbase_mem_pool_debugfs_init(struct dentry *parent, struct kbase_context *kctx); /** * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size @@ -64,8 +63,7 @@ void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value); * If the maximum size is reduced, the pool will be shrunk to adhere to the * new limit. For details see kbase_mem_pool_shrink(). */ -void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, - size_t value); +void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, size_t value); /** * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool @@ -102,8 +100,7 @@ size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index); * @value : Maximum number of free pages that a memory pool created from the * selected configuration can hold. 
*/ -void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, - size_t value); +void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, size_t value); /** * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages @@ -118,5 +115,4 @@ void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, */ size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index); -#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ - +#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c index 49c4b041e13a..c1fcca6b47dc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,21 +25,17 @@ #include -void kbase_mem_pool_group_config_set_max_size( - struct kbase_mem_pool_group_config *const configs, - size_t const max_size) +void kbase_mem_pool_group_config_set_max_size(struct kbase_mem_pool_group_config *const configs, + size_t const max_size) { - size_t const large_max_size = max_size >> - (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - - KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); + size_t const large_max_size = max_size >> (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); int gid; for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { - kbase_mem_pool_config_set_max_size(&configs->small[gid], - max_size); + kbase_mem_pool_config_set_max_size(&configs->small[gid], max_size); - kbase_mem_pool_config_set_max_size(&configs->large[gid], - large_max_size); + 
kbase_mem_pool_config_set_max_size(&configs->large[gid], large_max_size); } } @@ -84,8 +80,7 @@ int kbase_mem_pool_group_init(struct kbase_mem_pool_group *const mem_pools, return err; } -void kbase_mem_pool_group_mark_dying( - struct kbase_mem_pool_group *const mem_pools) +void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *const mem_pools) { int gid; @@ -95,8 +90,7 @@ void kbase_mem_pool_group_mark_dying( } } -void kbase_mem_pool_group_term( - struct kbase_mem_pool_group *const mem_pools) +void kbase_mem_pool_group_term(struct kbase_mem_pool_group *const mem_pools) { int gid; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h index fe8ce775258f..27dd935c87cf 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,8 +34,8 @@ * * Return: A pointer to the selected memory pool. */ -static inline struct kbase_mem_pool *kbase_mem_pool_group_select( - struct kbase_device *kbdev, u32 mem_group_id, bool is_small_page) +static inline struct kbase_mem_pool * +kbase_mem_pool_group_select(struct kbase_device *kbdev, u32 mem_group_id, bool is_small_page) { if (WARN_ON(unlikely(kbdev == NULL))) return NULL; @@ -60,8 +60,8 @@ static inline struct kbase_mem_pool *kbase_mem_pool_group_select( * The equivalent number of 2 MiB pages is calculated automatically for the * purpose of configuring the large page pools. 
*/ -void kbase_mem_pool_group_config_set_max_size( - struct kbase_mem_pool_group_config *configs, size_t max_size); +void kbase_mem_pool_group_config_set_max_size(struct kbase_mem_pool_group_config *configs, + size_t max_size); /** * kbase_mem_pool_group_init - Initialize a set of memory pools diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c index 9317023b71bb..dc41a3fa0503 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2017, 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,8 @@ static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) { struct kbase_context *kctx = sfile->private; + CSTD_UNUSED(data); + mutex_lock(&kctx->mem_profile_lock); seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); @@ -66,8 +68,7 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = { .release = single_release, }; -int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, size_t size) { const mode_t mode = 0444; int err = 0; @@ -79,14 +80,13 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { - err = -ENOMEM; - } else if (IS_ERR_OR_NULL(debugfs_create_file("mem_profile", - mode, kctx->kctx_dentry, kctx, - &kbasep_mem_profile_debugfs_fops))) { + err = -ENOMEM; + } else if 
(IS_ERR_OR_NULL(debugfs_create_file("mem_profile", mode, + kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops))) { err = -EAGAIN; } else { - kbase_ctx_flag_set(kctx, - KCTX_MEM_PROFILE_INITIALIZED); + kbase_ctx_flag_set(kctx, KCTX_MEM_PROFILE_INITIALIZED); } } @@ -98,8 +98,8 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, kfree(data); } - dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", - err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", err, + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); mutex_unlock(&kctx->mem_profile_lock); @@ -111,7 +111,7 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) mutex_lock(&kctx->mem_profile_lock); dev_dbg(kctx->kbdev->dev, "initialised: %d", - kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); kfree(kctx->mem_profile_data); kctx->mem_profile_data = NULL; @@ -122,8 +122,7 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) #else /* CONFIG_DEBUG_FS */ -int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, size_t size) { kfree(data); return 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h index c30fca6652dc..d9729e3efbbb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,8 +57,6 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); * @post @ref mem_profile_initialized will be set to @c true * the first time this function succeeds. */ -int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size); - -#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, size_t size); +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c index 10a7f506b1a4..e836011376b1 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,8 @@ #include #include +#include + /** * kbase_native_mgm_alloc - Native physical memory allocation method * @@ -41,9 +43,8 @@ * * Return: Pointer to allocated page, or NULL if allocation failed. */ -static struct page *kbase_native_mgm_alloc( - struct memory_group_manager_device *mgm_dev, int group_id, - gfp_t gfp_mask, unsigned int order) +static struct page *kbase_native_mgm_alloc(struct memory_group_manager_device *mgm_dev, + int group_id, gfp_t gfp_mask, unsigned int order) { /* * Check that the base and the mgm defines, from separate header files, @@ -54,9 +55,8 @@ static struct page *kbase_native_mgm_alloc( * Check that the mask used for storing the memory group ID is big * enough for the largest possible memory group ID. 
*/ - BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK - >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - < (BASE_MEM_GROUP_COUNT - 1)); + BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) < + (BASE_MEM_GROUP_COUNT - 1)); CSTD_UNUSED(mgm_dev); CSTD_UNUSED(group_id); @@ -78,8 +78,8 @@ static struct page *kbase_native_mgm_alloc( * * Delegates all memory freeing requests to the kernel's __free_pages function. */ -static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, - int group_id, struct page *page, unsigned int order) +static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, int group_id, + struct page *page, unsigned int order) { CSTD_UNUSED(mgm_dev); CSTD_UNUSED(group_id); @@ -104,10 +104,10 @@ static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table * entry was successfully installed. */ -static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot( - struct memory_group_manager_device *mgm_dev, int group_id, - struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, pgprot_t pgprot) +static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(struct memory_group_manager_device *mgm_dev, + int group_id, struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + pgprot_t pgprot) { CSTD_UNUSED(mgm_dev); CSTD_UNUSED(group_id); @@ -129,9 +129,8 @@ static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot( * * Return: A GPU page table entry to be stored in a page table. 
*/ -static u64 -kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, - int group_id, int mmu_level, u64 pte) +static u64 kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, + int group_id, int mmu_level, u64 pte) { CSTD_UNUSED(mgm_dev); CSTD_UNUSED(group_id); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c index b65f9e7b5162..9f75c3371c15 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,9 +23,12 @@ #include #include -#define DTB_SET_SIZE 2 -static bool read_setting_valid(unsigned int id, unsigned int read_setting) +#if MALI_USE_CSF +#define DTB_SET_SIZE 2 +#endif + +static bool read_setting_valid(unsigned int prod_model, unsigned int id, unsigned int read_setting) { switch (id) { /* Valid ID - fall through all */ @@ -40,21 +43,26 @@ static bool read_setting_valid(unsigned int id, unsigned int read_setting) case SYSC_ALLOC_ID_R_ATTR: case SYSC_ALLOC_ID_R_SCM: case SYSC_ALLOC_ID_R_FSDC: - case SYSC_ALLOC_ID_R_VL: case SYSC_ALLOC_ID_R_PLR: case SYSC_ALLOC_ID_R_TEX: case SYSC_ALLOC_ID_R_LSC: - switch (read_setting) { - /* Valid setting value - fall through all */ - case SYSC_ALLOC_L2_ALLOC: - case SYSC_ALLOC_NEVER_ALLOC: - case SYSC_ALLOC_ALWAYS_ALLOC: - case SYSC_ALLOC_PTL_ALLOC: - case SYSC_ALLOC_L2_PTL_ALLOC: - return true; - default: + break; + case SYSC_ALLOC_ID_R_VL: + if (prod_model == GPU_ID_PRODUCT_TTIX) return false; - } + break; + default: + return false; + } + + switch (read_setting) { + /* Valid setting value - fall through all */ + case 
SYSC_ALLOC_L2_ALLOC: + case SYSC_ALLOC_NEVER_ALLOC: + case SYSC_ALLOC_ALWAYS_ALLOC: + case SYSC_ALLOC_PTL_ALLOC: + case SYSC_ALLOC_L2_PTL_ALLOC: + return true; default: return false; } @@ -63,8 +71,10 @@ static bool read_setting_valid(unsigned int id, unsigned int read_setting) return false; } -static bool write_setting_valid(unsigned int id, unsigned int write_setting) +static bool write_setting_valid(unsigned int prod_model, unsigned int id, + unsigned int write_setting) { + CSTD_UNUSED(prod_model); switch (id) { /* Valid ID - fall through all */ case SYSC_ALLOC_ID_W_OTHER: @@ -83,17 +93,19 @@ static bool write_setting_valid(unsigned int id, unsigned int write_setting) case SYSC_ALLOC_ID_W_TIB_DS_AFBCH: case SYSC_ALLOC_ID_W_TIB_DS_AFBCB: case SYSC_ALLOC_ID_W_LSC: - switch (write_setting) { - /* Valid setting value - fall through all */ - case SYSC_ALLOC_L2_ALLOC: - case SYSC_ALLOC_NEVER_ALLOC: - case SYSC_ALLOC_ALWAYS_ALLOC: - case SYSC_ALLOC_PTL_ALLOC: - case SYSC_ALLOC_L2_PTL_ALLOC: - return true; - default: - return false; - } + break; + default: + return false; + } + + switch (write_setting) { + /* Valid setting value - fall through all */ + case SYSC_ALLOC_L2_ALLOC: + case SYSC_ALLOC_NEVER_ALLOC: + case SYSC_ALLOC_ALWAYS_ALLOC: + case SYSC_ALLOC_PTL_ALLOC: + case SYSC_ALLOC_L2_PTL_ALLOC: + return true; default: return false; } @@ -112,14 +124,14 @@ struct settings_status { bool write; }; -static struct settings_status settings_valid(unsigned int id, unsigned int read_setting, - unsigned int write_setting) +static struct settings_status settings_valid(unsigned int prod_model, unsigned int id, + unsigned int read_setting, unsigned int write_setting) { - struct settings_status valid = { .overall = (id < SYSC_ALLOC_COUNT * sizeof(u32)) }; + struct settings_status valid = { .overall = (id < GPU_SYSC_ALLOC_COUNT * sizeof(u32)) }; if (valid.overall) { - valid.read = read_setting_valid(id, read_setting); - valid.write = write_setting_valid(id, write_setting); + 
valid.read = read_setting_valid(prod_model, id, read_setting); + valid.write = write_setting_valid(prod_model, id, write_setting); valid.overall = valid.read || valid.write; } @@ -128,32 +140,33 @@ static struct settings_status settings_valid(unsigned int id, unsigned int read_ bool kbasep_pbha_supported(struct kbase_device *kbdev) { - const u32 arch_maj_rev = - ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id); - - return (arch_maj_rev >= GPU_ID2_ARCH_MAJOR_REV_MAKE(11, 3)); + return kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(11, 0, 3); } -int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, - unsigned int id, unsigned int read_setting, - unsigned int write_setting) +int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, unsigned int id, + unsigned int read_setting, unsigned int write_setting) { - struct settings_status const valid = settings_valid(id, read_setting, write_setting); + struct settings_status const valid = settings_valid(kbdev->gpu_props.gpu_id.product_model, + id, read_setting, write_setting); if (valid.overall) { unsigned int const sysc_alloc_num = id / sizeof(u32); u32 modified_reg; +#if MALI_USE_CSF if (runtime) { int i; kbase_pm_context_active(kbdev); /* Ensure host copy of SYSC_ALLOC is up to date */ - for (i = 0; i < SYSC_ALLOC_COUNT; i++) - kbdev->sysc_alloc[i] = kbase_reg_read( - kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i))); + for (i = 0; i < GPU_SYSC_ALLOC_COUNT; i++) + kbdev->sysc_alloc[i] = + kbase_reg_read32(kbdev, GPU_SYSC_ALLOC_OFFSET(i)); kbase_pm_context_idle(kbdev); } +#else + CSTD_UNUSED(runtime); +#endif /* MALI_USE_CSF */ modified_reg = kbdev->sysc_alloc[sysc_alloc_num]; @@ -161,34 +174,34 @@ int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, case 0: modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC0_SET(modified_reg, read_setting) : - modified_reg; + modified_reg; modified_reg = valid.write ? 
SYSC_ALLOC_W_SYSC_ALLOC0_SET(modified_reg, write_setting) : - modified_reg; + modified_reg; break; case 1: modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC1_SET(modified_reg, read_setting) : - modified_reg; + modified_reg; modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC1_SET(modified_reg, write_setting) : - modified_reg; + modified_reg; break; case 2: modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC2_SET(modified_reg, read_setting) : - modified_reg; + modified_reg; modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC2_SET(modified_reg, write_setting) : - modified_reg; + modified_reg; break; case 3: modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC3_SET(modified_reg, read_setting) : - modified_reg; + modified_reg; modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC3_SET(modified_reg, write_setting) : - modified_reg; + modified_reg; break; } @@ -200,67 +213,80 @@ int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, void kbase_pbha_write_settings(struct kbase_device *kbdev) { +#if MALI_USE_CSF if (kbasep_pbha_supported(kbdev)) { int i; - for (i = 0; i < SYSC_ALLOC_COUNT; ++i) - kbase_reg_write(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)), - kbdev->sysc_alloc[i]); + for (i = 0; i < GPU_SYSC_ALLOC_COUNT; ++i) + kbase_reg_write32(kbdev, GPU_SYSC_ALLOC_OFFSET(i), kbdev->sysc_alloc[i]); } +#else + CSTD_UNUSED(kbdev); +#endif /* MALI_USE_CSF */ } +#if MALI_USE_CSF static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 dtb_data[SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE]; + u32 dtb_data[GPU_SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE]; int sz, i; bool valid = true; - sz = of_property_count_elems_of_size(pbha_node, "int_id_override", - sizeof(u32)); + sz = of_property_count_elems_of_size(pbha_node, "int-id-override", sizeof(u32)); + + if (sz == -EINVAL) { + /* There is no int-id-override field. 
Fallback to int_id_override instead */ + sz = of_property_count_elems_of_size(pbha_node, "int_id_override", sizeof(u32)); + } + if (sz == -EINVAL) { + /* There is no int_id_override field. This is valid - but there's nothing further + * to do here. + */ + return 0; + } if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) { dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n"); return -EINVAL; } - if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, - sz) != 0) { - dev_err(kbdev->dev, - "Failed to read DTB pbha.int_id_override\n"); - return -EINVAL; + if (of_property_read_u32_array(pbha_node, "int-id-override", dtb_data, sz) != 0) { + /* There may be no int-id-override field. Fallback to int_id_override instead */ + if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, sz) != 0) { + dev_err(kbdev->dev, "Failed to read DTB pbha.int_id_override\n"); + return -EINVAL; + } } for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) { - unsigned int rdset = - SYSC_ALLOC_R_SYSC_ALLOC0_GET(dtb_data[i + 1]); - unsigned int wrset = - SYSC_ALLOC_W_SYSC_ALLOC0_GET(dtb_data[i + 1]); + unsigned int rdset = SYSC_ALLOC_R_SYSC_ALLOC0_GET(dtb_data[i + 1]); + unsigned int wrset = SYSC_ALLOC_W_SYSC_ALLOC0_GET(dtb_data[i + 1]); valid = valid && - (kbase_pbha_record_settings(kbdev, false, dtb_data[i], - rdset, wrset) == 0); + (kbase_pbha_record_settings(kbdev, false, dtb_data[i], rdset, wrset) == 0); if (valid) - dev_info(kbdev->dev, - "pbha.int_id_override 0x%x r0x%x w0x%x\n", - dtb_data[i], rdset, wrset); + dev_info(kbdev->dev, "pbha.int_id_override 0x%x r0x%x w0x%x\n", dtb_data[i], + rdset, wrset); } if (i != sz || (!valid)) { - dev_err(kbdev->dev, - "Failed recording DTB data (pbha.int_id_override)\n"); + dev_err(kbdev->dev, "Failed recording DTB data (pbha.int_id_override)\n"); return -EINVAL; } return 0; } -#if MALI_USE_CSF static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 bits; + u32 
bits = 0; int err; if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) return 0; - err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + err = of_property_read_u32(pbha_node, "propagate-bits", &bits); + + if (err == -EINVAL) { + err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + } if (err < 0) { if (err != -EINVAL) { @@ -268,6 +294,10 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, "DTB value for propagate_bits is improperly formed (err=%d)\n", err); return err; + } else { + /* Property does not exist */ + kbdev->pbha_propagate_bits = 0; + return 0; } } @@ -279,10 +309,11 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, kbdev->pbha_propagate_bits = bits; return 0; } -#endif +#endif /* MALI_USE_CSF */ int kbase_pbha_read_dtb(struct kbase_device *kbdev) { +#if MALI_USE_CSF const struct device_node *pbha_node; int err; @@ -295,12 +326,12 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev) err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node); -#if MALI_USE_CSF if (err < 0) return err; err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); -#endif - return err; +#else + return 0; +#endif } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.h b/drivers/gpu/arm/bifrost/mali_kbase_pbha.h index 79632194cabe..a8eb546a9a4d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,9 +49,8 @@ bool kbasep_pbha_supported(struct kbase_device *kbdev); * * Return: 0 on success, otherwise error code. 
*/ -int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, - unsigned int id, unsigned int read_setting, - unsigned int write_setting); +int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, unsigned int id, + unsigned int read_setting, unsigned int write_setting); /** * kbase_pbha_write_settings - write recorded PBHA settings to GPU diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c index 1cc29c700e5a..5b13a0bd8f32 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,15 +32,22 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data) { struct kbase_device *kbdev = sfile->private; - int i; + uint i; + + CSTD_UNUSED(data); kbase_pm_context_active(kbdev); /* Minimal header for readability */ seq_puts(sfile, "// R W\n"); - for (i = 0; i < SYSC_ALLOC_COUNT; ++i) { - int j; - u32 reg = kbase_reg_read(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i))); + for (i = 0; i < GPU_SYSC_ALLOC_COUNT; ++i) { + uint j; + +#if MALI_USE_CSF + u32 reg = kbase_reg_read32(kbdev, GPU_SYSC_ALLOC_OFFSET(i)); +#else /* MALI_USE_CSF */ + u32 reg = 0; +#endif /* MALI_USE_CSF */ for (j = 0; j < sizeof(u32); ++j) { u8 r_val; @@ -64,8 +71,7 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data) w_val = SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg); break; } - seq_printf(sfile, "%2zu 0x%x 0x%x\n", - (i * sizeof(u32)) + j, r_val, w_val); + seq_printf(sfile, "%2zu 0x%x 0x%x\n", (i * sizeof(u32)) + j, r_val, w_val); } } kbase_pm_context_idle(kbdev); @@ -73,8 +79,7 @@ 
static int int_id_overrides_show(struct seq_file *sfile, void *data) return 0; } -static ssize_t int_id_overrides_write(struct file *file, - const char __user *ubuf, size_t count, +static ssize_t int_id_overrides_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { struct seq_file *sfile = file->private_data; @@ -84,6 +89,8 @@ static ssize_t int_id_overrides_write(struct file *file, unsigned int r_val; unsigned int w_val; + CSTD_UNUSED(ppos); + if (count >= sizeof(raw_str)) return -E2BIG; if (copy_from_user(raw_str, ubuf, count)) @@ -98,8 +105,11 @@ static ssize_t int_id_overrides_write(struct file *file, /* This is a debugfs config write, so reset GPU such that changes take effect ASAP */ kbase_pm_context_active(kbdev); - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { kbase_reset_gpu(kbdev); + kbase_reset_gpu_wait(kbdev); + } + kbase_pm_context_idle(kbdev); return count; @@ -124,9 +134,12 @@ static int propagate_bits_show(struct seq_file *sfile, void *data) struct kbase_device *kbdev = sfile->private; u32 l2_config_val; + CSTD_UNUSED(data); + kbase_csf_scheduler_pm_active(kbdev); kbase_pm_wait_for_l2_powered(kbdev); - l2_config_val = L2_CONFIG_PBHA_HWU_GET(kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG))); + l2_config_val = + L2_CONFIG_PBHA_HWU_GET(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG))); kbase_csf_scheduler_pm_idle(kbdev); seq_printf(sfile, "PBHA Propagate Bits: 0x%x\n", l2_config_val); @@ -157,6 +170,8 @@ static ssize_t propagate_bits_write(struct file *file, const char __user *ubuf, char raw_str[32]; unsigned long propagate_bits; + CSTD_UNUSED(ppos); + if (count >= sizeof(raw_str)) return -E2BIG; if (copy_from_user(raw_str, ubuf, count)) @@ -207,8 +222,8 @@ void kbase_pbha_debugfs_init(struct kbase_device *kbdev) { if (kbasep_pbha_supported(kbdev)) { const mode_t mode = 0644; - struct dentry *debugfs_pbha_dir = debugfs_create_dir( - "pbha", 
kbdev->mali_debugfs_directory); + struct dentry *debugfs_pbha_dir = + debugfs_create_dir("pbha", kbdev->mali_debugfs_directory); if (IS_ERR_OR_NULL(debugfs_pbha_dir)) { dev_err(kbdev->dev, @@ -216,8 +231,8 @@ void kbase_pbha_debugfs_init(struct kbase_device *kbdev) return; } - debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, - kbdev, &pbha_int_id_overrides_fops); + debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, kbdev, + &pbha_int_id_overrides_fops); #if MALI_USE_CSF if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c index 265c676f13fa..80fa2cc4a8e6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,8 @@ static struct platform_device *mali_device; * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
*/ -static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) +static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, + struct resource *const linux_resources) { if (!io_resources || !linux_resources) { pr_err("%s: couldn't find proper resources\n", __func__); @@ -58,19 +59,19 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); linux_resources[0].start = io_resources->io_memory_region.start; - linux_resources[0].end = io_resources->io_memory_region.end; + linux_resources[0].end = io_resources->io_memory_region.end; linux_resources[0].flags = IORESOURCE_MEM; linux_resources[1].start = io_resources->job_irq_number; - linux_resources[1].end = io_resources->job_irq_number; + linux_resources[1].end = io_resources->job_irq_number; linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; linux_resources[2].start = io_resources->mmu_irq_number; - linux_resources[2].end = io_resources->mmu_irq_number; + linux_resources[2].end = io_resources->mmu_irq_number; linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; linux_resources[3].start = io_resources->gpu_irq_number; - linux_resources[3].end = io_resources->gpu_irq_number; + linux_resources[3].end = io_resources->gpu_irq_number; linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; } @@ -80,7 +81,8 @@ int kbase_platform_register(void) struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; int err; - config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ + config = + kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ if (config == NULL) { pr_err("%s: couldn't get platform config\n", __func__); return -ENODEV; diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c index 62a132816a42..b636d4288511 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,8 +24,7 @@ */ #include -#include -#include +#include #include #include @@ -50,23 +49,21 @@ void kbase_pm_halt(struct kbase_device *kbdev) void kbase_pm_context_active(struct kbase_device *kbdev) { - (void)kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); + (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); } int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, - enum kbase_pm_suspend_handler suspend_handler) + enum kbase_pm_suspend_handler suspend_handler) { int c; KBASE_DEBUG_ASSERT(kbdev != NULL); - dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, - suspend_handler, current->pid); + dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, suspend_handler, + current->pid); kbase_pm_lock(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, - suspend_handler)) { + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) { kbase_pm_unlock(kbdev); return 1; } @@ -117,7 +114,6 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); - kbase_pm_lock(kbdev); c = --kbdev->pm.active_count; @@ -138,20 +134,55 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) } kbase_pm_unlock(kbdev); - dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, - kbdev->pm.active_count, current->pid); + 
dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, kbdev->pm.active_count, current->pid); } KBASE_EXPORT_TEST_API(kbase_pm_context_idle); +static void reenable_hwcnt_on_resume(struct kbase_device *kbdev) +{ + unsigned long flags; + + /* Re-enable GPU hardware counters */ +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif + + /* Resume HW counters intermediaries. */ + kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); +} + +static void resume_job_scheduling(struct kbase_device *kbdev) +{ +#if !MALI_USE_CSF + /* Resume any blocked atoms (which may cause contexts to be scheduled in + * and dependent atoms to run) + */ + kbase_resume_suspended_soft_jobs(kbdev); + + /* Resume the Job Scheduler and associated components, and start running + * atoms + */ + kbasep_js_resume(kbdev); +#else + kbase_csf_scheduler_pm_resume(kbdev); +#endif +} + int kbase_pm_driver_suspend(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev); + bool scheduling_suspended = false; + bool timers_halted = false; /* Suspend HW counter intermediaries. This blocks until workers and timers * are no longer running. */ - kbase_vinstr_suspend(kbdev->vinstr_ctx); kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); /* Disable GPU hardware counters. 
@@ -162,6 +193,7 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); if (WARN_ON(kbase_pm_is_suspending(kbdev))) { mutex_unlock(&kbdev->pm.lock); + /* No error handling for this condition */ return 0; } kbdev->pm.suspending = true; @@ -193,32 +225,42 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) */ kbasep_js_suspend(kbdev); #else - if (kbase_csf_scheduler_pm_suspend(kbdev)) { - mutex_lock(&kbdev->pm.lock); - kbdev->pm.suspending = false; - mutex_unlock(&kbdev->pm.lock); - return -1; - } + if (kbase_csf_scheduler_pm_suspend(kbdev)) + goto exit; #endif + scheduling_suspended = true; + /* Wait for the active count to reach zero. This is not the same as * waiting for a power down, since not all policies power down when this * reaches zero. */ dev_dbg(kbdev->dev, ">wait_event - waiting for active_count == 0 (pid = %d)\n", current->pid); - wait_event(kbdev->pm.zero_active_count_wait, - kbdev->pm.active_count == 0); + wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); +#if MALI_USE_CSF + /* At this point, any kbase context termination should either have run to + * completion and any further context termination can only begin after + * the system resumes. Therefore, it is now safe to skip taking the context + * list lock when traversing the context list. 
+ */ + if (kbase_csf_kcpu_queue_halt_timers(kbdev)) + goto exit; +#endif + + timers_halted = true; + /* NOTE: We synchronize with anything that was just finishing a * kbase_pm_context_idle() call by locking the pm.lock below */ if (kbase_hwaccess_pm_suspend(kbdev)) { - mutex_lock(&kbdev->pm.lock); - kbdev->pm.suspending = false; - mutex_unlock(&kbdev->pm.lock); - return -1; + /* No early return yet */ + if (IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) + WARN_ON_ONCE(1); + else + goto exit; } #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -230,36 +272,52 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) #endif /* CONFIG_MALI_ARBITER_SUPPORT */ return 0; + +exit: + if (timers_halted) { +#if MALI_USE_CSF + /* Resume the timers in case of suspend failure. But that needs to + * be done before clearing the 'pm.suspending' flag so as to keep the + * context termination blocked. + */ + kbase_csf_kcpu_queue_resume_timers(kbdev); +#endif + } + + mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; + mutex_unlock(&kbdev->pm.lock); + + if (scheduling_suspended) + resume_job_scheduling(kbdev); + + reenable_hwcnt_on_resume(kbdev); + /* Wake up the threads blocked on the completion of System suspend/resume */ + wake_up_all(&kbdev->pm.resume_wait); + return -1; } void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) { - unsigned long flags; + CSTD_UNUSED(arb_gpu_start); /* MUST happen before any pm_context_active calls occur */ kbase_hwaccess_pm_resume(kbdev); /* Initial active call, to power on the GPU/cores if needed */ #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_pm_context_active_handle_suspend(kbdev, - (arb_gpu_start ? - KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) + if (kbase_pm_context_active_handle_suspend( + kbdev, (arb_gpu_start ? 
KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) return; #else kbase_pm_context_active(kbdev); #endif -#if !MALI_USE_CSF - /* Resume any blocked atoms (which may cause contexts to be scheduled in - * and dependent atoms to run) - */ - kbase_resume_suspended_soft_jobs(kbdev); + resume_job_scheduling(kbdev); - /* Resume the Job Scheduler and associated components, and start running - * atoms - */ - kbasep_js_resume(kbdev); +#if MALI_USE_CSF + kbase_csf_kcpu_queue_resume_timers(kbdev); #endif /* Matching idle call, to power off the GPU/cores if we didn't actually @@ -267,20 +325,12 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) */ kbase_pm_context_idle(kbdev); - /* Re-enable GPU hardware counters */ -#if MALI_USE_CSF - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - kbase_csf_scheduler_spin_unlock(kbdev, flags); -#else - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#endif + reenable_hwcnt_on_resume(kbdev); - /* Resume HW counters intermediaries. */ - kbase_vinstr_resume(kbdev->vinstr_ctx); - kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); + /* System resume callback is complete */ + kbdev->pm.resuming = false; + /* Unblock the threads waiting for the completion of System suspend/resume */ + wake_up_all(&kbdev->pm.resume_wait); } int kbase_pm_suspend(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h index 4bb90a4f6542..187fb9efaaf2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,15 +28,15 @@ #include "mali_kbase_hwaccess_pm.h" -#define PM_ENABLE_IRQS 0x01 -#define PM_HW_ISSUES_DETECT 0x02 +#define PM_ENABLE_IRQS 0x01 +#define PM_HW_ISSUES_DETECT 0x02 #ifdef CONFIG_MALI_ARBITER_SUPPORT /* In the case that the GPU was granted by the Arbiter, it will have * already been reset. The following flag ensures it is not reset * twice. */ -#define PM_NO_RESET 0x04 +#define PM_NO_RESET 0x04 #endif /** @@ -104,7 +104,6 @@ void kbase_pm_term(struct kbase_device *kbdev); */ void kbase_pm_context_active(struct kbase_device *kbdev); - /** Handler codes for doing kbase_pm_context_active_handle_suspend() */ enum kbase_pm_suspend_handler { /** A suspend is not expected/not possible - this is the same as @@ -144,7 +143,8 @@ enum kbase_pm_suspend_handler { * * Return: 0 on success, non-zero othrewise. */ -int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler); /** * kbase_pm_context_idle - Decrement the reference count of active contexts. @@ -239,7 +239,7 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev); * Despite kbase_pm_resume(), it will ignore to update Arbiter * status if MALI_ARBITER_SUPPORT is enabled. 
*/ -void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); #ifdef CONFIG_MALI_ARBITER_SUPPORT /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c new file mode 100644 index 000000000000..e30857c7b35d --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c @@ -0,0 +1,1517 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ +#include +#include + +unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone) +{ + return ((((unsigned long)zone) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) + << KBASE_REG_ZONE_SHIFT); +} + +enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits) +{ + return (enum kbase_memory_zone)(((zone_bits)&KBASE_REG_ZONE_MASK) >> KBASE_REG_ZONE_SHIFT); +} +KBASE_EXPORT_TEST_API(kbase_bits_to_zone); + +char *kbase_reg_zone_get_name(enum kbase_memory_zone zone) +{ + switch (zone) { + case SAME_VA_ZONE: + return "SAME_VA"; + case CUSTOM_VA_ZONE: + return "CUSTOM_VA"; + case EXEC_VA_ZONE: + return "EXEC_VA"; +#if MALI_USE_CSF + case MCU_SHARED_ZONE: + return "MCU_SHARED"; + case EXEC_FIXED_VA_ZONE: + return "EXEC_FIXED_VA"; + case FIXED_VA_ZONE: + return "FIXED_VA"; +#endif + default: + return NULL; + } +} +KBASE_EXPORT_TEST_API(kbase_reg_zone_get_name); + +struct kbase_reg_zone *kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, + enum kbase_memory_zone zone) +{ + WARN_ON(!kbase_is_ctx_reg_zone(zone)); + + return &kctx->reg_zone[zone]; +} + +struct kbase_reg_zone *kbase_ctx_reg_zone_get(struct kbase_context *kctx, + enum kbase_memory_zone zone) +{ + lockdep_assert_held(&kctx->reg_lock); + return kbase_ctx_reg_zone_get_nolock(kctx, zone); +} +KBASE_EXPORT_TEST_API(kbase_ctx_reg_zone_get); + +static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) +{ +#if defined(CONFIG_ARM64) + /* VA_BITS can be as high as 48 bits, but all bits are available for + * both user and kernel. + */ + size_t cpu_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + /* x86_64 can access 48 bits of VA, but the 48th is used to denote + * kernel (1) vs userspace (0), so the max here is 47. 
+ */ + size_t cpu_va_bits = 47; +#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) + size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; +#else +#error "Unknown CPU VA width for this architecture" +#endif + + if (kbase_ctx_compat_mode(kctx)) + cpu_va_bits = 32; + + return cpu_va_bits; +} + +/** + * kbase_gpu_pfn_to_rbtree - find the rb-tree tracking the region with the indicated GPU + * page frame number + * @kctx: kbase context + * @gpu_pfn: GPU PFN address + * + * Context: any context. + * + * Return: reference to the rb-tree root, NULL if not found + */ +static struct rb_root *kbase_gpu_pfn_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn) +{ + enum kbase_memory_zone zone_idx; + struct kbase_reg_zone *zone; + + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + zone = &kctx->reg_zone[zone_idx]; + if ((gpu_pfn >= zone->base_pfn) && (gpu_pfn < kbase_reg_zone_end_pfn(zone))) + return &zone->reg_rbtree; + } + + return NULL; +} + +/* This function inserts a region into the tree. */ +static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) +{ + const u64 start_pfn = new_reg->start_pfn; + struct rb_node **link = NULL; + struct rb_node *parent = NULL; + struct rb_root *rbtree = NULL; + + rbtree = new_reg->rbtree; + + link = &(rbtree->rb_node); + /* Find the right place in the tree using tree search */ + while (*link) { + struct kbase_va_region *old_reg; + + parent = *link; + old_reg = rb_entry(parent, struct kbase_va_region, rblink); + + /* RBTree requires no duplicate entries. 
*/ + KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); + + if (old_reg->start_pfn > start_pfn) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Put the new node there, and rebalance tree */ + rb_link_node(&(new_reg->rblink), parent, link); + + rb_insert_color(&(new_reg->rblink), rbtree); +} + +static struct kbase_va_region *find_region_enclosing_range_rbtree(struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + const u64 end_pfn = start_pfn + nr_pages; + + rbnode = rbtree->rb_node; + + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. */ + if (start_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (end_pfn > tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +static struct kbase_va_region *kbase_find_region_enclosing_address(struct rb_root *rbtree, + u64 gpu_addr) +{ + const u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_node *rbnode; + struct kbase_va_region *reg; + + rbnode = rbtree->rb_node; + + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. */ + if (gpu_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (gpu_pfn >= tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +/* Find region enclosing given address. 
*/ +struct kbase_va_region * +kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_root *rbtree = NULL; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + lockdep_assert_held(&kctx->reg_lock); + + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; + + return kbase_find_region_enclosing_address(rbtree, gpu_addr); +} +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); + +static struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_node *rbnode = NULL; + struct kbase_va_region *reg = NULL; + + rbnode = rbtree->rb_node; + + while (rbnode) { + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if (reg->start_pfn > gpu_pfn) + rbnode = rbnode->rb_left; + else if (reg->start_pfn < gpu_pfn) + rbnode = rbnode->rb_right; + else + return reg; + } + + return NULL; +} + +/* Find region with given base address */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, + u64 gpu_addr) +{ + const u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_root *rbtree = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; + + return kbase_find_region_base_address(rbtree, gpu_addr); +} +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); + +/* Find region meeting given requirements */ +static struct kbase_va_region * +kbase_region_tracker_find_region_meeting_reqs(struct kbase_va_region *reg_reqs, size_t nr_pages, + size_t align_offset, size_t align_mask, + u64 *out_start_pfn) +{ + struct rb_node *rbnode = NULL; + struct kbase_va_region *reg = NULL; + struct rb_root *rbtree = NULL; + + /* Note that this search is a linear search, as we do not have a target + * address in mind, so does not benefit from 
the rbtree search + */ + rbtree = reg_reqs->rbtree; + + for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if ((reg->nr_pages >= nr_pages) && (reg->flags & KBASE_REG_FREE)) { + /* Check alignment */ + u64 start_pfn = reg->start_pfn; + + /* When align_offset == align, this sequence is + * equivalent to: + * (start_pfn + align_mask) & ~(align_mask) + * + * Otherwise, it aligns to n*align + offset, for the + * lowest value n that makes this still >start_pfn + */ + start_pfn += align_mask; + start_pfn -= (start_pfn - align_offset) & (align_mask); + + if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { + /* Can't end at 4GB boundary */ + if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) + start_pfn += align_offset; + + /* Can't start at 4GB boundary */ + if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) + start_pfn += align_offset; + + if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) || + !(start_pfn & BASE_MEM_PFN_MASK_4GB)) + continue; + } else if (reg_reqs->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + u64 end_pfn = start_pfn + nr_pages - 1; + + if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != + (end_pfn & ~BASE_MEM_PFN_MASK_4GB)) + start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; + } + + if ((start_pfn >= reg->start_pfn) && + (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && + ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { + *out_start_pfn = start_pfn; + return reg; + } + } + } + + return NULL; +} + +/** + * kbase_remove_va_region - Remove a region object from the global list. + * + * @kbdev: The kbase device + * @reg: Region object to remove + * + * The region reg is removed, possibly by merging with other free and + * compatible adjacent regions. It must be called with the context + * region lock held. The associated memory is not released (see + * kbase_free_alloced_region). Internal use only. 
+ */ +void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg) +{ + struct rb_node *rbprev; + struct kbase_reg_zone *zone = container_of(reg->rbtree, struct kbase_reg_zone, reg_rbtree); + struct kbase_va_region *prev = NULL; + struct rb_node *rbnext; + struct kbase_va_region *next = NULL; + struct rb_root *reg_rbtree = NULL; + struct kbase_va_region *orig_reg = reg; + + int merged_front = 0; + int merged_back = 0; + + reg_rbtree = reg->rbtree; + + if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) + return; + + /* Try to merge with the previous block first */ + rbprev = rb_prev(&(reg->rblink)); + if (rbprev) { + prev = rb_entry(rbprev, struct kbase_va_region, rblink); + if (prev->flags & KBASE_REG_FREE) { + /* We're compatible with the previous VMA, merge with + * it, handling any gaps for robustness. + */ + u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; + + WARN_ON((kbase_bits_to_zone(prev->flags)) != + (kbase_bits_to_zone(reg->flags))); + if (!WARN_ON(reg->start_pfn < prev_end_pfn)) + prev->nr_pages += reg->start_pfn - prev_end_pfn; + prev->nr_pages += reg->nr_pages; + rb_erase(&(reg->rblink), reg_rbtree); + reg = prev; + merged_front = 1; + } + } + + /* Try to merge with the next block second */ + /* Note we do the lookup here as the tree may have been rebalanced. */ + rbnext = rb_next(&(reg->rblink)); + if (rbnext) { + next = rb_entry(rbnext, struct kbase_va_region, rblink); + if (next->flags & KBASE_REG_FREE) { + /* We're compatible with the next VMA, merge with it, + * handling any gaps for robustness. 
+ */ + u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; + + WARN_ON((kbase_bits_to_zone(next->flags)) != + (kbase_bits_to_zone(reg->flags))); + if (!WARN_ON(next->start_pfn < reg_end_pfn)) + next->nr_pages += next->start_pfn - reg_end_pfn; + next->start_pfn = reg->start_pfn; + next->nr_pages += reg->nr_pages; + rb_erase(&(reg->rblink), reg_rbtree); + merged_back = 1; + } + } + + if (merged_front && merged_back) { + /* We already merged with prev, free it */ + kfree(reg); + } else if (!(merged_front || merged_back)) { + /* If we failed to merge then we need to add a new block */ + + /* + * We didn't merge anything. Try to add a new free + * placeholder, and in any case, remove the original one. + */ + struct kbase_va_region *free_reg; + + free_reg = kbase_alloc_free_region(zone, reg->start_pfn, reg->nr_pages); + if (!free_reg) { + /* In case of failure, we cannot allocate a replacement + * free region, so we will be left with a 'gap' in the + * region tracker's address range (though, the rbtree + * will itself still be correct after erasing + * 'reg'). + * + * The gap will be rectified when an adjacent region is + * removed by one of the above merging paths. Other + * paths will gracefully fail to allocate if they try + * to allocate in the gap. + * + * There is nothing that the caller can do, since free + * paths must not fail. The existing 'reg' cannot be + * repurposed as the free region as callers must have + * freedom of use with it by virtue of it being owned + * by them, not the region tracker insert/remove code. + */ + dev_warn( + kbdev->dev, + "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx", + (unsigned long long)reg->start_pfn << PAGE_SHIFT, + (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT); + rb_erase(&(reg->rblink), reg_rbtree); + + goto out; + } + rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); + } + + /* This operation is always safe because the function never frees + * the region. 
If the region has been merged to both front and back, + * then it's the previous region that is supposed to be freed. + */ + orig_reg->start_pfn = 0; + +out: + return; +} +KBASE_EXPORT_TEST_API(kbase_remove_va_region); + +/** + * kbase_insert_va_region_nolock - Insert a VA region to the list, + * replacing the existing one. + * + * @kbdev: The kbase device + * @new_reg: The new region to insert + * @at_reg: The region to replace + * @start_pfn: The Page Frame Number to insert at + * @nr_pages: The number of pages of the region + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, + struct kbase_va_region *new_reg, + struct kbase_va_region *at_reg, u64 start_pfn, + size_t nr_pages) +{ + struct rb_root *reg_rbtree = NULL; + struct kbase_reg_zone *zone = + container_of(at_reg->rbtree, struct kbase_reg_zone, reg_rbtree); + int err = 0; + + CSTD_UNUSED(kbdev); + + reg_rbtree = at_reg->rbtree; + + /* Must be a free region */ + KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); + /* start_pfn should be contained within at_reg */ + KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && + (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); + /* at least nr_pages from start_pfn should be contained within at_reg */ + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); + /* having at_reg means the rb_tree should not be empty */ + if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) + return -ENOMEM; + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + /* Regions are a whole use, so swap and delete old one. */ + if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { + rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), reg_rbtree); + kfree(at_reg); + } + /* New region replaces the start of the old one, so insert before. 
*/ + else if (at_reg->start_pfn == start_pfn) { + at_reg->start_pfn += nr_pages; + KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(new_reg); + } + /* New region replaces the end of the old one, so insert after. */ + else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(new_reg); + } + /* New region splits the old one, so insert and create new */ + else { + struct kbase_va_region *new_front_reg; + + new_front_reg = kbase_alloc_free_region(zone, at_reg->start_pfn, + start_pfn - at_reg->start_pfn); + + if (new_front_reg) { + at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; + at_reg->start_pfn = start_pfn + nr_pages; + + kbase_region_tracker_insert(new_front_reg); + kbase_region_tracker_insert(new_reg); + } else { + err = -ENOMEM; + } + } + + return err; +} + +/** + * kbase_add_va_region - Add a VA region to the region list for a context. + * + * @kctx: kbase context containing the region + * @reg: the region to add + * @addr: the address to insert the region at + * @nr_pages: the number of pages in the region + * @align: the minimum alignment in pages + * + * Return: 0 on success, error code otherwise. + */ +int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align) +{ + int err = 0; + struct kbase_device *kbdev = kctx->kbdev; + const int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); + const int gpu_pc_bits = kbdev->gpu_props.log2_program_counter_size; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); + + lockdep_assert_held(&kctx->reg_lock); + + /* The executable allocation from the SAME_VA zone should already have an + * appropriately aligned GPU VA chosen for it. + * Also, executable allocations from EXEC_VA don't need the special + * alignment. 
+ */ +#if MALI_USE_CSF + /* The same is also true for the EXEC_FIXED_VA zone. + */ +#endif + if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && +#if MALI_USE_CSF + ((kbase_bits_to_zone(reg->flags)) != EXEC_FIXED_VA_ZONE) && +#endif + ((kbase_bits_to_zone(reg->flags)) != EXEC_VA_ZONE)) { + if (cpu_va_bits > gpu_pc_bits) { + align = max(align, (size_t)((1ULL << gpu_pc_bits) >> PAGE_SHIFT)); + } + } + + do { + err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, align); + if (err != -ENOMEM) + break; + + /* + * If the allocation is not from the same zone as JIT + * then don't retry, we're out of VA and there is + * nothing which can be done about it. + */ + if ((kbase_bits_to_zone(reg->flags)) != CUSTOM_VA_ZONE) + break; + } while (kbase_jit_evict(kctx)); + + return err; +} +KBASE_EXPORT_TEST_API(kbase_add_va_region); + +/** + * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree + * + * @kbdev: The kbase device + * @reg: The region to add + * @addr: The address to add the region at, or 0 to map at any available address + * @nr_pages: The size of the region in pages + * @align: The minimum alignment in pages + * + * Insert a region into the rbtree that was specified when the region was + * created. If addr is 0 a free area in the rbtree is used, otherwise the + * specified address is used. + * + * Return: 0 on success, error code otherwise. + */ +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align) +{ + struct device *const dev = kbdev->dev; + struct rb_root *rbtree = NULL; + struct kbase_va_region *tmp; + const u64 gpu_pfn = addr >> PAGE_SHIFT; + int err = 0; + + rbtree = reg->rbtree; + + if (!align) + align = 1; + + /* must be a power of 2 */ + KBASE_DEBUG_ASSERT(is_power_of_2(align)); + KBASE_DEBUG_ASSERT(nr_pages > 0); + + /* Path 1: Map a specific address. Find the enclosing region, + * which *must* be free. 
+ */ + if (gpu_pfn) { + KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); + + tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, nr_pages); + if (kbase_is_region_invalid(tmp)) { + dev_warn( + dev, + "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", + gpu_pfn, nr_pages); + err = -ENOMEM; + goto exit; + } else if (!kbase_is_region_free(tmp)) { + dev_warn( + dev, + "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", + tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); + err = -ENOMEM; + goto exit; + } + + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); + if (err) { + dev_warn(dev, "Failed to insert va region"); + err = -ENOMEM; + } + } else { + /* Path 2: Map any free address which meets the requirements. */ + u64 start_pfn; + size_t align_offset = align; + size_t align_mask = align - 1; + +#if !MALI_USE_CSF + if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { + WARN(align > 1, + "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", + __func__, (unsigned long)align); + align_mask = reg->extension - 1; + align_offset = reg->extension - reg->initial_commit; + } +#endif /* !MALI_USE_CSF */ + + tmp = kbase_region_tracker_find_region_meeting_reqs(reg, nr_pages, align_offset, + align_mask, &start_pfn); + if (tmp) { + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); + if (unlikely(err)) { + dev_warn( + dev, + "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", + start_pfn, nr_pages); + } + } else { + dev_dbg(dev, + "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n", + nr_pages, align_offset, align_mask); + err = -ENOMEM; + } + } + +exit: + return err; +} + +struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg) +{ + struct rb_root *rbtree = reg->rbtree; + struct kbase_reg_zone *zone = container_of(rbtree, struct kbase_reg_zone, 
reg_rbtree); + + if (!kbase_is_ctx_reg_zone(zone->id)) + return NULL; + + return container_of(zone - zone->id, struct kbase_context, reg_zone[0]); +} + +/** + * kbase_region_tracker_erase_rbtree - Free memory for a region tracker + * + * @rbtree: Root of the red-black tree to erase. + * + * This will free all the regions within the region tracker. + */ +static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + + do { + rbnode = rb_first(rbtree); + if (rbnode) { + rb_erase(rbnode, rbtree); + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1); + if (kbase_is_page_migration_enabled()) { + struct kbase_context *kctx = kbase_reg_to_kctx(reg); + + if (kctx) + kbase_gpu_munmap(kctx, reg); + } + /* Reset the start_pfn - as the rbtree is being + * destroyed and we've already erased this region, there + * is no further need to attempt to remove it. + * This won't affect the cleanup if the region was + * being used as a sticky resource as the cleanup + * related to sticky resources anyways need to be + * performed before the term of region tracker. 
+ */ + reg->start_pfn = 0; + kbase_free_alloced_region(reg); + } + } while (rbnode); +} + +void kbase_reg_zone_term(struct kbase_reg_zone *zone) +{ + kbase_region_tracker_erase_rbtree(&zone->reg_rbtree); +} + +static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +{ + return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), kctx->kbdev->gpu_props.mmu.va_bits); +} + +static int kbase_reg_zone_same_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + int err; + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); + const size_t same_va_bits = kbase_get_same_va_bits(kctx); + const u64 base_pfn = 1u; + u64 nr_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - base_pfn; + + CSTD_UNUSED(gpu_va_limit); + + lockdep_assert_held(&kctx->reg_lock); + +#if MALI_USE_CSF + if ((base_pfn + nr_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { + /* Depending on how the kernel is configured, it's possible (eg on aarch64) for + * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone + * doesn't cross into the exec_va zone. + */ + nr_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - base_pfn; + } +#endif + err = kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, nr_pages); + if (err) + return -ENOMEM; + + kctx->gpu_va_end = base_pfn + nr_pages; + + return 0; +} + +static void kbase_reg_zone_same_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); + + kbase_reg_zone_term(zone); +} + +static int kbase_reg_zone_custom_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + u64 nr_pages = KBASE_REG_ZONE_CUSTOM_VA_SIZE; + + /* If the context does not support CUSTOM_VA zones, then we don't need to + * proceed past this point, and can pretend that it was initialized properly. + * In practice, this will mean that the zone metadata structure will be zero + * initialized and not contain a valid zone ID. 
+ */ + if (!kbase_ctx_compat_mode(kctx)) + return 0; + + if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) + return -EINVAL; + + /* If the current size of TMEM is out of range of the + * virtual address space addressable by the MMU then + * we should shrink it to fit + */ + if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) + nr_pages = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; + + if (kbase_reg_zone_init(kctx->kbdev, zone, CUSTOM_VA_ZONE, KBASE_REG_ZONE_CUSTOM_VA_BASE, + nr_pages)) + return -ENOMEM; + + /* On JM systems, this is the last memory zone that gets initialized, + * so the GPU VA ends right after the end of the CUSTOM_VA zone. On CSF, + * setting here is harmless, as the FIXED_VA initializer will overwrite + * it. + */ + kctx->gpu_va_end += nr_pages; + + return 0; +} + +static void kbase_reg_zone_custom_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + + kbase_reg_zone_term(zone); +} + +static inline u64 kbase_get_exec_va_zone_base(struct kbase_context *kctx) +{ + u64 base_pfn; + +#if MALI_USE_CSF + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_64; + if (kbase_ctx_compat_mode(kctx)) + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_32; +#else + CSTD_UNUSED(kctx); + /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is + * initially U64_MAX + */ + base_pfn = U64_MAX; +#endif + + return base_pfn; +} + +static inline int kbase_reg_zone_exec_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + const u64 base_pfn = kbase_get_exec_va_zone_base(kctx); + u64 nr_pages = KBASE_REG_ZONE_EXEC_VA_SIZE; + + CSTD_UNUSED(gpu_va_limit); + +#if !MALI_USE_CSF + nr_pages = 0; +#endif + + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_VA_ZONE, base_pfn, nr_pages); +} + +static void kbase_reg_zone_exec_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = 
kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + + kbase_reg_zone_term(zone); +} + +#if MALI_USE_CSF +static inline u64 kbase_get_exec_fixed_va_zone_base(struct kbase_context *kctx) +{ + return kbase_get_exec_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_VA_SIZE; +} + +static int kbase_reg_zone_exec_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + const u64 base_pfn = kbase_get_exec_fixed_va_zone_base(kctx); + + CSTD_UNUSED(gpu_va_limit); + + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_FIXED_VA_ZONE, base_pfn, + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +} + +static void kbase_reg_zone_exec_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + + WARN_ON(!list_empty(&kctx->csf.event_pages_head)); + kbase_reg_zone_term(zone); +} + +static int kbase_reg_zone_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); + const u64 base_pfn = + kbase_get_exec_fixed_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; + u64 fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + u64 nr_pages; + CSTD_UNUSED(gpu_va_limit); + + if (kbase_ctx_compat_mode(kctx)) + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; + + nr_pages = fixed_va_end - base_pfn; + + if (kbase_reg_zone_init(kctx->kbdev, zone, FIXED_VA_ZONE, base_pfn, nr_pages)) + return -ENOMEM; + + kctx->gpu_va_end = fixed_va_end; + + return 0; +} + +static void kbase_reg_zone_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); + + kbase_reg_zone_term(zone); +} +#endif + +typedef int kbase_memory_zone_init(struct kbase_context *kctx, u64 gpu_va_limit); +typedef void kbase_memory_zone_term(struct kbase_context *kctx); + +struct kbase_memory_zone_init_meta { + kbase_memory_zone_init *init; + kbase_memory_zone_term 
*term; + char *error_msg; +}; + +static const struct kbase_memory_zone_init_meta zones_init[] = { + [SAME_VA_ZONE] = { kbase_reg_zone_same_va_init, kbase_reg_zone_same_va_term, + "Could not initialize SAME_VA zone" }, + [CUSTOM_VA_ZONE] = { kbase_reg_zone_custom_va_init, kbase_reg_zone_custom_va_term, + "Could not initialize CUSTOM_VA zone" }, + [EXEC_VA_ZONE] = { kbase_reg_zone_exec_va_init, kbase_reg_zone_exec_va_term, + "Could not initialize EXEC_VA zone" }, +#if MALI_USE_CSF + [EXEC_FIXED_VA_ZONE] = { kbase_reg_zone_exec_fixed_va_init, + kbase_reg_zone_exec_fixed_va_term, + "Could not initialize EXEC_FIXED_VA zone" }, + [FIXED_VA_ZONE] = { kbase_reg_zone_fixed_va_init, kbase_reg_zone_fixed_va_term, + "Could not initialize FIXED_VA zone" }, +#endif +}; + +int kbase_region_tracker_init(struct kbase_context *kctx) +{ + const u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; + const u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; + int err; + unsigned int i; + + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) { + err = zones_init[i].init(kctx, gpu_va_limit); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "%s, err = %d\n", zones_init[i].error_msg, err); + goto term; + } + } +#if MALI_USE_CSF + INIT_LIST_HEAD(&kctx->csf.event_pages_head); +#endif + kctx->jit_va = false; + + kbase_gpu_vm_unlock(kctx); + + return 0; +term: + while (i-- > 0) + zones_init[i].term(kctx); + + kbase_gpu_vm_unlock(kctx); + return err; +} + +void kbase_region_tracker_term(struct kbase_context *kctx) +{ + unsigned int i; + + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) + zones_init[i].term(kctx); + + kbase_gpu_vm_unlock(kctx); +} + +static bool kbase_has_exec_va_zone_locked(struct 
kbase_context *kctx) +{ + struct kbase_reg_zone *exec_va_zone; + + lockdep_assert_held(&kctx->reg_lock); + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + + return (exec_va_zone->base_pfn != U64_MAX); +} + +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} +KBASE_EXPORT_TEST_API(kbase_has_exec_va_zone); + +/** + * kbase_region_tracker_has_allocs - Determine if any allocations have been made + * on a context's region tracker + * + * @kctx: KBase context + * + * Check the context to determine if any allocations have been made yet from + * any of its zones. This check should be done before resizing a zone, e.g. to + * make space to add a second zone. + * + * Whilst a zone without allocations can be resized whilst other zones have + * allocations, we still check all of @kctx 's zones anyway: this is a stronger + * guarantee and should be adhered to when creating new zones anyway. + * + * Allocations from kbdev zones are not counted. 
+ * + * Return: true if any allocs exist on any zone, false otherwise + */ +static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) +{ + unsigned int zone_idx; + + lockdep_assert_held(&kctx->reg_lock); + + for (zone_idx = 0; zone_idx < MEMORY_ZONE_MAX; zone_idx++) { + struct kbase_reg_zone *zone; + struct kbase_va_region *reg; + u64 zone_base_addr; + enum kbase_memory_zone reg_zone; + + if (!kbase_is_ctx_reg_zone(zone_idx)) + continue; + + zone = kbase_ctx_reg_zone_get(kctx, zone_idx); + zone_base_addr = zone->base_pfn << PAGE_SHIFT; + + reg = kbase_region_tracker_find_region_base_address(kctx, zone_base_addr); + + if (!zone->va_size_pages) { + WARN(reg, + "Should not have found a region that starts at 0x%.16llx for zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx)); + continue; + } + + if (WARN(!reg, + "There should always be a region that starts at 0x%.16llx for zone %s, couldn't find it", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx))) + return true; /* Safest return value */ + + reg_zone = kbase_bits_to_zone(reg->flags); + if (WARN(reg_zone != zone_idx, + "The region that starts at 0x%.16llx should be in zone %s but was found in the wrong zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx), + kbase_reg_zone_get_name(reg_zone))) + return true; /* Safest return value */ + + /* Unless the region is completely free, of the same size as + * the original zone, then it has allocs + */ + if ((!(reg->flags & KBASE_REG_FREE)) || (reg->nr_pages != zone->va_size_pages)) + return true; + } + + /* All zones are the same size as originally made, so there are no + * allocs + */ + return false; +} + +static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) +{ + struct kbase_va_region *same_va_reg; + struct kbase_reg_zone *same_va_zone, *custom_va_zone; + u64 same_va_zone_base_addr; + u64 jit_va_start; + + lockdep_assert_held(&kctx->reg_lock); 
+ + /* + * Modify the same VA free region after creation. The caller has + * ensured that allocations haven't been made, as any allocations could + * cause an overlap to happen with existing same VA allocations and the + * custom VA zone. + */ + same_va_zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); + same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; + + same_va_reg = kbase_region_tracker_find_region_base_address(kctx, same_va_zone_base_addr); + if (WARN(!same_va_reg, + "Already found a free region at the start of every zone, but now cannot find any region for zone SAME_VA base 0x%.16llx", + (unsigned long long)same_va_zone_base_addr)) + return -ENOMEM; + + /* kbase_region_tracker_has_allocs() in the caller has already ensured + * that all of the zones have no allocs, so no need to check that again + * on same_va_reg + */ + WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || + same_va_reg->nr_pages != same_va_zone->va_size_pages); + + if (same_va_reg->nr_pages < jit_va_pages || same_va_zone->va_size_pages < jit_va_pages) + return -ENOMEM; + + /* It's safe to adjust the same VA zone now */ + same_va_reg->nr_pages -= jit_va_pages; + same_va_zone->va_size_pages -= jit_va_pages; + jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); + + /* + * Create a custom VA zone at the end of the VA for allocations which + * JIT can use so it doesn't have to allocate VA from the kernel. Note + * that while the zone has already been zero-initialized during the + * region tracker initialization, we can just overwrite it. 
+ */ + custom_va_zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, custom_va_zone, CUSTOM_VA_ZONE, jit_va_start, + jit_va_pages)) + return -ENOMEM; + + return 0; +} + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, int max_allocations, + int trim_level, int group_id, u64 phys_pages_limit) +{ + int err = 0; + + if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) + return -EINVAL; + + if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + + if (phys_pages_limit > jit_va_pages) + return -EINVAL; + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (phys_pages_limit != jit_va_pages) + kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kbase_gpu_vm_lock(kctx); + + /* Verify that a JIT_VA zone has not been created already. */ + if (kctx->jit_va) { + err = -EINVAL; + goto exit_unlock; + } + + /* If in 64-bit, we always lookup the SAME_VA zone. To ensure it has no + * allocs, we can ensure there are no allocs anywhere. + * + * This check is also useful in 32-bit, just to make sure init of the + * zone is always done before any allocs. + */ + if (kbase_region_tracker_has_allocs(kctx)) { + err = -ENOMEM; + goto exit_unlock; + } + + if (!kbase_ctx_compat_mode(kctx)) + err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. 
+ */ + + if (!err) { + kctx->jit_max_allocations = max_allocations; + kctx->trim_level = trim_level; + kctx->jit_va = true; + kctx->jit_group_id = group_id; +#if MALI_JIT_PRESSURE_LIMIT_BASE + kctx->jit_phys_pages_limit = phys_pages_limit; + dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", phys_pages_limit); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + } + +exit_unlock: + kbase_gpu_vm_unlock(kctx); + + return err; +} +KBASE_EXPORT_TEST_API(kbase_region_tracker_init_jit); + +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) +{ +#if !MALI_USE_CSF + struct kbase_reg_zone *exec_va_zone; + struct kbase_reg_zone *target_zone; + struct kbase_va_region *target_reg; + u64 target_zone_base_addr; + enum kbase_memory_zone target_zone_id; + u64 exec_va_start; + int err; +#endif + + /* The EXEC_VA zone shall be created by making space either: + * - for 64-bit clients, at the end of the process's address space + * - for 32-bit clients, in the CUSTOM zone + * + * Firstly, verify that the number of EXEC_VA pages requested by the + * client is reasonable and then make sure that it is not greater than + * the address space itself before calculating the base address of the + * new zone. + */ + if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) + return -EINVAL; + +#if MALI_USE_CSF + /* For CSF GPUs we now setup the EXEC_VA zone during initialization, + * so this request is a null-op. + */ + CSTD_UNUSED(kctx); + return 0; +#else + kbase_gpu_vm_lock(kctx); + + /* Verify that we've not already created a EXEC_VA zone, and that the + * EXEC_VA zone must come before JIT's CUSTOM_VA. 
+ */ + if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { + err = -EPERM; + goto exit_unlock; + } + + if (exec_va_pages > kctx->gpu_va_end) { + err = -ENOMEM; + goto exit_unlock; + } + + /* Verify no allocations have already been made */ + if (kbase_region_tracker_has_allocs(kctx)) { + err = -ENOMEM; + goto exit_unlock; + } + + if (kbase_ctx_compat_mode(kctx)) { + /* 32-bit client: take from CUSTOM_VA zone */ + target_zone_id = CUSTOM_VA_ZONE; + } else { + /* 64-bit client: take from SAME_VA zone */ + target_zone_id = SAME_VA_ZONE; + } + + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_id); + target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; + + target_reg = kbase_region_tracker_find_region_base_address(kctx, target_zone_base_addr); + if (WARN(!target_reg, + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone %s", + (unsigned long long)target_zone_base_addr, + kbase_reg_zone_get_name(target_zone_id))) { + err = -ENOMEM; + goto exit_unlock; + } + /* kbase_region_tracker_has_allocs() above has already ensured that all + * of the zones have no allocs, so no need to check that again on + * target_reg + */ + WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || + target_reg->nr_pages != target_zone->va_size_pages); + + if (target_reg->nr_pages <= exec_va_pages || target_zone->va_size_pages <= exec_va_pages) { + err = -ENOMEM; + goto exit_unlock; + } + + /* Taken from the end of the target zone */ + exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, exec_va_zone, EXEC_VA_ZONE, exec_va_start, + exec_va_pages)) { + /* Leave through exit_unlock: the VM lock taken above must be released */ + err = -ENOMEM; + goto exit_unlock; + } + + /* Update target zone and corresponding region */ + target_reg->nr_pages -= exec_va_pages; + target_zone->va_size_pages -= exec_va_pages; + err = 0; + 
+exit_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +#endif /* MALI_USE_CSF */ +} 
+KBASE_EXPORT_TEST_API(kbase_region_tracker_init_exec); + +#if MALI_USE_CSF +void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) +{ + kbase_reg_zone_term(&kbdev->csf.mcu_shared_zone); +} + +int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) +{ + return kbase_reg_zone_init(kbdev, &kbdev->csf.mcu_shared_zone, MCU_SHARED_ZONE, + KBASE_REG_ZONE_MCU_SHARED_BASE, MCU_SHARED_ZONE_SIZE); +} +#endif + +/** + * kbase_alloc_free_region - Allocate a free region object. + * + * @zone: The memory zone the new region object will be part of. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * + * The allocated object is not part of any list yet, and is flagged as + * KBASE_REG_FREE. No mapping is allocated yet. + * + * zone is CUSTOM_VA_ZONE or SAME_VA_ZONE. + * + * Return: pointer to the allocated region object on success, NULL otherwise. + */ +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages) +{ + struct kbase_va_region *new_reg; + + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); + + if (unlikely(!nr_pages)) + return NULL; + + if (WARN_ON(!zone)) + return NULL; + + if (unlikely(!zone->base_pfn || !zone->va_size_pages)) + return NULL; + + new_reg = kmem_cache_zalloc(zone->cache, GFP_KERNEL); + + if (!new_reg) + return NULL; + + kbase_refcount_set(&new_reg->va_refcnt, 1); + atomic_set(&new_reg->no_user_free_count, 0); + new_reg->cpu_alloc = NULL; /* no alloc bound yet */ + new_reg->gpu_alloc = NULL; /* no alloc bound yet */ + new_reg->rbtree = &zone->reg_rbtree; + new_reg->flags = kbase_zone_to_bits(zone->id) | KBASE_REG_FREE; + + new_reg->flags |= KBASE_REG_GROWABLE; + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + INIT_LIST_HEAD(&new_reg->jit_node); + INIT_LIST_HEAD(&new_reg->link); + + return new_reg; +} 
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region); + +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, id); + + return kbase_alloc_free_region(zone, start_pfn, nr_pages); +} +KBASE_EXPORT_TEST_API(kbase_ctx_alloc_free_region); + +/** + * kbase_free_alloced_region - Free a region object. + * + * @reg: Region + * + * The described region must be freed of any mapping. + * + * If the region is not flagged as KBASE_REG_FREE, the region's + * alloc object will be released. + * It is a bug if no alloc object exists for non-free regions. + * + * If region is MCU_SHARED_ZONE it is freed + */ +void kbase_free_alloced_region(struct kbase_va_region *reg) +{ +#if MALI_USE_CSF + if (kbase_bits_to_zone(reg->flags) == MCU_SHARED_ZONE) { + kfree(reg); + return; + } +#endif + if (!(reg->flags & KBASE_REG_FREE)) { + struct kbase_context *kctx = kbase_reg_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; + + if (WARN_ON(kbase_is_region_invalid(reg))) + return; + + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n of zone %s", (void *)reg, + kbase_reg_zone_get_name(kbase_bits_to_zone(reg->flags))); +#if MALI_USE_CSF + if (reg->flags & KBASE_REG_CSF_EVENT) + /* + * This should not be reachable if called from 'mcu_shared' functions + * such as: + * kbase_csf_firmware_mcu_shared_mapping_init + * kbase_csf_firmware_mcu_shared_mapping_term + */ + + kbase_unlink_event_mem_page(kctx, reg); +#endif + + mutex_lock(&kctx->jit_evict_lock); + + /* + * The physical allocation should have been removed from the + * eviction list before this function is called. However, in the + * case of abnormal process termination or the app leaking the + * memory kbase_mem_free_region is not called so it can still be + * on the list at termination time of the region tracker. 
+ */ + if (!list_empty(&reg->gpu_alloc->evict_node)) { + /* + * Unlink the physical allocation before unmaking it + * evictable so that the allocation isn't grown back to + * its last backed size as we're going to unmap it + * anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + mutex_unlock(&kctx->jit_evict_lock); + + /* + * If a region has been made evictable then we must + * unmake it before trying to free it. + * If the memory hasn't been reclaimed it will be + * unmapped and freed below, if it has been reclaimed + * then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } else { + mutex_unlock(&kctx->jit_evict_lock); + } + + /* + * Remove the region from the sticky resource metadata + * list should it be there. + */ + kbase_sticky_resource_release_force(kctx, NULL, reg->start_pfn << PAGE_SHIFT); + + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + + reg->flags |= KBASE_REG_VA_FREED; + kbase_va_region_alloc_put(kctx, reg); + } else { + kfree(reg); + } +} +KBASE_EXPORT_TEST_API(kbase_free_alloced_region); + +int kbase_reg_zone_init(struct kbase_device *kbdev, struct kbase_reg_zone *zone, + enum kbase_memory_zone id, u64 base_pfn, u64 va_size_pages) +{ + struct kbase_va_region *reg; + + *zone = (struct kbase_reg_zone){ .reg_rbtree = RB_ROOT, + .base_pfn = base_pfn, + .va_size_pages = va_size_pages, + .id = id, + .cache = kbdev->va_region_slab }; + + if (unlikely(!va_size_pages)) + return 0; + + reg = kbase_alloc_free_region(zone, base_pfn, va_size_pages); + if (unlikely(!reg)) + return -ENOMEM; + + kbase_region_tracker_insert(reg); + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reg_track.h b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.h new file mode 100644 index 000000000000..d29999fdcb9a --- 
/dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.h @@ -0,0 +1,443 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#ifndef _KBASE_REG_TRACK_H_ +#define _KBASE_REG_TRACK_H_ + +#include +#include + +/* Forward declarations of required types. To avoid increasing the compilation + * times of files that include this header, we want to avoid getting too many + * transitive dependencies on both custom and kernel headers. + */ +struct kbase_context; +struct kbase_va_region; +struct kbase_device; +struct kmem_cache; + +#if MALI_USE_CSF +/* Space for 8 different zones */ +#define KBASE_REG_ZONE_BITS 3 +#else +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_BITS 2 +#endif + +/** + * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones + */ +#if MALI_USE_CSF +#define KBASE_REG_ZONE_MAX 6ul +#else +#define KBASE_REG_ZONE_MAX 4ul +#endif + +/* The bits 11-13 (inclusive) of the kbase_va_region flag are reserved + * for information about the zone in which it was allocated. 
+ */ +#define KBASE_REG_ZONE_SHIFT (11ul) +#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << KBASE_REG_ZONE_SHIFT) + +#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS) +#error "Too many zones for the number of zone bits defined" +#endif + +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) + +#if MALI_USE_CSF +/* only used with 32-bit clients */ +/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). + */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +#else +/* only used with 32-bit clients */ +/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the + * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 + * bytes, see mmap64 man page for reference). So we put the default limit to the + * maximum possible on Linux and shrink it down, if required by the GPU, during + * initialization. + */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ +#endif + +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. + */ +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ +#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + +#if MALI_USE_CSF +#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) +#define MCU_SHARED_ZONE_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - KBASE_REG_ZONE_MCU_SHARED_BASE) + +/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit + * clients, and 2^43 for 32-bit clients. + */ +#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) + +/* Executable zone supporting FIXED/FIXABLE allocations. 
+ * It is always 4GB in size. + */ +#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + +/* Non-executable zone supporting FIXED/FIXABLE allocations. + * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from + * (2^43) up to (2^44)-1 for 32-bit userspace clients. For the same reason, + * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. + */ +#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) + +#endif + +/** + * enum kbase_memory_zone - Kbase memory zone identifier + * @SAME_VA_ZONE: Memory zone for allocations where the GPU and CPU VA coincide. + * @CUSTOM_VA_ZONE: When operating in compatibility mode, this zone is used to + * allow 32-bit userspace (either on a 32-bit device or a + * 32-bit application on a 64-bit device) to address the entirety + * of the GPU address space. The @CUSTOM_VA_ZONE is also used + * for JIT allocations: on 64-bit systems, the zone is created + * by reducing the size of the SAME_VA zone by a user-controlled + * amount, whereas on 32-bit systems, it is created as part of + * the existing CUSTOM_VA_ZONE + * @EXEC_VA_ZONE: Memory zone used to track GPU-executable memory. The start + * and end of this zone depend on the individual platform, + * and it is initialized upon user process request. + * @EXEC_FIXED_VA_ZONE: Memory zone used to contain GPU-executable memory + * that also permits FIXED/FIXABLE allocations. + * @FIXED_VA_ZONE: Memory zone used to allocate memory at userspace-supplied + * addresses. + * @MCU_SHARED_ZONE: Memory zone created for mappings shared between the MCU + * and Kbase. Currently this is the only zone type that is + * created on a per-device, rather than a per-context + * basis. + * @MEMORY_ZONE_MAX: Sentinel value used for iterating over all the memory zone + * identifiers. 
+ * @CONTEXT_ZONE_MAX: Sentinel value used to keep track of the last per-context + * zone for iteration. + */ +enum kbase_memory_zone { + SAME_VA_ZONE, + CUSTOM_VA_ZONE, + EXEC_VA_ZONE, +#if IS_ENABLED(MALI_USE_CSF) + EXEC_FIXED_VA_ZONE, + FIXED_VA_ZONE, + MCU_SHARED_ZONE, +#endif + MEMORY_ZONE_MAX, +#if IS_ENABLED(MALI_USE_CSF) + CONTEXT_ZONE_MAX = FIXED_VA_ZONE + 1 +#else + CONTEXT_ZONE_MAX = EXEC_VA_ZONE + 1 +#endif +}; + +/** + * struct kbase_reg_zone - GPU memory zone information and region tracking + * @reg_rbtree: RB tree used to track kbase memory regions. + * @base_pfn: Page Frame Number in GPU virtual address space for the start of + * the Zone + * @va_size_pages: Size of the Zone in pages + * @id: Memory zone identifier + * @cache: Pointer to a per-device slab allocator to allow for quickly allocating + * new regions + * + * Track information about a zone KBASE_REG_ZONE() and related macros. + * In future, this could also store the &rb_root that are currently in + * &kbase_context and &kbase_csf_device. + */ +struct kbase_reg_zone { + struct rb_root reg_rbtree; + u64 base_pfn; + u64 va_size_pages; + enum kbase_memory_zone id; + struct kmem_cache *cache; +}; + +/** + * kbase_zone_to_bits - Convert a memory zone @zone to the corresponding + * bitpattern, for ORing together with other flags. + * @zone: Memory zone + * + * Return: Bitpattern with the appropriate bits set. 
+ */ +unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone); + +/** + * kbase_bits_to_zone - Convert the bitpattern @zone_bits to the corresponding + * zone identifier + * @zone_bits: Memory allocation flag containing a zone pattern + * + * Return: Zone identifier for valid zone bitpatterns. + */ +enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits); + +/** + * kbase_reg_zone_get_name - Get the string name for a given memory zone + * @zone: Memory zone identifier + * + * Return: string for valid memory zone, NULL otherwise + */ +char *kbase_reg_zone_get_name(enum kbase_memory_zone zone); + +/** + * kbase_is_ctx_reg_zone - Determine whether a zone is associated with a + * context or with the device + * @zone: Zone identifier + * + * Return: True if @zone is a context zone, False otherwise + */ +static inline bool kbase_is_ctx_reg_zone(enum kbase_memory_zone zone) +{ +#if MALI_USE_CSF + return !(zone == MCU_SHARED_ZONE); +#else + return true; +#endif +} + +/** + * kbase_region_tracker_init - Initialize the region tracker data structure + * @kctx: kbase context + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init(struct kbase_context *kctx); + +/** + * kbase_region_tracker_init_jit - Initialize the just-in-time memory + * allocation region + * @kctx: Kbase context. + * @jit_va_pages: Size of the JIT region in pages. + * @max_allocations: Maximum number of allocations allowed for the JIT region. + * Valid range is 0..%BASE_JIT_ALLOC_COUNT. + * @trim_level: Trim level for the JIT region. + * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. + * @group_id: The physical group ID from which to allocate JIT memory. + * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @phys_pages_limit: Maximum number of physical pages to use to back the JIT + * region. Must not exceed @jit_va_pages. + * + * Return: 0 if success, negative error code otherwise. 
+ */ +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, int max_allocations, + int trim_level, int group_id, u64 phys_pages_limit); + +/** + * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region + * @kctx: kbase context + * @exec_va_pages: Size of the EXEC_VA region in pages. + * It must not be greater than 4 GB. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); + +/** + * kbase_region_tracker_term - Terminate the JIT region + * @kctx: kbase context + */ +void kbase_region_tracker_term(struct kbase_context *kctx); + +/** + * kbase_region_tracker_term - Terminate the JIT region + * @kctx: kbase context + */ +void kbase_region_tracker_term(struct kbase_context *kctx); + +/** + * kbase_region_tracker_find_region_enclosing_address - Find the region containing + * a given GPU VA. + * + * @kctx: kbase context containing the region + * @gpu_addr: pointer to check + * + * Context: must be called with region lock held. + * + * Return: pointer to the valid region on success, NULL otherwise + * + */ +struct kbase_va_region * +kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); + +/** + * kbase_region_tracker_find_region_base_address - Check that a pointer is + * actually a valid region. + * @kctx: kbase context containing the region + * @gpu_addr: pointer to check + * + * Must be called with context lock held. + * + * Return: pointer to the valid region on success, NULL otherwise + */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, + u64 gpu_addr); + +/** + * kbase_remove_va_region - Remove a region object from the global list. + * + * @kbdev: The kbase device + * @reg: Region object to remove + * + * The region reg is removed, possibly by merging with other free and + * compatible adjacent regions. 
It must be called with the context + * region lock held. The associated memory is not released (see + * kbase_free_alloced_region). Internal use only. + */ +void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg); + +/** + * kbase_reg_to_kctx - Obtain the kbase context tracking a VA region. + * @reg: VA region + * + * Return: + * * pointer to kbase context of the memory allocation + * * NULL if the region does not belong to a kbase context (for instance, + * if the allocation corresponds to a shared MCU region on CSF). + */ +struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg); + +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages); + +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages); + +/** + * kbase_add_va_region - Add a VA region to the region list for a context. + * + * @kctx: kbase context containing the region + * @reg: the region to add + * @addr: the address to insert the region at + * @nr_pages: the number of pages in the region + * @align: the minimum alignment in pages + * + * Return: 0 on success, error code otherwise. + */ +int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align); + +/** + * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree + * + * @kbdev: The kbase device + * @reg: The region to add + * @addr: The address to add the region at, or 0 to map at any available address + * @nr_pages: The size of the region in pages + * @align: The minimum alignment in pages + * + * Insert a region into the rbtree that was specified when the region was + * created. If addr is 0 a free area in the rbtree is used, otherwise the + * specified address is used. 
+ * + * Note that this method should be removed when we get the per-zone locks, as + * there will be no compelling use-case for manually separating the allocation + * and the tracking operations. + * + * Return: 0 on success, error code otherwise. + */ +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align); + +/** + * kbase_free_alloced_region - Free a region object. + * + * @reg: VA region + * + * The indicated region must be freed of any mapping. Regions with the following + * flags have special handling: + * * + * + * If the region is not flagged as KBASE_REG_FREE, the region's + * alloc object will be released. + * It is a bug if no alloc object exists for non-free regions. + * + * If region is MCU_SHARED it is freed. + */ +void kbase_free_alloced_region(struct kbase_va_region *reg); + +/** + * kbase_reg_zone_init - Initialize a zone in @kctx + * @kbdev: Pointer to kbase device in order to initialize the VA region cache + * @zone: Memory zone + * @id: Memory zone identifier to facilitate lookups + * @base_pfn: Page Frame Number in GPU virtual address space for the start of + * the Zone + * @va_size_pages: Size of the Zone in pages + * + * Return: + * * 0 on success + * * -ENOMEM on error + */ +int kbase_reg_zone_init(struct kbase_device *kbdev, struct kbase_reg_zone *zone, + enum kbase_memory_zone id, u64 base_pfn, u64 va_size_pages); + +void kbase_reg_zone_term(struct kbase_reg_zone *zone); + +/** + * kbase_ctx_reg_zone_get_nolock - Get a zone from @kctx where the caller does + * not have @kctx 's region lock + * @kctx: Pointer to kbase context + * @zone: Zone identifier + * + * This should only be used in performance-critical paths where the code is + * resilient to a race with the zone changing, and only when the zone is tracked + * by the @kctx. 
+ * + * Return: The zone corresponding to @zone + */ +struct kbase_reg_zone *kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, + enum kbase_memory_zone zone); + +/** + * kbase_ctx_reg_zone_get - Get a memory zone from @kctx + * @kctx: Pointer to kbase context + * @zone: Zone identifier + * + * Note that the zone is not refcounted, so there is no corresponding operation to + * put the zone back. + * + * Return: The zone corresponding to @zone + */ +struct kbase_reg_zone *kbase_ctx_reg_zone_get(struct kbase_context *kctx, + enum kbase_memory_zone zone); + +/** + * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone + * @zone: zone to query + * + * Return: The end of the zone corresponding to @zone + */ +static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) +{ + if (WARN_ON(!zone)) + return 0; + + return zone->base_pfn + zone->va_size_pages; +} + +#endif /* _KBASE_REG_TRACK_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c index 147082c15fed..d3c030fe6915 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2016, 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -93,8 +93,7 @@ void kbase_io_history_term(struct kbase_io_history *h) h->buf = NULL; } -void kbase_io_history_add(struct kbase_io_history *h, - void __iomem const *addr, u32 value, u8 write) +void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, u32 value, u8 write) { struct kbase_io_access *io; unsigned long flags; @@ -128,15 +127,13 @@ void kbase_io_history_dump(struct kbase_device *kbdev) dev_err(kbdev->dev, "Register IO History:"); iters = (h->size > h->count) ? h->count : h->size; - dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, - h->count); + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, h->count); for (i = 0; i < iters; ++i) { - struct kbase_io_access *io = - &h->buf[(h->count - iters + i) % h->size]; + struct kbase_io_access *io = &h->buf[(h->count - iters + i) % h->size]; char const access = (io->addr & 1) ? 'w' : 'r'; - dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i, - access, (unsigned long)(io->addr & ~0x1), io->value); + dev_err(kbdev->dev, "%6zu: %c: reg 0x%16pK val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); } spin_unlock_irqrestore(&h->lock, flags); @@ -178,6 +175,8 @@ static int regs_history_show(struct seq_file *sfile, void *data) size_t iters; unsigned long flags; + CSTD_UNUSED(data); + if (!h->enabled) { seq_puts(sfile, "The register access history is disabled\n"); goto out; @@ -186,15 +185,13 @@ static int regs_history_show(struct seq_file *sfile, void *data) spin_lock_irqsave(&h->lock, flags); iters = (h->size > h->count) ? 
h->count : h->size; - seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, - h->count); + seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, h->count); for (i = 0; i < iters; ++i) { - struct kbase_io_access *io = - &h->buf[(h->count - iters + i) % h->size]; + struct kbase_io_access *io = &h->buf[(h->count - iters + i) % h->size]; char const access = (io->addr & 1) ? 'w' : 'r'; - seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i, - access, (unsigned long)(io->addr & ~0x1), io->value); + seq_printf(sfile, "%6zu: %c: reg 0x%16pK val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); } spin_unlock_irqrestore(&h->lock, flags); @@ -226,14 +223,11 @@ static const struct file_operations regs_history_fops = { void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_bool("regs_history_enabled", 0644, - kbdev->mali_debugfs_directory, - &kbdev->io_history.enabled); - debugfs_create_file("regs_history_size", 0644, - kbdev->mali_debugfs_directory, - &kbdev->io_history, &regs_history_size_fops); - debugfs_create_file("regs_history", 0444, - kbdev->mali_debugfs_directory, &kbdev->io_history, - &regs_history_fops); + debugfs_create_bool("regs_history_enabled", 0644, kbdev->mali_debugfs_directory, + &kbdev->io_history.enabled); + debugfs_create_file("regs_history_size", 0644, kbdev->mali_debugfs_directory, + &kbdev->io_history, &regs_history_size_fops); + debugfs_create_file("regs_history", 0444, kbdev->mali_debugfs_directory, &kbdev->io_history, + &regs_history_fops); } #endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h index ae327dd799a4..938fcd4f4d43 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -81,4 +81,4 @@ void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); #endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ +#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h index 48ea9954b17c..035b1b2bda3e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -163,8 +163,7 @@ void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, - unsigned int flags); +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int flags); /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.c b/drivers/gpu/arm/bifrost/mali_kbase_smc.c index abbe8d56d082..d609e5dbc27c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_smc.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015, 2018, 2020-2021 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,25 +35,20 @@ * string is meant to be concatenated with the inline asm string and will * cause compilation to stop on mismatch. (for details, see gcc PR 15089) */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" +#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" #endif -static noinline u64 invoke_smc_fid(u64 function_id, - u64 arg0, u64 arg1, u64 arg2) +static noinline u64 invoke_smc_fid(u64 function_id, u64 arg0, u64 arg1, u64 arg2) { register u64 x0 asm("x0") = function_id; register u64 x1 asm("x1") = arg0; register u64 x2 asm("x2") = arg1; register u64 x3 asm("x3") = arg2; - asm volatile( - __asmeq("%0", "x0") - __asmeq("%1", "x1") - __asmeq("%2", "x2") - __asmeq("%3", "x3") - "smc #0\n" - : "+r" (x0) - : "r" (x1), "r" (x2), "r" (x3)); + asm volatile(__asmeq("%0", "x0") __asmeq("%1", "x1") __asmeq("%2", "x2") + __asmeq("%3", "x3") "smc #0\n" + : "+r"(x0) + : "r"(x1), "r"(x2), "r"(x3)); return x0; } @@ -68,8 +63,7 @@ u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) return invoke_smc_fid(fid, arg0, arg1, arg2); } -u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, - u64 arg0, u64 arg1, u64 arg2) +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, u64 arg0, u64 arg1, u64 arg2) { u32 fid = 0; @@ -87,4 +81,3 @@ u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, } #endif /* CONFIG_ARM64 */ - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.h b/drivers/gpu/arm/bifrost/mali_kbase_smc.h index 40a348388598..b9f224f7ae51 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_smc.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015, 2020-2022 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,7 +34,6 @@ #define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) #define SMC_OEN_STD (4 << SMC_OEN_OFFSET) - /** * kbase_invoke_smc_fid - Perform a secure monitor call * @fid: The SMC function to call, see SMC Calling convention. @@ -61,8 +60,7 @@ u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); * * Return: the return value from the SMC call. */ -u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, - u64 arg0, u64 arg1, u64 arg2); +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, u64 arg0, u64 arg1, u64 arg2); #endif /* CONFIG_ARM64 */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index a9312a0c433e..bbd756dbf840 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,7 @@ #include #include #include +#include #if !MALI_USE_CSF /** @@ -82,21 +83,17 @@ static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) /* Schedule timeout of this atom after a period if it is not active */ if (!timer_pending(&kctx->soft_job_timeout)) { - int timeout_ms = atomic_read( - &kctx->kbdev->js_data.soft_job_timeout_ms); - mod_timer(&kctx->soft_job_timeout, - jiffies + msecs_to_jiffies(timeout_ms)); + int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); + mod_timer(&kctx->soft_job_timeout, jiffies + msecs_to_jiffies(timeout_ms)); } } -static int kbasep_read_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char *status) +static int kbasep_read_soft_event_status(struct kbase_context *kctx, u64 evt, unsigned char *status) { unsigned char *mapped_evt; struct kbase_vmap_struct map; - mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), - KBASE_REG_CPU_RD, &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), KBASE_REG_CPU_RD, &map); if (!mapped_evt) return -EFAULT; @@ -107,18 +104,16 @@ static int kbasep_read_soft_event_status( return 0; } -static int kbasep_write_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char new_status) +static int kbasep_write_soft_event_status(struct kbase_context *kctx, u64 evt, + unsigned char new_status) { unsigned char *mapped_evt; struct kbase_vmap_struct map; - if ((new_status != BASE_JD_SOFT_EVENT_SET) && - (new_status != BASE_JD_SOFT_EVENT_RESET)) + if ((new_status != BASE_JD_SOFT_EVENT_SET) && (new_status != BASE_JD_SOFT_EVENT_RESET)) return -EINVAL; - mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), - KBASE_REG_CPU_WR, &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), KBASE_REG_CPU_WR, &map); if (!mapped_evt) return -EFAULT; @@ 
-150,7 +145,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) #ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + pm_active_err = kbase_pm_context_active_handle_suspend( + kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); if (pm_active_err) { struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; @@ -172,8 +168,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, - &ts); + kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, &ts); kbase_pm_context_idle(kctx->kbdev); @@ -221,8 +216,7 @@ void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) static void kbasep_soft_event_complete_job(struct work_struct *work) { - struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, - work); + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, work); struct kbase_context *kctx = katom->kctx; int resched; @@ -242,8 +236,7 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry( - entry, struct kbase_jd_atom, queue); + struct kbase_jd_atom *katom = list_entry(entry, struct kbase_jd_atom, queue); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_EVENT_WAIT: @@ -251,10 +244,8 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) list_del(&katom->queue); katom->event_code = BASE_JD_EVENT_DONE; - INIT_WORK(&katom->work, - kbasep_soft_event_complete_job); - queue_work(kctx->jctx.job_done_wq, - 
&katom->work); + INIT_WORK(&katom->work, kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, &katom->work); } else { /* There are still other waiting jobs, we cannot * cancel the timer yet. @@ -293,17 +284,15 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) dep->status == KBASE_JD_ATOM_STATE_COMPLETED) continue; - if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { + if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == + BASE_JD_REQ_SOFT_FENCE_TRIGGER) { /* Found blocked trigger fence. */ struct kbase_sync_fence_info info; if (!kbase_sync_fence_in_info_get(dep, &info)) { dev_warn(dev, "\tVictim trigger atom %d fence [%pK] %s: %s\n", - kbase_jd_atom_id(kctx, dep), - info.fence, - info.name, + kbase_jd_atom_id(kctx, dep), info.fence, info.name, kbase_sync_status_string(info.status)); } } @@ -329,12 +318,9 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) return; } - dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", - kctx->tgid, kctx->id, - kbase_jd_atom_id(kctx, katom), - info.fence, timeout_ms); - dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", - info.fence, info.name, + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", kctx->tgid, + kctx->id, kbase_jd_atom_id(kctx, katom), info.fence, timeout_ms); + dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", info.fence, info.name, kbase_sync_status_string(info.status)); /* Search for blocked trigger atoms */ @@ -352,8 +338,7 @@ struct kbase_fence_debug_work { static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) { - struct kbase_fence_debug_work *w = container_of(work, - struct kbase_fence_debug_work, work); + struct kbase_fence_debug_work *w = container_of(work, struct kbase_fence_debug_work, work); struct kbase_jd_atom *katom = w->katom; struct kbase_context *kctx = katom->kctx; @@ -384,10 +369,8 @@ static void kbase_fence_debug_timeout(struct 
kbase_jd_atom *katom) void kbasep_soft_job_timeout_worker(struct timer_list *timer) { - struct kbase_context *kctx = container_of(timer, struct kbase_context, - soft_job_timeout); - u32 timeout_ms = (u32)atomic_read( - &kctx->kbdev->js_data.soft_job_timeout_ms); + struct kbase_context *kctx = container_of(timer, struct kbase_context, soft_job_timeout); + u32 timeout_ms = (u32)atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); ktime_t cur_time = ktime_get_raw(); bool restarting = false; unsigned long lflags; @@ -395,10 +378,8 @@ void kbasep_soft_job_timeout_worker(struct timer_list *timer) spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry(entry, - struct kbase_jd_atom, queue); - s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, - katom->start_timestamp)); + struct kbase_jd_atom *katom = list_entry(entry, struct kbase_jd_atom, queue); + s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, katom->start_timestamp)); if (elapsed_time < (s64)timeout_ms) { restarting = true; @@ -448,8 +429,7 @@ static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) return 1; } -static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, - unsigned char new_status) +static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, unsigned char new_status) { /* Complete jobs waiting on the same event */ struct kbase_context *kctx = katom->kctx; @@ -473,9 +453,7 @@ static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, * * Return: 0 on success, a negative error code on failure. 
*/ -int kbase_soft_event_update(struct kbase_context *kctx, - u64 event, - unsigned char new_status) +int kbase_soft_event_update(struct kbase_context *kctx, u64 event, unsigned char new_status) { int err = 0; @@ -514,7 +492,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) kbase_gpu_vm_lock(katom->kctx); for (i = 0; i < nr; i++) { - int p; + uint p; struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; if (!buffers[i].pages) @@ -531,8 +509,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) kfree(buffers[i].pages); if (gpu_alloc) { switch (gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { kbase_free_user_buffer(&buffers[i]); break; } @@ -575,8 +552,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) goto out_cleanup; } - ret = copy_from_user(user_buffers, user_structs, - sizeof(*user_buffers)*nr); + ret = copy_from_user(user_buffers, user_structs, sizeof(*user_buffers) * nr); if (ret) { ret = -EFAULT; goto out_cleanup; @@ -587,7 +563,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) u64 page_addr = addr & PAGE_MASK; u64 end_page_addr = addr + user_buffers[i].size - 1; u64 last_page_addr = end_page_addr & PAGE_MASK; - int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; + size_t nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1; int pinned_pages; struct kbase_va_region *reg; struct base_external_resource user_extres; @@ -608,15 +584,12 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) } buffers[i].size = user_buffers[i].size; - if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / - sizeof(struct page *))) { + if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / sizeof(struct page *))) { buffers[i].is_vmalloc = true; - buffers[i].pages = vzalloc(nr_pages * - sizeof(struct page *)); + buffers[i].pages = vzalloc(nr_pages * sizeof(struct page *)); } else { buffers[i].is_vmalloc = false; - 
buffers[i].pages = kcalloc(nr_pages, - sizeof(struct page *), GFP_KERNEL); + buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); } if (!buffers[i].pages) { @@ -624,10 +597,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) goto out_cleanup; } - pinned_pages = get_user_pages_fast(page_addr, - nr_pages, - 1, /* Write */ - buffers[i].pages); + pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1, /* Write */ + buffers[i].pages); if (pinned_pages < 0) { /* get_user_pages_fast has failed - page array is not * valid. Don't try to release any pages. @@ -637,7 +608,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ret = pinned_pages; goto out_cleanup; } - if (pinned_pages != nr_pages) { + if ((size_t)pinned_pages != nr_pages) { /* Adjust number of pages, so that we only attempt to * release pages in the array that we know are valid. */ @@ -655,11 +626,9 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) kbase_gpu_vm_lock(katom->kctx); reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, user_extres.ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + katom->kctx, user_extres.ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - if (kbase_is_region_invalid_or_free(reg) || - reg->gpu_alloc == NULL) { + if (kbase_is_region_invalid_or_free(reg) || reg->gpu_alloc == NULL) { ret = -EINVAL; goto out_unlock; } @@ -667,12 +636,13 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); buffers[i].nr_extres_pages = reg->nr_pages; - if (reg->nr_pages*PAGE_SIZE != buffers[i].size) - dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); + if (reg->nr_pages * PAGE_SIZE != buffers[i].size) + dev_warn( + katom->kctx->kbdev->dev, + "Copy buffer is not of same size as the external resource to copy.\n"); switch (reg->gpu_alloc->type) { - case 
KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; const unsigned long nr_pages = alloc->imported.user_buf.nr_pages; const unsigned long start = alloc->imported.user_buf.address; @@ -681,8 +651,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ret = -EINVAL; goto out_unlock; } - buffers[i].extres_pages = kcalloc(nr_pages, - sizeof(struct page *), GFP_KERNEL); + buffers[i].extres_pages = + kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!buffers[i].extres_pages) { ret = -ENOMEM; goto out_unlock; @@ -690,7 +660,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) kbase_gpu_vm_unlock(katom->kctx); ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages); kbase_gpu_vm_lock(katom->kctx); - if (ret != nr_pages) { + if (ret != (int)nr_pages) { /* Adjust number of pages, so that we only * attempt to release pages in the array that we * know are valid. @@ -729,8 +699,8 @@ out_cleanup: } #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE -static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, - unsigned long page_num, struct page **page) +static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, unsigned long page_num, + struct page **page) { struct sg_table *sgt = gpu_alloc->imported.umm.sgt; struct sg_page_iter sg_iter; @@ -749,7 +719,7 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, if (page_index == page_num) { *page = sg_page_iter_page(&sg_iter); - return kmap(*page); + return kbase_kmap(*page); } page_index++; } @@ -775,7 +745,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, unsigned int target_page_nr = 0; struct page **pages = buf_data->pages; u64 offset = buf_data->offset; - size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; + size_t extres_size = buf_data->nr_extres_pages * PAGE_SIZE; size_t to_copy = min(extres_size, buf_data->size); struct 
kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; int ret = 0; @@ -790,25 +760,22 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, } switch (gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - for (i = 0; i < buf_data->nr_extres_pages && - target_page_nr < buf_data->nr_pages; i++) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + for (i = 0; i < buf_data->nr_extres_pages && target_page_nr < buf_data->nr_pages; + i++) { struct page *pg = buf_data->extres_pages[i]; - void *extres_page = kmap(pg); - + void *extres_page = kbase_kmap(pg); if (extres_page) { - ret = kbase_mem_copy_to_pinned_user_pages( - pages, extres_page, &to_copy, - buf_data->nr_pages, - &target_page_nr, offset); - kunmap(pg); + ret = kbase_mem_copy_to_pinned_user_pages(pages, extres_page, + &to_copy, + buf_data->nr_pages, + &target_page_nr, offset); + kbase_kunmap(pg, extres_page); if (ret) goto out_unlock; } } - } - break; + } break; case KBASE_MEM_TYPE_IMPORTED_UMM: { struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; @@ -816,14 +783,13 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE) dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch"); - dma_to_copy = min(dma_buf->size, - (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); + dma_to_copy = min(dma_buf->size, (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); ret = dma_buf_begin_cpu_access(dma_buf, DMA_FROM_DEVICE); if (ret) goto out_unlock; - for (i = 0; i < dma_to_copy/PAGE_SIZE && - target_page_nr < buf_data->nr_pages; i++) { + for (i = 0; i < dma_to_copy / PAGE_SIZE && target_page_nr < buf_data->nr_pages; + i++) { #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE struct page *pg; void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); @@ -831,13 +797,13 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, void *extres_page = dma_buf_kmap(dma_buf, i); #endif if (extres_page) { - ret = 
kbase_mem_copy_to_pinned_user_pages( - pages, extres_page, &to_copy, - buf_data->nr_pages, - &target_page_nr, offset); + ret = kbase_mem_copy_to_pinned_user_pages(pages, extres_page, + &to_copy, + buf_data->nr_pages, + &target_page_nr, offset); #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE - kunmap(pg); + kbase_kunmap(pg, extres_page); #else dma_buf_kunmap(dma_buf, i, extres_page); #endif @@ -878,10 +844,12 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) -int kbasep_jit_alloc_validate(struct kbase_context *kctx, - struct base_jit_alloc_info *info) +int kbasep_jit_alloc_validate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { - int j; + size_t j; + + CSTD_UNUSED(kctx); + /* If the ID is zero, then fail the job */ if (info->id == 0) return -EINVAL; @@ -923,8 +891,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr * cannot be 0 */ - if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && - !info->heap_info_gpu_addr) + if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && !info->heap_info_gpu_addr) return -EINVAL; #endif /* !MALI_USE_CSF */ @@ -935,7 +902,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) { - __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; + __user u8 *data = (__user u8 *)(uintptr_t)katom->jc; struct base_jit_alloc_info *info; struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -958,8 +925,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) count = katom->nr_extres; /* Sanity checks */ - if (!data || count > kctx->jit_max_allocations || - count > ARRAY_SIZE(kctx->jit_alloc)) { + if (!data || count > kctx->jit_max_allocations || count > ARRAY_SIZE(kctx->jit_alloc)) { ret = -EINVAL; goto fail; } @@ -983,9 +949,8 @@ static int 
kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) if (ret) goto free_info; KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( - kbdev, katom, info->va_pages, info->commit_pages, - info->extension, info->id, info->bin_id, - info->max_allocations, info->flags, info->usage_id); + kbdev, katom, info->va_pages, info->commit_pages, info->extension, info->id, + info->bin_id, info->max_allocations, info->flags, info->usage_id); } katom->jit_blocked = false; @@ -1015,11 +980,10 @@ fail: static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom) { - if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != - BASE_JD_REQ_SOFT_JIT_FREE)) + if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != BASE_JD_REQ_SOFT_JIT_FREE)) return NULL; - return (u8 *) katom->softjob_data; + return (u8 *)katom->softjob_data; } static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) @@ -1053,8 +1017,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) u32 i; bool ignore_pressure_limit = false; - trace_sysgraph(SGR_SUBMIT, kctx->id, - kbase_jd_atom_id(kctx, katom)); + trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom)); if (katom->jit_blocked) { list_del(&katom->queue); @@ -1081,8 +1044,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * is disabled at the context scope, then bypass JIT pressure limit * logic in kbase_jit_allocate(). 
*/ - if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) - || (kctx->jit_current_allocations == 0)) { + if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) || (kctx->jit_current_allocations == 0)) { ignore_pressure_limit = true; } #else @@ -1099,8 +1061,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) info = katom->softjob_data; for (j = 0; j < i; j++, info++) { kbase_jit_free(kctx, kctx->jit_alloc[info->id]); - kctx->jit_alloc[info->id] = - KBASE_RESERVED_REG_JIT_ALLOC; + kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC; } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; @@ -1120,11 +1081,10 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) break; if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == - BASE_JD_REQ_SOFT_JIT_FREE) { + BASE_JD_REQ_SOFT_JIT_FREE) { u8 *free_ids = kbase_jit_free_get_ids(jit_atom); - if (free_ids && *free_ids && - kctx->jit_alloc[*free_ids]) { + if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) { /* A JIT free which is active and * submitted before this atom */ @@ -1141,12 +1101,12 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * allocation itself failed. */ for (; i < count; i++, info++) { - kctx->jit_alloc[info->id] = - KBASE_RESERVED_REG_JIT_ALLOC; + kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC; } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n", + dev_warn_ratelimited(kbdev->dev, + "JIT alloc softjob failed: atom id %d\n", kbase_jd_atom_id(kctx, katom)); return 0; } @@ -1179,8 +1139,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * Write the address of the JIT allocation to the user provided * GPU allocation. 
*/ - ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), - KBASE_REG_CPU_WR, &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), KBASE_REG_CPU_WR, + &mapping); if (!ptr) { /* * Leave the allocations "live" as the JIT free atom @@ -1199,19 +1159,17 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * Retrieve the mmu flags for JIT allocation * only if dumping is enabled */ - entry_mmu_flags = kbase_mmu_create_ate(kbdev, - (struct tagged_addr){ 0 }, reg->flags, - MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); + entry_mmu_flags = kbase_mmu_create_ate(kbdev, (struct tagged_addr){ 0 }, reg->flags, + MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); #endif - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( - kbdev, katom, info->gpu_alloc_addr, new_addr, - info->flags, entry_mmu_flags, info->id, - info->commit_pages, info->extension, info->va_pages); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom, info->gpu_alloc_addr, new_addr, + info->flags, entry_mmu_flags, info->id, + info->commit_pages, info->extension, + info->va_pages); kbase_vunmap(kctx, &mapping); - kbase_trace_jit_report_gpu_mem(kctx, reg, - KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + kbase_trace_jit_report_gpu_mem(kctx, reg, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); } katom->event_code = BASE_JD_EVENT_DONE; @@ -1245,7 +1203,7 @@ static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; - __user void *data = (__user void *)(uintptr_t) katom->jc; + __user void *data = (__user void *)(uintptr_t)katom->jc; u8 *ids; u32 count = MAX(katom->nr_extres, 1); u32 i; @@ -1275,7 +1233,7 @@ static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) goto free_info; } - if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { + if (copy_from_user(ids, data, sizeof(*ids) * count) != 0) { ret = -EINVAL; goto free_info; } @@ -1322,8 +1280,7 @@ static void kbase_jit_free_process(struct kbase_jd_atom *katom) 
static void kbasep_jit_finish_worker(struct work_struct *work) { - struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, - work); + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, work); struct kbase_context *kctx = katom->kctx; int resched; @@ -1341,18 +1298,15 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) LIST_HEAD(jit_pending_alloc_list); struct list_head *i, *tmp; - list_splice_tail_init(&kctx->jctx.jit_pending_alloc, - &jit_pending_alloc_list); + list_splice_tail_init(&kctx->jctx.jit_pending_alloc, &jit_pending_alloc_list); list_for_each_safe(i, tmp, &jit_pending_alloc_list) { - struct kbase_jd_atom *pending_atom = list_entry(i, - struct kbase_jd_atom, queue); + struct kbase_jd_atom *pending_atom = list_entry(i, struct kbase_jd_atom, queue); KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); kbase_kinstr_jm_atom_sw_start(pending_atom); if (kbase_jit_allocate_process(pending_atom) == 0) { /* Atom has completed */ - INIT_WORK(&pending_atom->work, - kbasep_jit_finish_worker); + INIT_WORK(&pending_atom->work, kbasep_jit_finish_worker); queue_work(kctx->jctx.job_done_wq, &pending_atom->work); } KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); @@ -1382,11 +1336,10 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) * still succeed this soft job but don't try and free * the allocation. 
*/ - if (kctx->jit_alloc[ids[j]] != - KBASE_RESERVED_REG_JIT_ALLOC) { - KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, - kctx->jit_alloc[ids[j]]-> - gpu_alloc->nents, ids[j]); + if (kctx->jit_alloc[ids[j]] != KBASE_RESERVED_REG_JIT_ALLOC) { + KBASE_TLSTREAM_TL_JIT_USEDPAGES( + kctx->kbdev, kctx->jit_alloc[ids[j]]->gpu_alloc->nents, + ids[j]); kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); } kctx->jit_alloc[ids[j]] = NULL; @@ -1405,8 +1358,7 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) u64 count = 0; size_t copy_size; - user_ext_res = (__user struct base_external_resource_list *) - (uintptr_t) katom->jc; + user_ext_res = (__user struct base_external_resource_list *)(uintptr_t)katom->jc; /* Fail the job if there is no info structure */ if (!user_ext_res) @@ -1440,7 +1392,7 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) { struct base_external_resource_list *ext_res; - int i; + uint64_t i; bool failed = false; ext_res = katom->softjob_data; @@ -1452,15 +1404,12 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) for (i = 0; i < ext_res->count; i++) { u64 gpu_addr; - gpu_addr = ext_res->ext_res[i].ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + gpu_addr = ext_res->ext_res[i].ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE; if (map) { - if (!kbase_sticky_resource_acquire(katom->kctx, - gpu_addr)) + if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) goto failed_loop; } else { - if (!kbase_sticky_resource_release_force(katom->kctx, NULL, - gpu_addr)) + if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) failed = true; } } @@ -1482,7 +1431,7 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) failed_loop: while (i > 0) { u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; 
kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr); @@ -1514,8 +1463,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); kbase_kinstr_jm_atom_sw_start(katom); - trace_sysgraph(SGR_SUBMIT, kctx->id, - kbase_jd_atom_id(kctx, katom)); + trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom)); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: @@ -1524,12 +1472,10 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) #if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_TRIGGER: - katom->event_code = kbase_sync_fence_out_trigger(katom, - katom->event_code == BASE_JD_EVENT_DONE ? - 0 : -EFAULT); + katom->event_code = kbase_sync_fence_out_trigger( + katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT); break; - case BASE_JD_REQ_SOFT_FENCE_WAIT: - { + case BASE_JD_REQ_SOFT_FENCE_WAIT: { ret = kbase_sync_fence_in_wait(katom); if (ret == 1) { @@ -1552,8 +1498,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; #if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST - case BASE_JD_REQ_SOFT_DEBUG_COPY: - { + case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); if (res) @@ -1601,60 +1546,47 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom) int kbase_prepare_soft_job(struct kbase_jd_atom *katom) { switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: - { - if (!IS_ALIGNED(katom->jc, cache_line_size())) - return -EINVAL; - } - break; + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: { + if (!IS_ALIGNED(katom->jc, cache_line_size())) + return -EINVAL; + } break; #if IS_ENABLED(CONFIG_SYNC_FILE) - case BASE_JD_REQ_SOFT_FENCE_TRIGGER: - { - struct base_fence fence; - int fd; + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: { + struct base_fence fence; + int fd; - if (copy_from_user(&fence, 
- (__user void *)(uintptr_t)katom->jc, - sizeof(fence)) != 0) - return -EINVAL; + if (copy_from_user(&fence, (__user void *)(uintptr_t)katom->jc, sizeof(fence)) != 0) + return -EINVAL; - fd = kbase_sync_fence_out_create(katom, - fence.basep.stream_fd); - if (fd < 0) - return -EINVAL; + fd = kbase_sync_fence_out_create(katom, fence.basep.stream_fd); + if (fd < 0) + return -EINVAL; - fence.basep.fd = fd; - if (copy_to_user((__user void *)(uintptr_t)katom->jc, - &fence, sizeof(fence)) != 0) { - kbase_sync_fence_out_remove(katom); - /* fd should have been closed here, but there's + fence.basep.fd = fd; + if (copy_to_user((__user void *)(uintptr_t)katom->jc, &fence, sizeof(fence)) != 0) { + kbase_sync_fence_out_remove(katom); + /* fd should have been closed here, but there's * no good way of doing that. Since * copy_to_user() very rarely fails, and the fd * will get closed on process termination this * won't be a problem. */ - fence.basep.fd = -EINVAL; - return -EINVAL; - } + fence.basep.fd = -EINVAL; + return -EINVAL; } - break; - case BASE_JD_REQ_SOFT_FENCE_WAIT: - { - struct base_fence fence; - int ret; + } break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: { + struct base_fence fence; + int ret; - if (copy_from_user(&fence, - (__user void *)(uintptr_t)katom->jc, - sizeof(fence)) != 0) - return -EINVAL; + if (copy_from_user(&fence, (__user void *)(uintptr_t)katom->jc, sizeof(fence)) != 0) + return -EINVAL; - /* Get a reference to the fence object */ - ret = kbase_sync_fence_in_from_fd(katom, - fence.basep.fd); - if (ret < 0) - return ret; - } - break; + /* Get a reference to the fence object */ + ret = kbase_sync_fence_in_from_fd(katom, fence.basep.fd); + if (ret < 0) + return ret; + } break; #endif /* CONFIG_SYNC_FILE */ case BASE_JD_REQ_SOFT_JIT_ALLOC: return kbase_jit_allocate_prepare(katom); @@ -1683,8 +1615,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { - trace_sysgraph(SGR_COMPLETE, 
katom->kctx->id, - kbase_jd_atom_id(katom->kctx, katom)); + trace_sysgraph(SGR_COMPLETE, katom->kctx->id, kbase_jd_atom_id(katom->kctx, katom)); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: @@ -1693,8 +1624,8 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) #if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_TRIGGER: /* If fence has not yet been signaled, do it now */ - kbase_sync_fence_out_trigger(katom, katom->event_code == - BASE_JD_EVENT_DONE ? 0 : -EFAULT); + kbase_sync_fence_out_trigger(katom, + katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT); break; case BASE_JD_REQ_SOFT_FENCE_WAIT: /* Release katom's reference to fence object */ @@ -1735,16 +1666,14 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) /* Move out the entire list */ mutex_lock(&js_devdata->runpool_mutex); - list_splice_init(&js_devdata->suspended_soft_jobs_list, - &local_suspended_soft_jobs); + list_splice_init(&js_devdata->suspended_soft_jobs_list, &local_suspended_soft_jobs); mutex_unlock(&js_devdata->runpool_mutex); /* * Each atom must be detached from the list and ran separately - * it could be re-added to the old list, but this is unlikely */ - list_for_each_entry_safe(katom_iter, tmp_iter, - &local_suspended_soft_jobs, dep_item[1]) { + list_for_each_entry_safe(katom_iter, tmp_iter, &local_suspended_soft_jobs, dep_item[1]) { struct kbase_context *kctx = katom_iter->kctx; mutex_lock(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.h b/drivers/gpu/arm/bifrost/mali_kbase_strings.h deleted file mode 100644 index c3f94f9267d5..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_strings.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -extern const char kbase_drv_name[]; -extern const char kbase_timeline_name[]; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync.h b/drivers/gpu/arm/bifrost/mali_kbase_sync.h index 3d2053bee08e..ff5206d8d395 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,6 @@ #include #include #if IS_ENABLED(CONFIG_SYNC_FILE) -#include "mali_kbase_fence_defs.h" #include #endif @@ -113,8 +112,7 @@ int kbase_sync_fence_validate(int fd); * * Return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE */ -enum base_jd_event_code -kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); +enum base_jd_event_code kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); /** * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled @@ -164,8 +162,7 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); * * Return: 0 on success, < 0 on error */ -int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info); +int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info); /** * kbase_sync_fence_out_info_get() - Retrieves information about output fence @@ -174,17 +171,14 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, * * Return: 0 on success, < 0 on error */ -int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info); +int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info); #endif /* !MALI_USE_CSF */ #if IS_ENABLED(CONFIG_SYNC_FILE) #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -void kbase_sync_fence_info_get(struct fence *fence, - struct kbase_sync_fence_info *info); +void kbase_sync_fence_info_get(struct fence *fence, struct kbase_sync_fence_info *info); #else -void kbase_sync_fence_info_get(struct dma_fence *fence, - struct kbase_sync_fence_info *info); +void kbase_sync_fence_info_get(struct dma_fence *fence, struct kbase_sync_fence_info *info); #endif #endif @@ -196,7 +190,6 @@ void kbase_sync_fence_info_get(struct 
dma_fence *fence, */ const char *kbase_sync_status_string(int status); - #if !MALI_USE_CSF /* * Internal worker used to continue processing of atom. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c index 9360324cfee6..aa4bf980e2bb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,22 +33,18 @@ #include #include #include -#include "mali_kbase_fence_defs.h" #include "mali_kbase_sync.h" #include "mali_kbase_fence.h" #include "mali_kbase.h" -static const struct file_operations stream_fops = { - .owner = THIS_MODULE -}; +static const struct file_operations stream_fops = { .owner = THIS_MODULE }; int kbase_sync_fence_stream_create(const char *name, int *const out_fd) { if (!out_fd) return -EINVAL; - *out_fd = anon_inode_getfd(name, &stream_fops, NULL, - O_RDONLY | O_CLOEXEC); + *out_fd = anon_inode_getfd(name, &stream_fops, NULL, O_RDONLY | O_CLOEXEC); if (*out_fd < 0) return -EINVAL; @@ -66,6 +62,8 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) struct sync_file *sync_file; int fd; + CSTD_UNUSED(stream_fd); + fence = kbase_fence_out_new(katom); if (!fence) return -ENOMEM; @@ -138,8 +136,7 @@ int kbase_sync_fence_validate(int fd) } #if !MALI_USE_CSF -enum base_jd_event_code -kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) +enum base_jd_event_code kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) { int res; @@ -150,8 +147,7 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) res = 
kbase_fence_out_signal(katom, result); if (unlikely(res < 0)) { - dev_warn(katom->kctx->kbdev->dev, - "fence_signal() failed with %d\n", res); + dev_warn(katom->kctx->kbdev->dev, "fence_signal() failed with %d\n", res); } kbase_sync_fence_out_remove(katom); @@ -160,29 +156,27 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) } #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -static void kbase_fence_wait_callback(struct fence *fence, - struct fence_cb *cb) +static void kbase_fence_wait_callback(struct fence *fence, struct fence_cb *cb) #else -static void kbase_fence_wait_callback(struct dma_fence *fence, - struct dma_fence_cb *cb) +static void kbase_fence_wait_callback(struct dma_fence *fence, struct dma_fence_cb *cb) #endif { - struct kbase_jd_atom *katom = container_of(cb, struct kbase_jd_atom, - dma_fence.fence_cb); + struct kbase_jd_atom *katom = container_of(cb, struct kbase_jd_atom, dma_fence.fence_cb); struct kbase_context *kctx = katom->kctx; + CSTD_UNUSED(fence); + /* Cancel atom if fence is erroneous */ if (dma_fence_is_signaled(katom->dma_fence.fence_in) && #if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ - (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) katom->dma_fence.fence_in->error < 0) #else katom->dma_fence.fence_in->status < 0) #endif katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* To prevent a potential deadlock we schedule the work onto the * job_done_wq workqueue * @@ -209,8 +203,7 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) if (!fence) return 0; /* no input fence to wait for, good to go! 
*/ - err = dma_fence_add_callback(fence, &katom->dma_fence.fence_cb, - kbase_fence_wait_callback); + err = dma_fence_add_callback(fence, &katom->dma_fence.fence_cb, kbase_fence_wait_callback); if (err == -ENOENT) { int fence_status = dma_fence_get_status(fence); @@ -228,8 +221,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) kbase_sync_fence_in_info_get(katom, &info); dev_warn(katom->kctx->kbdev->dev, - "Unexpected status for fence %s of ctx:%d_%d atom:%d", - info.name, katom->kctx->tgid, katom->kctx->id, + "Unexpected status for fence %s of ctx:%d_%d atom:%d", info.name, + katom->kctx->tgid, katom->kctx->id, kbase_jd_atom_id(katom->kctx, katom)); } @@ -306,10 +299,11 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) struct kbase_sync_fence_info info; kbase_sync_fence_in_info_get(katom, &info); - dev_warn(katom->kctx->kbdev->dev, - "Callback was not removed earlier for fence %s of ctx:%d_%d atom:%d", - info.name, katom->kctx->tgid, katom->kctx->id, - kbase_jd_atom_id(katom->kctx, katom)); + dev_warn( + katom->kctx->kbdev->dev, + "Callback was not removed earlier for fence %s of ctx:%d_%d atom:%d", + info.name, katom->kctx->tgid, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); } } @@ -319,11 +313,9 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) #endif /* !MALI_USE_CSF */ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -void kbase_sync_fence_info_get(struct fence *fence, - struct kbase_sync_fence_info *info) +void kbase_sync_fence_info_get(struct fence *fence, struct kbase_sync_fence_info *info) #else -void kbase_sync_fence_info_get(struct dma_fence *fence, - struct kbase_sync_fence_info *info) +void kbase_sync_fence_info_get(struct dma_fence *fence, struct kbase_sync_fence_info *info) #endif { info->fence = fence; @@ -335,8 +327,8 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, */ if (dma_fence_is_signaled(fence)) { #if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ - (KERNEL_VERSION(4, 10, 
0) > LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) int status = fence->error; #else int status = fence->status; @@ -345,22 +337,19 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, info->status = status; /* signaled with error */ else info->status = 1; /* signaled with success */ - } else { + } else { info->status = 0; /* still active (unsignaled) */ } #if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - scnprintf(info->name, sizeof(info->name), "%llu#%u", - fence->context, fence->seqno); + scnprintf(info->name, sizeof(info->name), "%llu#%u", fence->context, fence->seqno); #else - scnprintf(info->name, sizeof(info->name), "%llu#%llu", - fence->context, fence->seqno); + scnprintf(info->name, sizeof(info->name), "%llu#%llu", fence->context, fence->seqno); #endif } #if !MALI_USE_CSF -int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) +int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; @@ -379,8 +368,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, return 0; } -int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) +int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; @@ -399,11 +387,11 @@ int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, return 0; } - #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) { /* Not implemented */ + CSTD_UNUSED(katom); } #endif #endif /* !MALI_USE_CSF*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c index 
7df7d79b6bc5..ced080dae504 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,8 +54,7 @@ struct kbase_dma_buf { * * Return: true on success. */ -static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, - struct dma_buf *dma_buf, +static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, struct dma_buf *dma_buf, struct rb_root *tree) { struct kbase_dma_buf *buf_node = NULL; @@ -105,8 +104,7 @@ static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, * * Return: true on success */ -static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, - struct dma_buf *dma_buf, +static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, struct dma_buf *dma_buf, struct rb_root *root) { struct kbase_dma_buf *buf_node = NULL; @@ -130,8 +128,7 @@ static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, } if (unique_buf_imported) { - struct kbase_dma_buf *new_buf_node = - kzalloc(sizeof(*new_buf_node), GFP_KERNEL); + struct kbase_dma_buf *new_buf_node = kzalloc(sizeof(*new_buf_node), GFP_KERNEL); if (new_buf_node == NULL) { dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); @@ -146,8 +143,7 @@ static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, struct kbase_dma_buf *new_node; parent = *new; - new_node = rb_entry(parent, struct kbase_dma_buf, - dma_buf_node); + new_node = rb_entry(parent, struct kbase_dma_buf, dma_buf_node); if (dma_buf < new_node->dma_buf) new = &(*new)->rb_left; else @@ -163,19 +159,18 @@ static bool 
kbase_capture_dma_buf_mapping(struct kbase_context *kctx, return unique_buf_imported; } -void kbase_remove_dma_buf_usage(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc) +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) { struct kbase_device *kbdev = kctx->kbdev; bool dev_mapping_removed, prcs_mapping_removed; mutex_lock(&kbdev->dma_buf_lock); - dev_mapping_removed = kbase_delete_dma_buf_mapping( - kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + dev_mapping_removed = kbase_delete_dma_buf_mapping(kctx, alloc->imported.umm.dma_buf, + &kbdev->dma_buf_root); - prcs_mapping_removed = kbase_delete_dma_buf_mapping( - kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + prcs_mapping_removed = kbase_delete_dma_buf_mapping(kctx, alloc->imported.umm.dma_buf, + &kctx->kprcs->dma_buf_root); WARN_ON(dev_mapping_removed && !prcs_mapping_removed); @@ -193,8 +188,7 @@ void kbase_remove_dma_buf_usage(struct kbase_context *kctx, mutex_unlock(&kbdev->dma_buf_lock); } -void kbase_add_dma_buf_usage(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc) +void kbase_add_dma_buf_usage(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) { struct kbase_device *kbdev = kctx->kbdev; bool unique_dev_dmabuf, unique_prcs_dmabuf; @@ -202,11 +196,11 @@ void kbase_add_dma_buf_usage(struct kbase_context *kctx, mutex_lock(&kbdev->dma_buf_lock); /* add dma_buf to device and process. 
*/ - unique_dev_dmabuf = kbase_capture_dma_buf_mapping( - kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + unique_dev_dmabuf = kbase_capture_dma_buf_mapping(kctx, alloc->imported.umm.dma_buf, + &kbdev->dma_buf_root); - unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( - kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + unique_prcs_dmabuf = kbase_capture_dma_buf_mapping(kctx, alloc->imported.umm.dma_buf, + &kctx->kprcs->dma_buf_root); WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h index fd871fcb352a..96d0c40530fc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,25 +26,26 @@ #include #endif -#define DEVICE_TGID ((u32) 0U) +#define DEVICE_TGID ((u32)0U) -static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, - struct kbase_context *kctx) +static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, struct kbase_context *kctx) { #if IS_ENABLED(CONFIG_TRACE_GPU_MEM) lockdep_assert_held(&kbdev->gpu_mem_usage_lock); - trace_gpu_mem_total(kbdev->id, DEVICE_TGID, - kbdev->total_gpu_pages << PAGE_SHIFT); + trace_gpu_mem_total(kbdev->id, DEVICE_TGID, kbdev->total_gpu_pages << PAGE_SHIFT); if (likely(kctx)) trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, - kctx->kprcs->total_gpu_pages << PAGE_SHIFT); + kctx->kprcs->total_gpu_pages << PAGE_SHIFT); +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); #endif } static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, 
- struct kbase_context *kctx, size_t pages) + struct kbase_context *kctx, size_t pages) { spin_lock(&kbdev->gpu_mem_usage_lock); @@ -59,7 +60,7 @@ static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, } static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, - struct kbase_context *kctx, size_t pages) + struct kbase_context *kctx, size_t pages) { spin_lock(&kbdev->gpu_mem_usage_lock); @@ -82,8 +83,7 @@ static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, * Remove reference to dma buf been unmapped from kbase_device level * rb_tree and Kbase_process level dma buf rb_tree. */ -void kbase_remove_dma_buf_usage(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc); +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc); /** * kbase_add_dma_buf_usage - Add a dma-buf entry captured. @@ -94,7 +94,6 @@ void kbase_remove_dma_buf_usage(struct kbase_context *kctx, * Add reference to dma buf been mapped to kbase_device level * rb_tree and Kbase_process level dma buf rb_tree. */ -void kbase_add_dma_buf_usage(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc); +void kbase_add_dma_buf_usage(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc); #endif /* _KBASE_TRACE_GPU_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_utility.h b/drivers/gpu/arm/bifrost/mali_kbase_utility.h deleted file mode 100644 index 2dad49b59f52..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_utility.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_UTILITY_H -#define _KBASE_UTILITY_H - -#ifndef _KBASE_H_ -#error "Don't include this file directly, use mali_kbase.h instead" -#endif - -static inline void kbase_timer_setup(struct timer_list *timer, - void (*callback)(struct timer_list *timer)) -{ -#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE - setup_timer(timer, (void (*)(unsigned long)) callback, - (unsigned long) timer); -#else - timer_setup(timer, callback, 0); -#endif -} - -#ifndef WRITE_ONCE - #ifdef ASSIGN_ONCE - #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) - #else - #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) - #endif -#endif - -#ifndef READ_ONCE - #define READ_ONCE(x) ACCESS_ONCE(x) -#endif - -#endif /* _KBASE_UTILITY_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c deleted file mode 100644 index d770913e9da5..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ /dev/null @@ -1,1132 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#include "mali_kbase_vinstr.h" -#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" -#include "hwcnt/mali_kbase_hwcnt_types.h" -#include -#include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" -#include -#include "mali_malisw.h" -#include "mali_kbase_debug.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Explicitly include epoll header for old kernels. Not required from 4.16. */ -#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE -#include -#endif - -/* Hwcnt reader API version */ -#define HWCNT_READER_API 1 - -/* The minimum allowed interval between dumps (equivalent to 10KHz) */ -#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) - -/* The maximum allowed buffers per client */ -#define MAX_BUFFER_COUNT 32 - -/** - * struct kbase_vinstr_context - IOCTL interface for userspace hardware - * counters. - * @hvirt: Hardware counter virtualizer used by vinstr. - * @metadata: Hardware counter metadata provided by virtualizer. - * @metadata_user: API compatible hardware counter metadata provided by vinstr. - * For compatibility with the user driver interface, this - * contains a narrowed version of the HWCNT metadata limited - * to 64 entries per block of 32 bits each. - * @lock: Lock protecting all vinstr state. - * @suspend_count: Suspend reference count. If non-zero, timer and worker are - * prevented from being re-scheduled. - * @client_count: Number of vinstr clients. - * @clients: List of vinstr clients. 
- * @dump_timer: Timer that enqueues dump_work to a workqueue. - * @dump_work: Worker for performing periodic counter dumps. - */ -struct kbase_vinstr_context { - struct kbase_hwcnt_virtualizer *hvirt; - const struct kbase_hwcnt_metadata *metadata; - const struct kbase_hwcnt_metadata_narrow *metadata_user; - struct mutex lock; - size_t suspend_count; - size_t client_count; - struct list_head clients; - struct hrtimer dump_timer; - struct work_struct dump_work; -}; - -/** - * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. - * @vctx: Vinstr context client is attached to. - * @hvcli: Hardware counter virtualizer client. - * @node: Node used to attach this client to list in vinstr - * context. - * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic - * client. - * @next_dump_time_ns: Time in ns when this client's next periodic dump must - * occur. If 0, not a periodic client. - * @enable_map: Counters enable map. - * @tmp_buf: Temporary buffer to use before handing dump to client. - * @dump_bufs: Array of narrow dump buffers allocated by this client. - * @dump_bufs_meta: Metadata of hwcnt reader client buffers. - * @meta_idx: Index of metadata being accessed by userspace. - * @read_idx: Index of buffer read by userspace. - * @write_idx: Index of buffer being written by dump worker. - * @waitq: Client's notification queue. 
- */ -struct kbase_vinstr_client { - struct kbase_vinstr_context *vctx; - struct kbase_hwcnt_virtualizer_client *hvcli; - struct list_head node; - u64 next_dump_time_ns; - u32 dump_interval_ns; - struct kbase_hwcnt_enable_map enable_map; - struct kbase_hwcnt_dump_buffer tmp_buf; - struct kbase_hwcnt_dump_buffer_narrow_array dump_bufs; - struct kbase_hwcnt_reader_metadata *dump_bufs_meta; - atomic_t meta_idx; - atomic_t read_idx; - atomic_t write_idx; - wait_queue_head_t waitq; -}; - -static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait); - -static long kbasep_vinstr_hwcnt_reader_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long arg); - -static int kbasep_vinstr_hwcnt_reader_mmap( - struct file *filp, - struct vm_area_struct *vma); - -static int kbasep_vinstr_hwcnt_reader_release( - struct inode *inode, - struct file *filp); - -/* Vinstr client file operations */ -static const struct file_operations vinstr_client_fops = { - .owner = THIS_MODULE, - .poll = kbasep_vinstr_hwcnt_reader_poll, - .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, - .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, - .mmap = kbasep_vinstr_hwcnt_reader_mmap, - .release = kbasep_vinstr_hwcnt_reader_release, -}; - -/** - * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. - * - * Return: Current time in nanoseconds. - */ -static u64 kbasep_vinstr_timestamp_ns(void) -{ - return ktime_get_raw_ns(); -} - -/** - * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. - * @cur_ts_ns: Current time in nanoseconds. - * @interval: Interval between dumps in nanoseconds. - * - * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump - * time that occurs after cur_ts_ns. - */ -static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) -{ - /* Non-periodic client */ - if (interval == 0) - return 0; - - /* - * Return the next interval after the current time relative to t=0. 
- * This means multiple clients with the same period will synchronise, - * regardless of when they were started, allowing the worker to be - * scheduled less frequently. - */ - do_div(cur_ts_ns, interval); - return (cur_ts_ns + 1) * interval; -} - -/** - * kbasep_vinstr_client_dump() - Perform a dump for a client. - * @vcli: Non-NULL pointer to a vinstr client. - * @event_id: Event type that triggered the dump. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_client_dump( - struct kbase_vinstr_client *vcli, - enum base_hwcnt_reader_event event_id) -{ - int errcode; - u64 ts_start_ns; - u64 ts_end_ns; - unsigned int write_idx; - unsigned int read_idx; - struct kbase_hwcnt_dump_buffer *tmp_buf; - struct kbase_hwcnt_dump_buffer_narrow *dump_buf; - struct kbase_hwcnt_reader_metadata *meta; - u8 clk_cnt; - - WARN_ON(!vcli); - lockdep_assert_held(&vcli->vctx->lock); - - write_idx = atomic_read(&vcli->write_idx); - read_idx = atomic_read(&vcli->read_idx); - - /* Check if there is a place to copy HWC block into. */ - if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) - return -EBUSY; - write_idx %= vcli->dump_bufs.buf_cnt; - - dump_buf = &vcli->dump_bufs.bufs[write_idx]; - meta = &vcli->dump_bufs_meta[write_idx]; - tmp_buf = &vcli->tmp_buf; - - errcode = kbase_hwcnt_virtualizer_client_dump( - vcli->hvcli, &ts_start_ns, &ts_end_ns, tmp_buf); - if (errcode) - return errcode; - - /* Patch the dump buf headers, to hide the counters that other hwcnt - * clients are using. - */ - kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &vcli->enable_map); - - /* Copy the temp buffer to the userspace visible buffer. The strict - * variant will explicitly zero any non-enabled counters to ensure - * nothing except exactly what the user asked for is made visible. - * - * A narrow copy is required since virtualizer has a bigger buffer - * but user only needs part of it. 
- */ - kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf, - &vcli->enable_map); - - clk_cnt = vcli->vctx->metadata->clk_cnt; - - meta->timestamp = ts_end_ns; - meta->event_id = event_id; - meta->buffer_idx = write_idx; - meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; - meta->cycles.shader_cores = - (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; - - /* Notify client. Make sure all changes to memory are visible. */ - wmb(); - atomic_inc(&vcli->write_idx); - wake_up_interruptible(&vcli->waitq); - return 0; -} - -/** - * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. - * @vcli: Non-NULL pointer to a vinstr client. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) -{ - u64 ts_start_ns; - u64 ts_end_ns; - - WARN_ON(!vcli); - lockdep_assert_held(&vcli->vctx->lock); - - /* A virtualizer dump with a NULL buffer will just clear the virtualizer - * client's buffer. - */ - return kbase_hwcnt_virtualizer_client_dump( - vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); -} - -/** - * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic - * vinstr clients, then reschedule the dump - * worker appropriately. - * @vctx: Non-NULL pointer to the vinstr context. - * - * If there are no periodic clients, then the dump worker will not be - * rescheduled. Else, the dump worker will be rescheduled for the next periodic - * client dump. - */ -static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) -{ - u64 cur_ts_ns; - u64 earliest_next_ns = U64_MAX; - struct kbase_vinstr_client *pos; - - WARN_ON(!vctx); - lockdep_assert_held(&vctx->lock); - - cur_ts_ns = kbasep_vinstr_timestamp_ns(); - - /* - * Update each client's next dump time, and find the earliest next - * dump time if any of the clients have a non-zero interval. 
- */ - list_for_each_entry(pos, &vctx->clients, node) { - const u64 cli_next_ns = - kbasep_vinstr_next_dump_time_ns( - cur_ts_ns, pos->dump_interval_ns); - - /* Non-zero next dump time implies a periodic client */ - if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) - earliest_next_ns = cli_next_ns; - - pos->next_dump_time_ns = cli_next_ns; - } - - /* Cancel the timer if it is already pending */ - hrtimer_cancel(&vctx->dump_timer); - - /* Start the timer if there are periodic clients and vinstr is not - * suspended. - */ - if ((earliest_next_ns != U64_MAX) && - (vctx->suspend_count == 0) && - !WARN_ON(earliest_next_ns < cur_ts_ns)) - hrtimer_start( - &vctx->dump_timer, - ns_to_ktime(earliest_next_ns - cur_ts_ns), - HRTIMER_MODE_REL); -} - -/** - * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients - * that need to be dumped, then reschedules itself. - * @work: Work structure. - */ -static void kbasep_vinstr_dump_worker(struct work_struct *work) -{ - struct kbase_vinstr_context *vctx = - container_of(work, struct kbase_vinstr_context, dump_work); - struct kbase_vinstr_client *pos; - u64 cur_time_ns; - - mutex_lock(&vctx->lock); - - cur_time_ns = kbasep_vinstr_timestamp_ns(); - - /* Dump all periodic clients whose next dump time is before the current - * time. - */ - list_for_each_entry(pos, &vctx->clients, node) { - if ((pos->next_dump_time_ns != 0) && - (pos->next_dump_time_ns < cur_time_ns)) - kbasep_vinstr_client_dump( - pos, BASE_HWCNT_READER_EVENT_PERIODIC); - } - - /* Update the next dump times of all periodic clients, then reschedule - * this worker at the earliest next dump time. - */ - kbasep_vinstr_reschedule_worker(vctx); - - mutex_unlock(&vctx->lock); -} - -/** - * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for - * execution as soon as possible. - * @timer: Timer structure. - * - * Return: HRTIMER_NORESTART always. 
- */ -static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) -{ - struct kbase_vinstr_context *vctx = - container_of(timer, struct kbase_vinstr_context, dump_timer); - - /* We don't need to check vctx->suspend_count here, as the suspend - * function will ensure that any worker enqueued here is immediately - * cancelled, and the worker itself won't reschedule this timer if - * suspend_count != 0. - */ - kbase_hwcnt_virtualizer_queue_work(vctx->hvirt, &vctx->dump_work); - return HRTIMER_NORESTART; -} - -/** - * kbasep_vinstr_client_destroy() - Destroy a vinstr client. - * @vcli: vinstr client. Must not be attached to a vinstr context. - */ -static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) -{ - if (!vcli) - return; - - kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); - kfree(vcli->dump_bufs_meta); - kbase_hwcnt_dump_buffer_narrow_array_free(&vcli->dump_bufs); - kbase_hwcnt_dump_buffer_free(&vcli->tmp_buf); - kbase_hwcnt_enable_map_free(&vcli->enable_map); - kfree(vcli); -} - -/** - * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to - * the vinstr context. - * @vctx: Non-NULL pointer to vinstr context. - * @setup: Non-NULL pointer to hardware counter ioctl setup structure. - * setup->buffer_count must not be 0 and must be a power of 2. - * @out_vcli: Non-NULL pointer to where created client will be stored on - * success. - * - * Return: 0 on success, else error code. 
- */ -static int kbasep_vinstr_client_create( - struct kbase_vinstr_context *vctx, - struct kbase_ioctl_hwcnt_reader_setup *setup, - struct kbase_vinstr_client **out_vcli) -{ - int errcode; - struct kbase_vinstr_client *vcli; - struct kbase_hwcnt_physical_enable_map phys_em; - - WARN_ON(!vctx); - WARN_ON(!setup); - WARN_ON(setup->buffer_count == 0); - WARN_ON(!is_power_of_2(setup->buffer_count)); - - vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); - if (!vcli) - return -ENOMEM; - - vcli->vctx = vctx; - - errcode = kbase_hwcnt_enable_map_alloc( - vctx->metadata, &vcli->enable_map); - if (errcode) - goto error; - - phys_em.fe_bm = setup->fe_bm; - phys_em.shader_bm = setup->shader_bm; - phys_em.tiler_bm = setup->tiler_bm; - phys_em.mmu_l2_bm = setup->mmu_l2_bm; - kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); - - /* Use virtualizer's metadata to alloc tmp buffer which interacts with - * the HWC virtualizer. - */ - errcode = kbase_hwcnt_dump_buffer_alloc(vctx->metadata, &vcli->tmp_buf); - if (errcode) - goto error; - - /* Enable all the available clk_enable_map. */ - vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; - - /* Use vinstr's narrowed metadata to alloc narrow dump buffers which - * interact with clients. 
- */ - errcode = kbase_hwcnt_dump_buffer_narrow_array_alloc( - vctx->metadata_user, setup->buffer_count, &vcli->dump_bufs); - if (errcode) - goto error; - - errcode = -ENOMEM; - vcli->dump_bufs_meta = kmalloc_array( - setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); - if (!vcli->dump_bufs_meta) - goto error; - - errcode = kbase_hwcnt_virtualizer_client_create( - vctx->hvirt, &vcli->enable_map, &vcli->hvcli); - if (errcode) - goto error; - - init_waitqueue_head(&vcli->waitq); - - *out_vcli = vcli; - return 0; -error: - kbasep_vinstr_client_destroy(vcli); - return errcode; -} - -int kbase_vinstr_init( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_vinstr_context **out_vctx) -{ - int errcode; - struct kbase_vinstr_context *vctx; - const struct kbase_hwcnt_metadata *metadata; - - if (!hvirt || !out_vctx) - return -EINVAL; - - metadata = kbase_hwcnt_virtualizer_metadata(hvirt); - if (!metadata) - return -EINVAL; - - vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); - if (!vctx) - return -ENOMEM; - - vctx->hvirt = hvirt; - vctx->metadata = metadata; - errcode = kbase_hwcnt_gpu_metadata_narrow_create(&vctx->metadata_user, - metadata); - if (errcode) - goto err_metadata_create; - - mutex_init(&vctx->lock); - INIT_LIST_HEAD(&vctx->clients); - hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vctx->dump_timer.function = kbasep_vinstr_dump_timer; - INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); - - *out_vctx = vctx; - return 0; - -err_metadata_create: - kfree(vctx); - - return errcode; -} - -void kbase_vinstr_term(struct kbase_vinstr_context *vctx) -{ - if (!vctx) - return; - - /* Non-zero client count implies client leak */ - if (WARN_ON(vctx->client_count != 0)) { - struct kbase_vinstr_client *pos, *n; - - list_for_each_entry_safe(pos, n, &vctx->clients, node) { - list_del(&pos->node); - vctx->client_count--; - kbasep_vinstr_client_destroy(pos); - } - } - - cancel_work_sync(&vctx->dump_work); - 
kbase_hwcnt_gpu_metadata_narrow_destroy(vctx->metadata_user); - - WARN_ON(vctx->client_count != 0); - kfree(vctx); -} - -void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) -{ - if (WARN_ON(!vctx)) - return; - - mutex_lock(&vctx->lock); - - if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) - vctx->suspend_count++; - - mutex_unlock(&vctx->lock); - - /* Always sync cancel the timer and then the worker, regardless of the - * new suspend count. - * - * This ensures concurrent calls to kbase_vinstr_suspend() always block - * until vinstr is fully suspended. - * - * The timer is cancelled before the worker, as the timer - * unconditionally re-enqueues the worker, but the worker checks the - * suspend_count that we just incremented before rescheduling the timer. - * - * Therefore if we cancel the worker first, the timer might re-enqueue - * the worker before we cancel the timer, but the opposite is not - * possible. - */ - hrtimer_cancel(&vctx->dump_timer); - cancel_work_sync(&vctx->dump_work); -} - -void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) -{ - if (WARN_ON(!vctx)) - return; - - mutex_lock(&vctx->lock); - - if (!WARN_ON(vctx->suspend_count == 0)) { - vctx->suspend_count--; - - /* Last resume, so re-enqueue the worker if we have any periodic - * clients. 
- */ - if (vctx->suspend_count == 0) { - struct kbase_vinstr_client *pos; - bool has_periodic_clients = false; - - list_for_each_entry(pos, &vctx->clients, node) { - if (pos->dump_interval_ns != 0) { - has_periodic_clients = true; - break; - } - } - - if (has_periodic_clients) - kbase_hwcnt_virtualizer_queue_work( - vctx->hvirt, &vctx->dump_work); - } - } - - mutex_unlock(&vctx->lock); -} - -int kbase_vinstr_hwcnt_reader_setup( - struct kbase_vinstr_context *vctx, - struct kbase_ioctl_hwcnt_reader_setup *setup) -{ - int errcode; - int fd; - struct kbase_vinstr_client *vcli = NULL; - - if (!vctx || !setup || - (setup->buffer_count == 0) || - (setup->buffer_count > MAX_BUFFER_COUNT) || - !is_power_of_2(setup->buffer_count)) - return -EINVAL; - - errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); - if (errcode) - goto error; - - /* Add the new client. No need to reschedule worker, as not periodic */ - mutex_lock(&vctx->lock); - - vctx->client_count++; - list_add(&vcli->node, &vctx->clients); - - mutex_unlock(&vctx->lock); - - /* Expose to user-space only once the client is fully initialized */ - errcode = anon_inode_getfd( - "[mali_vinstr_desc]", - &vinstr_client_fops, - vcli, - O_RDONLY | O_CLOEXEC); - if (errcode < 0) - goto client_installed_error; - - fd = errcode; - - return fd; - -client_installed_error: - mutex_lock(&vctx->lock); - - vctx->client_count--; - list_del(&vcli->node); - - mutex_unlock(&vctx->lock); -error: - kbasep_vinstr_client_destroy(vcli); - return errcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready - * buffers. - * @cli: Non-NULL pointer to vinstr client. - * - * Return: Non-zero if client has at least one dumping buffer filled that was - * not notified to user yet. 
- */ -static int kbasep_vinstr_hwcnt_reader_buffer_ready( - struct kbase_vinstr_client *cli) -{ - WARN_ON(!cli); - return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_dump( - struct kbase_vinstr_client *cli) -{ - int errcode; - - mutex_lock(&cli->vctx->lock); - - errcode = kbasep_vinstr_client_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL); - - mutex_unlock(&cli->vctx->lock); - return errcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_clear( - struct kbase_vinstr_client *cli) -{ - int errcode; - - mutex_lock(&cli->vctx->lock); - - errcode = kbasep_vinstr_client_clear(cli); - - mutex_unlock(&cli->vctx->lock); - return errcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @buffer: Non-NULL pointer to userspace buffer. - * @size: Size of buffer. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - struct kbase_vinstr_client *cli, - void __user *buffer, - size_t size) -{ - unsigned int meta_idx = atomic_read(&cli->meta_idx); - unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; - - struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; - const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); - const size_t min_size = min(size, meta_size); - - /* Metadata sanity check. */ - WARN_ON(idx != meta->buffer_idx); - - /* Check if there is any buffer available. */ - if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) - return -EAGAIN; - - /* Check if previously taken buffer was put back. 
*/ - if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) - return -EBUSY; - - /* Clear user buffer to zero. */ - if (unlikely(meta_size < size && clear_user(buffer, size))) - return -EFAULT; - - /* Copy next available buffer's metadata to user. */ - if (unlikely(copy_to_user(buffer, meta, min_size))) - return -EFAULT; - - /* Compare exchange meta idx to protect against concurrent getters */ - if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1)) - return -EBUSY; - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @buffer: Non-NULL pointer to userspace buffer. - * @size: Size of buffer. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - struct kbase_vinstr_client *cli, - void __user *buffer, - size_t size) -{ - unsigned int read_idx = atomic_read(&cli->read_idx); - unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; - - struct kbase_hwcnt_reader_metadata *meta; - const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); - const size_t max_size = max(size, meta_size); - int ret = 0; - u8 stack_kbuf[64]; - u8 *kbuf = NULL; - size_t i; - - /* Check if any buffer was taken. */ - if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) - return -EPERM; - - if (likely(max_size <= sizeof(stack_kbuf))) { - /* Use stack buffer when the size is small enough. */ - if (unlikely(meta_size > size)) - memset(stack_kbuf, 0, sizeof(stack_kbuf)); - kbuf = stack_kbuf; - } else { - kbuf = kzalloc(max_size, GFP_KERNEL); - if (unlikely(!kbuf)) - return -ENOMEM; - } - - /* - * Copy user buffer to zero cleared kernel buffer which has enough - * space for both user buffer and kernel metadata. - */ - if (unlikely(copy_from_user(kbuf, buffer, size))) { - ret = -EFAULT; - goto out; - } - - /* - * Make sure any "extra" data passed from userspace is zero. - * It's meaningful only in case meta_size < size. 
- */ - for (i = meta_size; i < size; i++) { - /* Check if user data beyond meta size is zero. */ - if (unlikely(kbuf[i] != 0)) { - ret = -EINVAL; - goto out; - } - } - - /* Check if correct buffer is put back. */ - meta = (struct kbase_hwcnt_reader_metadata *)kbuf; - if (unlikely(idx != meta->buffer_idx)) { - ret = -EINVAL; - goto out; - } - - /* Compare exchange read idx to protect against concurrent putters */ - if (read_idx != - atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) { - ret = -EPERM; - goto out; - } - -out: - if (unlikely(kbuf != stack_kbuf)) - kfree(kbuf); - return ret; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @interval: Periodic dumping interval (disable periodic dumping if 0). - * - * Return: 0 always. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - struct kbase_vinstr_client *cli, - u32 interval) -{ - mutex_lock(&cli->vctx->lock); - - if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) - interval = DUMP_INTERVAL_MIN_NS; - /* Update the interval, and put in a dummy next dump time */ - cli->dump_interval_ns = interval; - cli->next_dump_time_ns = 0; - - /* - * If it's a periodic client, kick off the worker early to do a proper - * timer reschedule. Return value is ignored, as we don't care if the - * worker is already queued. - */ - if ((interval != 0) && (cli->vctx->suspend_count == 0)) - kbase_hwcnt_virtualizer_queue_work(cli->vctx->hvirt, - &cli->vctx->dump_work); - - mutex_unlock(&cli->vctx->lock); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @event_id: ID of event to enable. - * - * Return: 0 always. 
- */ -static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - /* No-op, as events aren't supported */ - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl - * command. - * @cli: Non-NULL pointer to vinstr client. - * @event_id: ID of event to disable. - * - * Return: 0 always. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - /* No-op, as events aren't supported */ - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @hwver: Non-NULL pointer to user buffer where HW version will be stored. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - struct kbase_vinstr_client *cli, - u32 __user *hwver) -{ - u32 ver = 5; - const enum kbase_hwcnt_gpu_group_type type = - kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); - - if (WARN_ON(type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) - return -EINVAL; - - return put_user(ver, hwver); -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_api_version() - get API version ioctl - * command. - * @cli: The non-NULL pointer to the client - * @arg: Command's argument. - * @size: Size of arg. - * - * Return: 0 on success, else error code. 
- */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( - struct kbase_vinstr_client *cli, unsigned long arg, size_t size) -{ - long ret = -EINVAL; - - if (size == sizeof(u32)) { - ret = put_user(HWCNT_READER_API, (u32 __user *)arg); - } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { - u8 clk_cnt = cli->vctx->metadata->clk_cnt; - unsigned long bytes = 0; - struct kbase_hwcnt_reader_api_version api_version = { - .version = HWCNT_READER_API, - .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, - }; - - if (clk_cnt > 0) - api_version.features |= - KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; - if (clk_cnt > 1) - api_version.features |= - KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; - - bytes = copy_to_user( - (void __user *)arg, &api_version, sizeof(api_version)); - - /* copy_to_user returns zero in case of success. - * If it fails, it returns the number of bytes that could NOT be copied - */ - if (bytes == 0) - ret = 0; - else - ret = -EFAULT; - } - return ret; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. - * @filp: Non-NULL pointer to file structure. - * @cmd: User command. - * @arg: Command's argument. - * - * Return: 0 on success, else error code. 
- */ -static long kbasep_vinstr_hwcnt_reader_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long arg) -{ - long rcode; - struct kbase_vinstr_client *cli; - - if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) - return -EINVAL; - - cli = filp->private_data; - if (!cli) - return -EINVAL; - - switch (_IOC_NR(cmd)) { - case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( - cli, arg, _IOC_SIZE(cmd)); - break; - case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - cli, (u32 __user *)arg); - break; - case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): - rcode = put_user((u32)cli->vctx->metadata_user->dump_buf_bytes, - (u32 __user *)arg); - break; - case _IOC_NR(KBASE_HWCNT_READER_DUMP): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); - break; - case _IOC_NR(KBASE_HWCNT_READER_CLEAR): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); - break; - case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); - break; - case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); - break; - case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - cli, (u32)arg); - break; - case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - cli, (enum base_hwcnt_reader_event)arg); - break; - case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): - rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - cli, (enum base_hwcnt_reader_event)arg); - break; - default: - pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); - rcode = -EINVAL; - break; - } - - return rcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. - * @filp: Non-NULL pointer to file structure. 
- * @wait: Non-NULL pointer to poll table. - * - * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if - * data can not be read without blocking, else EPOLLHUP | EPOLLERR. - */ -static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait) -{ - struct kbase_vinstr_client *cli; - - if (!filp || !wait) - return EPOLLHUP | EPOLLERR; - - cli = filp->private_data; - if (!cli) - return EPOLLHUP | EPOLLERR; - - poll_wait(filp, &cli->waitq, wait); - if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) - return EPOLLIN | EPOLLRDNORM; - - return (__poll_t)0; -} - -/** - * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. - * @filp: Non-NULL pointer to file structure. - * @vma: Non-NULL pointer to vma structure. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_hwcnt_reader_mmap( - struct file *filp, - struct vm_area_struct *vma) -{ - struct kbase_vinstr_client *cli; - unsigned long vm_size, size, addr, pfn, offset; - - if (!filp || !vma) - return -EINVAL; - - cli = filp->private_data; - if (!cli) - return -EINVAL; - - vm_size = vma->vm_end - vma->vm_start; - - /* The mapping is allowed to span the entirety of the page allocation, - * not just the chunk where the dump buffers are allocated. - * This accommodates the corner case where the combined size of the - * dump buffers is smaller than a single page. - * This does not pose a security risk as the pages are zeroed on - * allocation, and anything out of bounds of the dump buffers is never - * written to. 
- */ - size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; - - if (vma->vm_pgoff > (size >> PAGE_SHIFT)) - return -EINVAL; - - offset = vma->vm_pgoff << PAGE_SHIFT; - if (vm_size > size - offset) - return -EINVAL; - - addr = __pa(cli->dump_bufs.page_addr + offset); - pfn = addr >> PAGE_SHIFT; - - return remap_pfn_range( - vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); -} - -/** - * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. - * @inode: Non-NULL pointer to inode structure. - * @filp: Non-NULL pointer to file structure. - * - * Return: 0 always. - */ -static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, - struct file *filp) -{ - struct kbase_vinstr_client *vcli = filp->private_data; - - mutex_lock(&vcli->vctx->lock); - - vcli->vctx->client_count--; - list_del(&vcli->node); - - mutex_unlock(&vcli->vctx->lock); - - kbasep_vinstr_client_destroy(vcli); - - return 0; -} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h deleted file mode 100644 index 6747ec70a406..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2015-2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * Vinstr, used to provide an ioctl for userspace access to periodic hardware - * counters. - */ - -#ifndef _KBASE_VINSTR_H_ -#define _KBASE_VINSTR_H_ - -struct kbase_vinstr_context; -struct kbase_hwcnt_virtualizer; -struct kbase_ioctl_hwcnt_reader_setup; - -/** - * kbase_vinstr_init() - Initialise a vinstr context. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr - * context will be stored on success. - * - * On creation, the suspend count of the context will be 0. - * - * Return: 0 on success, else error code. - */ -int kbase_vinstr_init( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_vinstr_context **out_vctx); - -/** - * kbase_vinstr_term() - Terminate a vinstr context. - * @vctx: Pointer to the vinstr context to be terminated. - */ -void kbase_vinstr_term(struct kbase_vinstr_context *vctx); - -/** - * kbase_vinstr_suspend() - Increment the suspend count of the context. - * @vctx: Non-NULL pointer to the vinstr context to be suspended. - * - * After this function call returns, it is guaranteed that all timers and - * workers in vinstr will be cancelled, and will not be re-triggered until - * after the context has been resumed. In effect, this means no new counter - * dumps will occur for any existing or subsequently added periodic clients. - */ -void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); - -/** - * kbase_vinstr_resume() - Decrement the suspend count of the context. - * @vctx: Non-NULL pointer to the vinstr context to be resumed. - * - * If a call to this function decrements the suspend count from 1 to 0, then - * normal operation of vinstr will be resumed (i.e. 
counter dumps will once - * again be automatically triggered for all periodic clients). - * - * It is only valid to call this function one time for each prior returned call - * to kbase_vinstr_suspend. - */ -void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); - -/** - * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader - * client. - * @vinstr_ctx: Non-NULL pointer to the vinstr context. - * @setup: Non-NULL pointer to the hwcnt reader configuration. - * - * Return: file descriptor on success, else a (negative) error code. - */ -int kbase_vinstr_hwcnt_reader_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup); - -#endif /* _KBASE_VINSTR_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_linux_trace.h b/drivers/gpu/arm/bifrost/mali_linux_trace.h index 52f17390c853..a11e12dcf184 100644 --- a/drivers/gpu/arm/bifrost/mali_linux_trace.h +++ b/drivers/gpu/arm/bifrost/mali_linux_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,28 +36,14 @@ * @event_id: ORed together bitfields representing a type of event, * made with the GATOR_MAKE_EVENT() macro. 
*/ -TRACE_EVENT(mali_job_slots_event, - TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, - u8 job_id), - TP_ARGS(gpu_id, event_id, tgid, pid, job_id), - TP_STRUCT__entry( - __field(u32, gpu_id) - __field(u32, event_id) - __field(u32, tgid) - __field(u32, pid) - __field(u8, job_id) - ), - TP_fast_assign( - __entry->gpu_id = gpu_id; - __entry->event_id = event_id; - __entry->tgid = tgid; - __entry->pid = pid; - __entry->job_id = job_id; - ), - TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", - __entry->gpu_id, __entry->event_id, - __entry->tgid, __entry->pid, __entry->job_id) -); +TRACE_EVENT(mali_job_slots_event, TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, u8 job_id), + TP_ARGS(gpu_id, event_id, tgid, pid, job_id), + TP_STRUCT__entry(__field(u32, gpu_id) __field(u32, event_id) __field(u32, tgid) + __field(u32, pid) __field(u8, job_id)), + TP_fast_assign(__entry->gpu_id = gpu_id; __entry->event_id = event_id; + __entry->tgid = tgid; __entry->pid = pid; __entry->job_id = job_id;), + TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", __entry->gpu_id, + __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)); /** * mali_pm_status - Reports change of power management status. 
@@ -66,22 +52,13 @@ TRACE_EVENT(mali_job_slots_event, * @value: 64bits bitmask reporting either power status of * the cores (1-ON, 0-OFF) */ -TRACE_EVENT(mali_pm_status, - TP_PROTO(u32 gpu_id, u32 event_id, u64 value), - TP_ARGS(gpu_id, event_id, value), - TP_STRUCT__entry( - __field(u32, gpu_id) - __field(u32, event_id) - __field(u64, value) - ), - TP_fast_assign( - __entry->gpu_id = gpu_id; - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("gpu=%u event %u = %llu", - __entry->gpu_id, __entry->event_id, __entry->value) -); +TRACE_EVENT(mali_pm_status, TP_PROTO(u32 gpu_id, u32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), + TP_STRUCT__entry(__field(u32, gpu_id) __field(u32, event_id) __field(u64, value)), + TP_fast_assign(__entry->gpu_id = gpu_id; __entry->event_id = event_id; + __entry->value = value;), + TP_printk("gpu=%u event %u = %llu", __entry->gpu_id, __entry->event_id, + __entry->value)); /** * mali_page_fault_insert_pages - Reports an MMU page fault @@ -90,22 +67,13 @@ TRACE_EVENT(mali_pm_status, * @event_id: MMU address space number * @value: Number of newly allocated pages */ -TRACE_EVENT(mali_page_fault_insert_pages, - TP_PROTO(u32 gpu_id, s32 event_id, u64 value), - TP_ARGS(gpu_id, event_id, value), - TP_STRUCT__entry( - __field(u32, gpu_id) - __field(s32, event_id) - __field(u64, value) - ), - TP_fast_assign( - __entry->gpu_id = gpu_id; - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("gpu=%u event %d = %llu", - __entry->gpu_id, __entry->event_id, __entry->value) -); +TRACE_EVENT(mali_page_fault_insert_pages, TP_PROTO(u32 gpu_id, s32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), + TP_STRUCT__entry(__field(u32, gpu_id) __field(s32, event_id) __field(u64, value)), + TP_fast_assign(__entry->gpu_id = gpu_id; __entry->event_id = event_id; + __entry->value = value;), + TP_printk("gpu=%u event %d = %llu", __entry->gpu_id, __entry->event_id, + __entry->value)); /** * 
mali_total_alloc_pages_change - Reports that the total number of @@ -113,19 +81,11 @@ TRACE_EVENT(mali_page_fault_insert_pages, * @gpu_id: Kbase device id * @event_id: Total number of pages allocated */ -TRACE_EVENT(mali_total_alloc_pages_change, - TP_PROTO(u32 gpu_id, s64 event_id), - TP_ARGS(gpu_id, event_id), - TP_STRUCT__entry( - __field(u32, gpu_id) - __field(s64, event_id) - ), - TP_fast_assign( - __entry->gpu_id = gpu_id; - __entry->event_id = event_id; - ), - TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) -); +TRACE_EVENT(mali_total_alloc_pages_change, TP_PROTO(u32 gpu_id, s64 event_id), + TP_ARGS(gpu_id, event_id), + TP_STRUCT__entry(__field(u32, gpu_id) __field(s64, event_id)), + TP_fast_assign(__entry->gpu_id = gpu_id; __entry->event_id = event_id;), + TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id)); #endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ /* @@ -151,108 +111,106 @@ TRACE_EVENT(mali_total_alloc_pages_change, #define _ENSURE_PARENTHESIS(args...) args #define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ - (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \ - __print_symbolic(((code) & ~3u), \ - KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) -#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ - (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) + (!KBASE_MMU_FAULT_CODE_VALID(code) ? \ + "UNKNOWN,level=" : \ + __print_symbolic(((code) & ~3u), KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) +#define KBASE_MMU_FAULT_CODE_LEVEL(code) (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code)&0x3u)) -#define KBASE_MMU_FAULT_STATUS_CODE(status) \ - ((status) & 0xFFu) +#define KBASE_MMU_FAULT_STATUS_CODE(status) ((status)&0xFFu) #define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ - (((status) & (1u << 10)) ? "DECODER_FAULT" : "SLAVE_FAULT") + (((status) & (1u << 10)) ? 
"DECODER_FAULT" : "SLAVE_FAULT") #define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ - KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ - KBASE_MMU_FAULT_STATUS_CODE(status)) + KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(KBASE_MMU_FAULT_STATUS_CODE(status)) #define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ - KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) + KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) -#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ - ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) -#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ - {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ - {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ - {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ - {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) -#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ - __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ - KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) +#define KBASE_MMU_FAULT_STATUS_ACCESS(status) ((status)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) +#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS \ + _ENSURE_PARENTHESIS({ AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ + { AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE, "EXECUTE" }, \ + { AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ + { AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) +#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ + __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ + KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) #if MALI_USE_CSF -#define KBASE_MMU_FAULT_CODE_VALID(code) \ - ((code >= 0xC0 && code <= 0xEB) && \ - (!(code >= 0xC5 && code <= 0xC7)) && \ - (!(code >= 0xCC && code <= 0xD8)) && \ - (!(code >= 0xDC && code <= 0xDF)) && \ - (!(code >= 0xE1 && code <= 0xE3))) -#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ - {0xC0, "TRANSLATION_FAULT_" }, \ - {0xC4, "TRANSLATION_FAULT_" }, \ - {0xC8, "PERMISSION_FAULT_" }, \ - {0xD0, "TRANSTAB_BUS_FAULT_" }, \ - {0xD8, "ACCESS_FLAG_" }, \ - {0xE0, 
"ADDRESS_SIZE_FAULT_IN" }, \ - {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ - {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }) +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEB) && (!(code >= 0xC5 && code <= 0xC7)) && \ + (!(code >= 0xCC && code <= 0xD8)) && (!(code >= 0xDC && code <= 0xDF)) && \ + (!(code >= 0xE1 && code <= 0xE3))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS \ + _ENSURE_PARENTHESIS({ 0xC0, "TRANSLATION_FAULT_" }, { 0xC4, "TRANSLATION_FAULT_" }, \ + { 0xC8, "PERMISSION_FAULT_" }, { 0xD0, "TRANSTAB_BUS_FAULT_" }, \ + { 0xD8, "ACCESS_FLAG_" }, { 0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + { 0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + { 0xE8, "MEMORY_ATTRIBUTES_FAULT_" }) #else /* MALI_USE_CSF */ -#define KBASE_MMU_FAULT_CODE_VALID(code) \ - ((code >= 0xC0 && code <= 0xEF) && \ - (!(code >= 0xC5 && code <= 0xC6)) && \ - (!(code >= 0xCC && code <= 0xCF)) && \ - (!(code >= 0xD4 && code <= 0xD7)) && \ - (!(code >= 0xDC && code <= 0xDF))) -#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ - {0xC0, "TRANSLATION_FAULT_" }, \ - {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ - {0xC8, "PERMISSION_FAULT_" }, \ - {0xD0, "TRANSTAB_BUS_FAULT_" }, \ - {0xD8, "ACCESS_FLAG_" }, \ - {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ - {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ - {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ - {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEF) && (!(code >= 0xC5 && code <= 0xC6)) && \ + (!(code >= 0xCC && code <= 0xCF)) && (!(code >= 0xD4 && code <= 0xD7)) && \ + (!(code >= 0xDC && code <= 0xDF))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS \ + _ENSURE_PARENTHESIS({ 0xC0, "TRANSLATION_FAULT_" }, \ + { 0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ + { 0xC8, "PERMISSION_FAULT_" }, { 0xD0, "TRANSTAB_BUS_FAULT_" }, \ + { 0xD8, "ACCESS_FLAG_" }, { 0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + { 0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + { 0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ + { 0xEC, 
"MEMORY_ATTRIBUTES_NONCACHEABLE_" }) #endif /* MALI_USE_CSF */ #endif /* __TRACE_MALI_MMU_HELPERS */ -/* trace_mali_mmu_page_fault_grow +/* trace_mali_mmu_page_fault_extra_grow * * Tracepoint about a successful grow of a region due to a GPU page fault */ -TRACE_EVENT(mali_mmu_page_fault_grow, - TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, - size_t new_pages), +TRACE_EVENT( + mali_mmu_page_fault_extra_grow, + TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, size_t new_pages), TP_ARGS(reg, fault, new_pages), - TP_STRUCT__entry( - __field(u64, start_addr) - __field(u64, fault_addr) - __field(u64, fault_extra_addr) - __field(size_t, new_pages) - __field(u32, status) - ), - TP_fast_assign( - __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; - __entry->fault_addr = fault->addr; - __entry->fault_extra_addr = fault->extra_addr; - __entry->new_pages = new_pages; - __entry->status = fault->status; - ), - TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", - __entry->start_addr, __entry->fault_addr, - __entry->fault_extra_addr, __entry->new_pages, - __entry->status, + TP_STRUCT__entry(__field(u64, start_addr) __field(u64, fault_addr) + __field(u64, fault_extra_addr) __field(size_t, new_pages) + __field(u32, status)), + TP_fast_assign(__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->fault_addr = fault->addr; + __entry->fault_extra_addr = fault->extra_addr; + __entry->new_pages = new_pages; __entry->status = fault->status;), + TP_printk( + "start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", + __entry->start_addr, __entry->fault_addr, __entry->fault_extra_addr, + __entry->new_pages, __entry->status, KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), 
KBASE_MMU_FAULT_STATUS_CODE(__entry->status), KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, - KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), - __entry->status >> 16) -); - - + KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), __entry->status >> 16)); +/* trace_mali_mmu_page_fault_grow + * + * Tracepoint about a successful grow of a region due to a GPU page fault for GPU >= v14.8.4 + */ +TRACE_EVENT( + mali_mmu_page_fault_grow, + TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, size_t new_pages), + TP_ARGS(reg, fault, new_pages), + TP_STRUCT__entry(__field(u64, start_addr) __field(u64, fault_addr) + __field(size_t, new_pages) __field(u32, status)), + TP_fast_assign(__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->fault_addr = fault->addr; __entry->new_pages = new_pages; + __entry->status = fault->status;), + TP_printk( + "start=0x%llx fault_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", + __entry->start_addr, __entry->fault_addr, __entry->new_pages, __entry->status, + KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), + KBASE_MMU_FAULT_STATUS_CODE(__entry->status), + KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), + KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), + KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, + KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), __entry->status >> 16)); /* * Just-in-time memory allocation subsystem tracepoints @@ -261,25 +219,17 @@ TRACE_EVENT(mali_mmu_page_fault_grow, /* Just-in-time memory allocation soft-job template. Override the TP_printk * further if need be. jit_id can be 0. 
*/ -DECLARE_EVENT_CLASS(mali_jit_softjob_template, - TP_PROTO(struct kbase_va_region *reg, u8 jit_id), - TP_ARGS(reg, jit_id), - TP_STRUCT__entry( - __field(u64, start_addr) - __field(size_t, nr_pages) - __field(size_t, backed_pages) - __field(u8, jit_id) - ), - TP_fast_assign( - __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; - __entry->nr_pages = reg->nr_pages; - __entry->backed_pages = kbase_reg_current_backed_size(reg); - __entry->jit_id = jit_id; - ), - TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", - __entry->jit_id, __entry->start_addr, __entry->nr_pages, - __entry->backed_pages) -); +DECLARE_EVENT_CLASS(mali_jit_softjob_template, TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_STRUCT__entry(__field(u64, start_addr) __field(size_t, nr_pages) + __field(size_t, backed_pages) __field(u8, jit_id)), + TP_fast_assign(__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->nr_pages = reg->nr_pages; + __entry->backed_pages = kbase_reg_current_backed_size(reg); + __entry->jit_id = jit_id;), + TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->jit_id, __entry->start_addr, __entry->nr_pages, + __entry->backed_pages)); /* trace_mali_jit_alloc() * @@ -287,8 +237,7 @@ DECLARE_EVENT_CLASS(mali_jit_softjob_template, * allocating memory */ DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, - TP_PROTO(struct kbase_va_region *reg, u8 jit_id), - TP_ARGS(reg, jit_id)); + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), TP_ARGS(reg, jit_id)); /* trace_mali_jit_free() * @@ -300,10 +249,9 @@ DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, * it's currently suppressed from the output - set jit_id to 0 */ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, - TP_PROTO(struct kbase_va_region *reg, u8 jit_id), - TP_ARGS(reg, jit_id), - TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", - __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); 
+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), TP_ARGS(reg, jit_id), + TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", __entry->start_addr, + __entry->nr_pages, __entry->backed_pages)); #if !MALI_USE_CSF #if MALI_JIT_PRESSURE_LIMIT_BASE @@ -313,37 +261,24 @@ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, * allocation report, and its calculated physical page usage */ TRACE_EVENT(mali_jit_report, - TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, - unsigned int id_idx, u64 read_val, u64 used_pages), - TP_ARGS(katom, reg, id_idx, read_val, used_pages), - TP_STRUCT__entry( - __field(u64, start_addr) - __field(u64, read_val) - __field(u64, used_pages) - __field(unsigned long, flags) - __field(u8, id_idx) - __field(u8, jit_id) - ), - TP_fast_assign( - __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; - __entry->read_val = read_val; - __entry->used_pages = used_pages; - __entry->flags = reg->flags; - __entry->id_idx = id_idx; - __entry->jit_id = katom->jit_ids[id_idx]; - ), - TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", - __entry->start_addr, __entry->id_idx, __entry->jit_id, - __print_symbolic(__entry->flags, - { 0, "address"}, - { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, - { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, - { KBASE_REG_HEAP_INFO_IS_SIZE | - KBASE_REG_TILER_ALIGN_TOP, - "size with align (invalid)" } - ), - __entry->read_val, __entry->used_pages) -); + TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, unsigned int id_idx, + u64 read_val, u64 used_pages), + TP_ARGS(katom, reg, id_idx, read_val, used_pages), + TP_STRUCT__entry(__field(u64, start_addr) __field(u64, read_val) + __field(u64, used_pages) __field(unsigned long, flags) + __field(u8, id_idx) __field(u8, jit_id)), + TP_fast_assign(__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->read_val = read_val; __entry->used_pages = used_pages; + __entry->flags = 
reg->flags; __entry->id_idx = id_idx; + __entry->jit_id = katom->jit_ids[id_idx];), + TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", + __entry->start_addr, __entry->id_idx, __entry->jit_id, + __print_symbolic(__entry->flags, { 0, "address" }, + { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, + { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, + { KBASE_REG_HEAP_INFO_IS_SIZE | KBASE_REG_TILER_ALIGN_TOP, + "size with align (invalid)" }), + __entry->read_val, __entry->used_pages)); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #endif /* !MALI_USE_CSF */ @@ -357,44 +292,30 @@ TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); * - just-in-time allocation of a region * - free of a region that was allocated just-in-time */ -TRACE_EVENT(mali_jit_report_pressure, - TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, - u64 new_pressure, unsigned int flags), +TRACE_EVENT( + mali_jit_report_pressure, + TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, u64 new_pressure, + unsigned int flags), TP_ARGS(reg, new_used_pages, new_pressure, flags), - TP_STRUCT__entry( - __field(u64, start_addr) - __field(u64, used_pages) - __field(u64, new_used_pages) - __field(u64, new_pressure) - __field(unsigned int, flags) - ), - TP_fast_assign( - __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; - __entry->used_pages = reg->used_pages; - __entry->new_used_pages = new_used_pages; - __entry->new_pressure = new_pressure; - __entry->flags = flags; - ), - TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", - __entry->start_addr, __entry->used_pages, - __entry->new_used_pages, __entry->new_pressure, + TP_STRUCT__entry(__field(u64, start_addr) __field(u64, used_pages) + __field(u64, new_used_pages) __field(u64, new_pressure) + __field(unsigned int, flags)), + TP_fast_assign(__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->used_pages = reg->used_pages; + __entry->new_used_pages 
= new_used_pages; + __entry->new_pressure = new_pressure; __entry->flags = flags;), + TP_printk( + "start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", + __entry->start_addr, __entry->used_pages, __entry->new_used_pages, + __entry->new_pressure, __print_flags(__entry->flags, "|", - { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, - "HAPPENED_ON_ALLOC_OR_FREE" })) -); + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, "HAPPENED_ON_ALLOC_OR_FREE" }))); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #ifndef __TRACE_SYSGRAPH_ENUM #define __TRACE_SYSGRAPH_ENUM /* Enum of sysgraph message IDs */ -enum sysgraph_msg { - SGR_ARRIVE, - SGR_SUBMIT, - SGR_COMPLETE, - SGR_POST, - SGR_ACTIVE, - SGR_INACTIVE -}; +enum sysgraph_msg { SGR_ARRIVE, SGR_SUBMIT, SGR_COMPLETE, SGR_POST, SGR_ACTIVE, SGR_INACTIVE }; #endif /* __TRACE_SYSGRAPH_ENUM */ /* A template for SYSGRAPH events @@ -403,22 +324,14 @@ enum sysgraph_msg { * which is atom_id therefore they will be using a common template */ TRACE_EVENT(sysgraph, - TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, - unsigned int atom_id), - TP_ARGS(message, proc_id, atom_id), - TP_STRUCT__entry( - __field(unsigned int, proc_id) - __field(enum sysgraph_msg, message) - __field(unsigned int, atom_id) - ), - TP_fast_assign( - __entry->proc_id = proc_id; - __entry->message = message; - __entry->atom_id = atom_id; - ), - TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message, - __entry->proc_id, __entry->atom_id) -); + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, unsigned int atom_id), + TP_ARGS(message, proc_id, atom_id), + TP_STRUCT__entry(__field(unsigned int, proc_id) __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id)), + TP_fast_assign(__entry->proc_id = proc_id; __entry->message = message; + __entry->atom_id = atom_id;), + TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message, __entry->proc_id, + __entry->atom_id)); /* A template for SYSGRAPH GPU events * @@ -427,25 +340,15 @@ 
TRACE_EVENT(sysgraph, * atom id. */ TRACE_EVENT(sysgraph_gpu, - TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, - unsigned int atom_id, unsigned int js), - TP_ARGS(message, proc_id, atom_id, js), - TP_STRUCT__entry( - __field(unsigned int, proc_id) - __field(enum sysgraph_msg, message) - __field(unsigned int, atom_id) - __field(unsigned int, js) - ), - TP_fast_assign( - __entry->proc_id = proc_id; - __entry->message = message; - __entry->atom_id = atom_id; - __entry->js = js; - ), - TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d", - __entry->message, __entry->proc_id, - __entry->atom_id, __entry->js) -); + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, unsigned int atom_id, + unsigned int js), + TP_ARGS(message, proc_id, atom_id, js), + TP_STRUCT__entry(__field(unsigned int, proc_id) __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) __field(unsigned int, js)), + TP_fast_assign(__entry->proc_id = proc_id; __entry->message = message; + __entry->atom_id = atom_id; __entry->js = js;), + TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d", __entry->message, __entry->proc_id, + __entry->atom_id, __entry->js)); /* Tracepoint files get included more than once - protect against multiple * definition @@ -463,73 +366,50 @@ TRACE_EVENT(sysgraph_gpu, * memory allocation report */ TRACE_EVENT(mali_jit_report_gpu_mem, - TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), - TP_ARGS(base_addr, reg_addr, gpu_mem, flags), - TP_STRUCT__entry( - __field(u64, base_addr) - __field(u64, reg_addr) - __array(u64, mem_values, - KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) - __field(unsigned int, flags) - ), - TP_fast_assign( - __entry->base_addr = base_addr; - __entry->reg_addr = reg_addr; - memcpy(__entry->mem_values, gpu_mem, - sizeof(__entry->mem_values)); - __entry->flags = flags; - ), - TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", - __entry->reg_addr, __entry->base_addr, - 
__print_array(__entry->mem_values, - ARRAY_SIZE(__entry->mem_values), sizeof(u64)), - __print_flags(__entry->flags, "|", - { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, - "HAPPENED_ON_ALLOC_OR_FREE" })) -); + TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), + TP_ARGS(base_addr, reg_addr, gpu_mem, flags), + TP_STRUCT__entry(__field(u64, base_addr) __field(u64, reg_addr) + __array(u64, mem_values, + KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) + __field(unsigned int, flags)), + TP_fast_assign(__entry->base_addr = base_addr; __entry->reg_addr = reg_addr; + memcpy(__entry->mem_values, gpu_mem, sizeof(__entry->mem_values)); + __entry->flags = flags;), + TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", + __entry->reg_addr, __entry->base_addr, + __print_array(__entry->mem_values, ARRAY_SIZE(__entry->mem_values), + sizeof(u64)), + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" }))); /* trace_mali_jit_trim_from_region * * Tracepoint about trimming physical pages from a region */ -TRACE_EVENT(mali_jit_trim_from_region, - TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, - size_t old_pages, size_t available_pages, size_t new_pages), +TRACE_EVENT( + mali_jit_trim_from_region, + TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, size_t old_pages, + size_t available_pages, size_t new_pages), TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), - TP_STRUCT__entry( - __field(u64, start_addr) - __field(size_t, freed_pages) - __field(size_t, old_pages) - __field(size_t, available_pages) - __field(size_t, new_pages) - ), - TP_fast_assign( - __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; - __entry->freed_pages = freed_pages; - __entry->old_pages = old_pages; - __entry->available_pages = available_pages; - __entry->new_pages = new_pages; - ), + TP_STRUCT__entry(__field(u64, start_addr) __field(size_t, freed_pages) + __field(size_t, old_pages) 
__field(size_t, available_pages) + __field(size_t, new_pages)), + TP_fast_assign(__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->freed_pages = freed_pages; __entry->old_pages = old_pages; + __entry->available_pages = available_pages; __entry->new_pages = new_pages;), TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", - __entry->start_addr, __entry->freed_pages, __entry->old_pages, - __entry->available_pages, __entry->new_pages) -); + __entry->start_addr, __entry->freed_pages, __entry->old_pages, + __entry->available_pages, __entry->new_pages)); /* trace_mali_jit_trim * * Tracepoint about total trimmed physical pages */ -TRACE_EVENT(mali_jit_trim, - TP_PROTO(size_t freed_pages), - TP_ARGS(freed_pages), - TP_STRUCT__entry( - __field(size_t, freed_pages) - ), - TP_fast_assign( - __entry->freed_pages = freed_pages; - ), - TP_printk("freed_pages=%zu", __entry->freed_pages) -); +TRACE_EVENT(mali_jit_trim, TP_PROTO(size_t freed_pages), TP_ARGS(freed_pages), + TP_STRUCT__entry(__field(size_t, freed_pages)), + TP_fast_assign(__entry->freed_pages = freed_pages;), + TP_printk("freed_pages=%zu", __entry->freed_pages)); #include "debug/mali_kbase_debug_linux_ktrace.h" @@ -540,7 +420,7 @@ TRACE_EVENT(mali_jit_trim, * extend CFLAGS */ #define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE mali_linux_trace /* This part must be outside protection */ diff --git a/drivers/gpu/arm/bifrost/mali_malisw.h b/drivers/gpu/arm/bifrost/mali_malisw.h index d9db189e8684..a44765a05cef 100644 --- a/drivers/gpu/arm/bifrost/mali_malisw.h +++ b/drivers/gpu/arm/bifrost/mali_malisw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,7 @@ * As a macro it may evaluate its arguments more than once. * Refer to MAX macro for more details */ -#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) /** * MAX - Return the greater of two values. @@ -50,7 +50,7 @@ * to retrieve the min and max of two values, consider using a conditional swap * instead. */ -#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MAX(x, y) ((x) < (y) ? (y) : (x)) /** * CSTD_UNUSED - Function-like macro for suppressing unused variable warnings. @@ -60,7 +60,7 @@ * Where possible such variables should be removed; this macro is present for * cases where we much support API backwards compatibility. */ -#define CSTD_UNUSED(x) ((void)(x)) +#define CSTD_UNUSED(x) ((void)(x)) /** * CSTD_NOP - Function-like macro for use where "no behavior" is desired. @@ -69,7 +69,7 @@ * This is useful when compile time macros turn a function-like macro in to a * no-op, but where having no statement is otherwise invalid. */ -#define CSTD_NOP(...) ((void)#__VA_ARGS__) +#define CSTD_NOP(...) ((void)#__VA_ARGS__) /** * CSTD_STR1 - Function-like macro for stringizing a single level macro. @@ -81,7 +81,7 @@ * > "MY_MACRO" * @endcode */ -#define CSTD_STR1(x) #x +#define CSTD_STR1(x) #x /** * CSTD_STR2 - Function-like macro for stringizing a macro's value. 
@@ -95,14 +95,14 @@ * > "32" * @endcode */ -#define CSTD_STR2(x) CSTD_STR1(x) +#define CSTD_STR2(x) CSTD_STR1(x) - #ifndef fallthrough - #define fallthrough __fallthrough - #endif /* fallthrough */ +#ifndef fallthrough +#define fallthrough __fallthrough +#endif /* fallthrough */ #ifndef __fallthrough -#define __fallthrough __attribute__((fallthrough)) +#define __fallthrough __attribute__((fallthrough)) #endif /* __fallthrough */ #endif /* _MALISW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h index f156650a40a3..b30ca0713690 100644 --- a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h +++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,30 +37,22 @@ DECLARE_EVENT_CLASS(gpu, - TP_PROTO(unsigned int state, unsigned int gpu_id), + TP_PROTO(unsigned int state, unsigned int gpu_id), - TP_ARGS(state, gpu_id), + TP_ARGS(state, gpu_id), - TP_STRUCT__entry( - __field(u32, state) - __field(u32, gpu_id) - ), + TP_STRUCT__entry(__field(u32, state) __field(u32, gpu_id)), - TP_fast_assign( - __entry->state = state; - __entry->gpu_id = gpu_id; - ), + TP_fast_assign(__entry->state = state; __entry->gpu_id = gpu_id;), - TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, - (unsigned long)__entry->gpu_id) -); + TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id)); DEFINE_EVENT(gpu, gpu_frequency, - TP_PROTO(unsigned int frequency, unsigned int gpu_id), + TP_PROTO(unsigned int frequency, unsigned int gpu_id), - TP_ARGS(frequency, gpu_id) -); + 
TP_ARGS(frequency, gpu_id)); #endif /* _TRACE_POWER_GPU_FREQUENCY_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.c b/drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.c similarity index 68% rename from drivers/gpu/arm/bifrost/mali_kbase_strings.c rename to drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.c index 84784be6f244..8e7bf6fe165c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_strings.c +++ b/drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,10 @@ * */ -#include "mali_kbase_strings.h" - -#define KBASE_DRV_NAME "mali" -#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" - -const char kbase_drv_name[] = KBASE_DRV_NAME; -const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_WORK_PERIOD +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_work_period_trace.h" +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +#endif diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.h b/drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.h new file mode 100644 index 000000000000..60d341d4ac7c --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_power_gpu_work_period_trace.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _TRACE_POWER_GPU_WORK_PERIOD_MALI +#define _TRACE_POWER_GPU_WORK_PERIOD_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_work_period_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#if !defined(_TRACE_POWER_GPU_WORK_PERIOD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_WORK_PERIOD_H + +#include + +/** + * gpu_work_period - Reports GPU work period metrics + * + * @gpu_id: Unique GPU Identifier + * @uid: UID of an application + * @start_time_ns: Start time of a GPU work period in nanoseconds + * @end_time_ns: End time of a GPU work period in nanoseconds + * @total_active_duration_ns: Total amount of time the GPU was running GPU work for given + * UID during the GPU work period, in nanoseconds. This duration does + * not double-account parallel GPU work for the same UID. 
+ */ +TRACE_EVENT( + gpu_work_period, + + TP_PROTO(u32 gpu_id, u32 uid, u64 start_time_ns, u64 end_time_ns, + u64 total_active_duration_ns), + + TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns), + + TP_STRUCT__entry(__field(u32, gpu_id) __field(u32, uid) __field(u64, start_time_ns) + __field(u64, end_time_ns) __field(u64, total_active_duration_ns)), + + TP_fast_assign(__entry->gpu_id = gpu_id; __entry->uid = uid; + __entry->start_time_ns = start_time_ns; __entry->end_time_ns = end_time_ns; + __entry->total_active_duration_ns = total_active_duration_ns;), + + TP_printk( + "gpu_id=%u uid=%u start_time_ns=%llu end_time_ns=%llu total_active_duration_ns=%llu", + __entry->gpu_id, __entry->uid, __entry->start_time_ns, __entry->end_time_ns, + __entry->total_active_duration_ns)); + +#endif /* _TRACE_POWER_GPU_WORK_PERIOD_H */ + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index 4cac7876f5f7..8d6eb5fb651f 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c @@ -30,30 +30,23 @@ #include #include -void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup) { /* Set up the required caching policies at the correct indices * in the memattr register. 
*/ setup->memattr = - (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_AARCH64_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - (AS_MEMATTR_AARCH64_NON_CACHEABLE << - (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)) | - (AS_MEMATTR_AARCH64_SHARED << - (AS_MEMATTR_INDEX_SHARED * 8)); + (KBASE_MEMATTR_IMPL_DEF_CACHE_POLICY + << (KBASE_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (KBASE_MEMATTR_FORCE_TO_CACHE_ALL << (KBASE_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (KBASE_MEMATTR_WRITE_ALLOC << (KBASE_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (KBASE_MEMATTR_AARCH64_OUTER_IMPL_DEF << (KBASE_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (KBASE_MEMATTR_AARCH64_OUTER_WA << (KBASE_MEMATTR_INDEX_OUTER_WA * 8)) | + (KBASE_MEMATTR_AARCH64_NON_CACHEABLE << (KBASE_MEMATTR_INDEX_NON_CACHEABLE * 8)) | + (KBASE_MEMATTR_AARCH64_SHARED << (KBASE_MEMATTR_INDEX_SHARED * 8)); setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; - setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; + setup->transcfg = AS_TRANSCFG_MODE_SET(0, AS_TRANSCFG_MODE_AARCH64_4K); } /** @@ -65,8 +58,7 @@ void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, * * This function submits a work for reporting the details of MMU fault. 
*/ -static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, - struct kbase_fault *fault) +static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, struct kbase_fault *fault) { unsigned long flags; struct kbase_as *const as = &kbdev->as[as_nr]; @@ -78,7 +70,7 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, if (kctx) { kbase_ctx_sched_retain_ctx_refcount(kctx); - as->pf_data = (struct kbase_fault) { + as->pf_data = (struct kbase_fault){ .status = fault->status, .addr = fault->addr, }; @@ -89,8 +81,7 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, * MCU's address space. */ if (!queue_work(as->pf_wq, &as->work_pagefault)) { - dev_dbg(kbdev->dev, - "Page fault is already pending for as %u", as_nr); + dev_dbg(kbdev->dev, "Page fault is already pending for as %u", as_nr); kbase_ctx_sched_release_ctx(kctx); } else { atomic_inc(&kbdev->faults_pending); @@ -99,8 +90,7 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, - struct kbase_fault *fault) +void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, struct kbase_fault *fault) { /* decode the fault status */ u32 exception_type = fault->status & 0xFF; @@ -115,11 +105,9 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, "exception type 0x%X: %s\n" "access type 0x%X: %s\n" "source id 0x%X\n", - fault->addr, - fault->status, - exception_type, kbase_gpu_exception_name(exception_type), - access_type, kbase_gpu_access_type_name(fault->status), - source_id); + fault->addr, fault->status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(fault->status), source_id); kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT); @@ -128,26 +116,24 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct 
kbase_device *kbdev, submit_work_pagefault(kbdev, as_no, fault); /* GPU reset is required to recover */ - if (kbase_prepare_to_reset_gpu(kbdev, - RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); -void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, struct kbase_fault *fault) +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault) { struct kbase_device *kbdev = kctx->kbdev; u32 const status = fault->status; int exception_type = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> - GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT; + GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT; int access_type = (status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >> - GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; + GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> - GPU_FAULTSTATUS_SOURCE_ID_SHIFT; - const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? - "true" : "false"; + GPU_FAULTSTATUS_SOURCE_ID_SHIFT; + const char *addr_valid = (status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK) ? 
"true" : "false"; int as_no = as->number; unsigned long flags; const uintptr_t fault_addr = fault->addr; @@ -161,13 +147,9 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, (void *)fault_addr, - addr_valid, - status, - exception_type, kbase_gpu_exception_name(exception_type), - access_type, kbase_gpu_access_type_name(access_type), - source_id, - kctx->pid); + as_no, (void *)fault_addr, addr_valid, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(access_type), source_id, kctx->pid); /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -187,8 +169,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, /* Now clear the GPU fault */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAR_FAULT); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_CLEAR_FAULT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -197,9 +178,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, * The caller must ensure it's retained the ctx to prevent it from being * scheduled out whilst it's being worked on. 
*/ -void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault) +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, + const char *reason_str, struct kbase_fault *fault) { unsigned long flags; unsigned int exception_type; @@ -230,13 +210,9 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, fault->addr, - reason_str, - status, - exception_type, kbase_gpu_exception_name(exception_type), - access_type, kbase_gpu_access_type_name(status), - source_id, - kctx->pid); + as_no, fault->addr, reason_str, status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(status), source_id, kctx->pid); /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -247,12 +223,13 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); + kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); + kbase_csf_ctx_report_page_fault_for_active_groups(kctx, fault); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); /* Switching to UNMAPPED mode above would have enabled the firmware to * recover from the fault (if the memory access was made by firmware) * and it can then respond to CSG termination requests to be sent now. 
@@ -262,10 +239,8 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, kbase_csf_ctx_handle_fault(kctx, fault); /* Clear down the fault */ - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } @@ -279,29 +254,33 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, * * This function will process a fault on a specific address space */ -static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault) +static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault) { lockdep_assert_held(&kbdev->hwaccess_lock); if (!kctx) { - dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", - kbase_as_has_bus_fault(as, fault) ? - "Bus error" : "Page fault", + if (kbase_as_has_bus_fault(as, fault)) { + dev_warn( + kbdev->dev, + "Bus error in AS%d at PA 0x%pK with no context present! Spurious IRQ or SW Design Error?\n", + as->number, (void *)(uintptr_t)fault->addr); + } else { + dev_warn( + kbdev->dev, + "Page fault in AS%d at VA 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", as->number, fault->addr); + } /* Since no ctx was found, the MMU must be disabled. 
*/ WARN_ON(as->current_setup.transtab); if (kbase_as_has_bus_fault(as, fault)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAR_FAULT); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CLEAR_FAULT); else if (kbase_as_has_page_fault(as, fault)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } return; @@ -320,8 +299,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, } } -int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, - u32 status, u32 as_nr) +int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr) { struct kbase_context *kctx; unsigned long flags; @@ -337,10 +315,7 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, as = &kbdev->as[as_nr]; fault = &as->bf_data; fault->status = status; - fault->addr = (u64) kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; - fault->addr |= kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + fault->addr = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(GPU_FAULTADDRESS)); fault->protected_mode = false; /* report the fault to debugfs */ @@ -368,9 +343,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (pf_bits) { @@ -380,11 +355,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) 
struct kbase_fault *fault = &as->pf_data; /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); - fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); + fault->addr = kbase_reg_read64(kbdev, MMU_AS_OFFSET(as_no, FAULTADDRESS)); /* Mark the fault protected or not */ fault->protected_mode = false; @@ -393,14 +364,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); + fault->status = kbase_reg_read32(kbdev, MMU_AS_OFFSET(as_no, FAULTSTATUS)); - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + if (kbase_reg_is_valid(kbdev, MMU_AS_OFFSET(as_no, FAULTEXTRA))) + fault->extra_addr = + kbase_reg_read64(kbdev, MMU_AS_OFFSET(as_no, FAULTEXTRA)); /* Mark page fault as handled */ pf_bits &= ~(1UL << as_no); @@ -432,15 +400,17 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + tmp = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } -int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, - struct kbase_va_region *const reg) +int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) { + CSTD_UNUSED(kctx); + CSTD_UNUSED(reg); + /* Can't soft-stop the provoking job */ return -EPERM; } @@ -455,34 +425,30 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, */ static 
void kbase_mmu_gpu_fault_worker(struct work_struct *data) { - struct kbase_as *const faulting_as = container_of(data, struct kbase_as, - work_gpufault); + struct kbase_as *const faulting_as = container_of(data, struct kbase_as, work_gpufault); const u32 as_nr = faulting_as->number; - struct kbase_device *const kbdev = container_of(faulting_as, struct - kbase_device, as[as_nr]); + struct kbase_device *const kbdev = + container_of(faulting_as, struct kbase_device, as[as_nr]); struct kbase_fault *fault; struct kbase_context *kctx; u32 status; - u64 address; + uintptr_t phys_addr; u32 as_valid; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); fault = &faulting_as->gf_data; status = fault->status; - as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; - address = fault->addr; + as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK; + phys_addr = (uintptr_t)fault->addr; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_warn(kbdev->dev, - "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" + "GPU Fault 0x%08x (%s) in AS%u at PA 0x%pK\n" "ASID_VALID: %s, ADDRESS_VALID: %s\n", - status, - kbase_gpu_exception_name( - GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), - as_nr, address, - as_valid ? "true" : "false", - status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false"); + status, kbase_gpu_exception_name(GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), + as_nr, (void *)phys_addr, as_valid ? "true" : "false", + status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK ? "true" : "false"); kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); kbase_csf_ctx_handle_fault(kctx, fault); @@ -493,8 +459,7 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data) * Now clear the GPU fault to allow next GPU fault interrupt report. 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAR_FAULT); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_CLEAR_FAULT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); atomic_dec(&kbdev->faults_pending); @@ -510,8 +475,7 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data) * * This function submits a work for reporting the details of GPU fault. */ -static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, - u32 as_nr, u64 address) +static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, u32 as_nr, u64 address) { unsigned long flags; struct kbase_as *const as = &kbdev->as[as_nr]; @@ -523,7 +487,7 @@ static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, if (kctx) { kbase_ctx_sched_retain_ctx_refcount(kctx); - as->gf_data = (struct kbase_fault) { + as->gf_data = (struct kbase_fault){ .status = status, .addr = address, }; @@ -536,8 +500,8 @@ static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, - u32 as_nr, u64 address, bool as_valid) +void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr, u64 address, + bool as_valid) { if (!as_valid || (as_nr == MCU_AS_NR)) { int as; @@ -555,12 +519,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; - kbdev->as[i].bf_data.addr = 0ULL; - kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].is_unresponsive = false; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 0, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git 
a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index d716ce0068fd..7cf0ed292fb0 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -30,32 +30,26 @@ #include #include -void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup) { /* Set up the required caching policies at the correct indices * in the memattr register. */ setup->memattr = - (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_AARCH64_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - (AS_MEMATTR_AARCH64_NON_CACHEABLE << - (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); + (KBASE_MEMATTR_IMPL_DEF_CACHE_POLICY + << (KBASE_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (KBASE_MEMATTR_FORCE_TO_CACHE_ALL << (KBASE_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (KBASE_MEMATTR_WRITE_ALLOC << (KBASE_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (KBASE_MEMATTR_AARCH64_OUTER_IMPL_DEF << (KBASE_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (KBASE_MEMATTR_AARCH64_OUTER_WA << (KBASE_MEMATTR_INDEX_OUTER_WA * 8)) | + (KBASE_MEMATTR_AARCH64_NON_CACHEABLE << (KBASE_MEMATTR_INDEX_NON_CACHEABLE * 8)); setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; - setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; + setup->transcfg = AS_TRANSCFG_MODE_SET(0, AS_TRANSCFG_MODE_AARCH64_4K); } -void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, struct kbase_fault *fault) +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, + 
struct kbase_fault *fault) { struct kbase_device *const kbdev = kctx->kbdev; u32 const status = fault->status; @@ -72,11 +66,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, "exception type 0x%X: %s\n" "exception data 0x%X\n" "pid: %d\n", - as_no, (void *)fault_addr, - status, - exception_type, kbase_gpu_exception_name(exception_type), - exception_data, - kctx->pid); + as_no, (void *)fault_addr, status, exception_type, + kbase_gpu_exception_name(exception_type), exception_data, kctx->pid); /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter * dumping AS transaction begin @@ -91,10 +82,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); } @@ -102,9 +91,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, * The caller must ensure it's retained the ctx to prevent it from being * scheduled out whilst it's being worked on. 
*/ -void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault) +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, + const char *reason_str, struct kbase_fault *fault) { unsigned long flags; u32 exception_type; @@ -136,22 +124,16 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, fault->addr, - reason_str, - fault->status, - exception_type, kbase_gpu_exception_name(exception_type), - access_type, kbase_gpu_access_type_name(fault->status), - source_id, - kctx->pid); + as_no, fault->addr, reason_str, fault->status, exception_type, + kbase_gpu_exception_name(exception_type), access_type, + kbase_gpu_access_type_name(fault->status), source_id, kctx->pid); /* hardware counters dump fault handling */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) { + (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING)) { if ((fault->addr >= kbdev->hwcnt.addr) && - (fault->addr < (kbdev->hwcnt.addr + - kbdev->hwcnt.addr_bytes))) + (fault->addr < (kbdev->hwcnt.addr + kbdev->hwcnt.addr_bytes))) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -183,10 +165,8 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, /* AS transaction end */ /* Clear down the fault */ - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } @@ -200,37 +180,36 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, * * This function will process a 
fault on a specific address space */ -static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault) +static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault) { unsigned long flags; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, - "Entering %s kctx %pK, as %pK\n", - __func__, (void *)kctx, (void *)as); + dev_dbg(kbdev->dev, "Entering %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); if (!kctx) { - dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", - kbase_as_has_bus_fault(as, fault) ? - "Bus error" : "Page fault", + if (kbase_as_has_bus_fault(as, fault)) { + dev_warn( + kbdev->dev, + "Bus error in AS%d at PA 0x%pK with no context present! Spurious IRQ or SW Design Error?\n", + as->number, (void *)(uintptr_t)fault->addr); + } else { + dev_warn( + kbdev->dev, + "Page fault in AS%d at VA 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", as->number, fault->addr); - + } /* Since no ctx was found, the MMU must be disabled. 
*/ WARN_ON(as->current_setup.transtab); if (kbase_as_has_bus_fault(as, fault)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); } else if (kbase_as_has_page_fault(as, fault)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } return; @@ -245,8 +224,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if ((kbdev->hwcnt.kctx == kctx) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) + (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING)) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -258,10 +236,8 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, */ kbasep_js_clear_submit_allowed(js_devdata, kctx); - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, fault->addr, - fault->extra_addr); + dev_warn(kbdev->dev, "Bus error in AS%d at PA=0x%pK, IPA=0x%pK\n", as->number, + (void *)(uintptr_t)fault->addr, (void *)(uintptr_t)fault->extra_addr); /* * We need to switch to UNMAPPED mode - but we do this in a @@ -274,9 +250,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, atomic_inc(&kbdev->faults_pending); } - dev_dbg(kbdev->dev, - "Leaving %s kctx %pK, as %pK\n", - __func__, (void *)kctx, (void *)as); + dev_dbg(kbdev->dev, "Leaving %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); } static void 
validate_protected_page_fault(struct kbase_device *kbdev) @@ -288,8 +262,8 @@ static void validate_protected_page_fault(struct kbase_device *kbdev) u32 protected_debug_mode = 0; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - protected_debug_mode = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; + protected_debug_mode = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & + GPU_STATUS_GPU_DBG_ENABLED; } if (!protected_debug_mode) { @@ -310,8 +284,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) u32 new_mask; u32 tmp, bf_bits, pf_bits; - dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", - __func__, irq_stat); + dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", __func__, irq_stat); /* bus faults */ bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; /* page faults (note: Ignore ASes with both pf and bf) */ @@ -322,9 +295,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (bf_bits | pf_bits) { @@ -355,11 +328,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); - fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); + fault->addr = kbase_reg_read64(kbdev, MMU_AS_OFFSET(as_no, FAULTADDRESS)); /* Mark the fault protected or not */ fault->protected_mode = kbdev->protected_mode; @@ -372,13 +341,8 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 
irq_stat) kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + fault->status = kbase_reg_read32(kbdev, MMU_AS_OFFSET(as_no, FAULTSTATUS)); + fault->extra_addr = kbase_reg_read64(kbdev, MMU_AS_OFFSET(as_no, FAULTEXTRA)); if (kbase_as_has_bus_fault(as, fault)) { /* Mark bus fault as handled. @@ -388,8 +352,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) bf_bits &= ~(1UL << as_no); /* remove the queued BF (and PF) from the mask */ - new_mask &= ~(MMU_BUS_ERROR(as_no) | - MMU_PAGE_FAULT(as_no)); + new_mask &= ~(MMU_BUS_ERROR(as_no) | MMU_PAGE_FAULT(as_no)); } else { /* Mark page fault as handled */ pf_bits &= ~(1UL << as_no); @@ -406,20 +369,17 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + tmp = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); - dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", - __func__, irq_stat); + dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", __func__, irq_stat); } -int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, - struct kbase_va_region *const reg) +int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) { - dev_dbg(kctx->kbdev->dev, - "Switching to incremental rendering for region %pK\n", + dev_dbg(kctx->kbdev->dev, "Switching to incremental rendering for region %pK\n", (void *)reg); return 
kbase_job_slot_softstop_start_rp(kctx, reg); } @@ -429,9 +389,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].is_unresponsive = false; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 0, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index d6d3fcdee6e7..f3095f3b1f2a 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,6 +46,7 @@ #if !MALI_USE_CSF #include #endif +#include #include #include @@ -55,11 +56,15 @@ #define MGM_DEFAULT_PTE_GROUP (0) /* Macro to convert updated PDGs to flags indicating levels skip in flush */ -#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) +#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds)&0xF) + +static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); /* Small wrapper function to factor out GPU-dependent context releasing */ -static void release_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) +static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { #if MALI_USE_CSF CSTD_UNUSED(kbdev); @@ -83,7 +88,11 @@ static void mmu_hw_operation_begin(struct kbase_device *kbdev) kbdev->mmu_hw_operation_in_progress = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +#else + CSTD_UNUSED(kbdev); #endif /* MALI_USE_CSF */ +#else + CSTD_UNUSED(kbdev); #endif /* !CONFIG_MALI_BIFROST_NO_MALI */ } @@ -105,7 +114,11 
@@ static void mmu_hw_operation_end(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +#else + CSTD_UNUSED(kbdev); #endif /* MALI_USE_CSF */ +#else + CSTD_UNUSED(kbdev); #endif /* !CONFIG_MALI_BIFROST_NO_MALI */ } @@ -122,11 +135,7 @@ static void mmu_hw_operation_end(struct kbase_device *kbdev) */ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) { - uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id & - GPU_ID2_ARCH_MAJOR) >> - GPU_ID2_ARCH_MAJOR_SHIFT; - - return arch_maj_cur > 11; + return kbdev->gpu_props.gpu_id.arch_major > 11; } /** @@ -180,7 +189,7 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) { as_nr = kctx ? kctx->as_nr : as_nr; if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param)) dev_err(kbdev->dev, @@ -201,7 +210,7 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) + if (kbdev->pm.backend.gpu_ready && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -275,7 +284,8 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex * interface. 
*/ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, - int as_nr, const struct kbase_mmu_hw_op_param *op_param) + int as_nr, + const struct kbase_mmu_hw_op_param *op_param) { unsigned long flags; @@ -283,7 +293,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) { as_nr = kctx ? kctx->as_nr : as_nr; if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param)) dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); @@ -294,8 +304,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct } static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx, - phys_addr_t phys, size_t size, - enum kbase_mmu_op_type flush_op) + phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op) { kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); } @@ -306,8 +315,7 @@ static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle * the pages from memory */ if (kbdev->system_coherency == COHERENCY_NONE) - dma_sync_single_for_device(kbdev->dev, handle, size, - DMA_TO_DEVICE); + dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE); } /** @@ -333,7 +341,6 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context phys_addr_t phys, dma_addr_t handle, size_t size, enum kbase_mmu_op_type flush_op) { - kbase_mmu_sync_pgd_cpu(kbdev, handle, size); kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op); } @@ -384,31 +391,31 @@ static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase } static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - struct page *p) + 
struct kbase_mmu_table *mmut, struct page *p) { struct kbase_page_metadata *page_md = kbase_page_private(p); bool page_is_isolated = false; lockdep_assert_held(&mmut->mmu_lock); - if (!kbase_page_migration_enabled) + if (!kbase_is_page_migration_enabled()) return false; spin_lock(&page_md->migrate_lock); if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { WARN_ON_ONCE(!mmut->kctx); if (IS_PAGE_ISOLATED(page_md->status)) { - page_md->status = PAGE_STATUS_SET(page_md->status, - FREE_PT_ISOLATED_IN_PROGRESS); + page_md->status = + PAGE_STATUS_SET(page_md->status, FREE_PT_ISOLATED_IN_PROGRESS); page_md->data.free_pt_isolated.kbdev = kbdev; page_is_isolated = true; } else { - page_md->status = - PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); + page_md->status = PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); } + } else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) || + (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) { + /* Nothing to do - fall through */ } else { - WARN_ON_ONCE(mmut->kctx); WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); } spin_unlock(&page_md->migrate_lock); @@ -434,7 +441,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, * @pgd: Physical address of page directory to be freed. * * This function is supposed to be called with mmu_lock held and after - * ensuring that GPU won't be able to access the page. + * ensuring that the GPU won't be able to access the page. 
*/ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd) @@ -512,8 +519,8 @@ static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) * * Return: the number of backed pages to increase by */ -static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, - struct kbase_va_region *reg, size_t fault_rel_pfn) +static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, struct kbase_va_region *reg, + size_t fault_rel_pfn) { size_t multiple = reg->extension; size_t reg_current_size = kbase_reg_current_backed_size(reg); @@ -561,8 +568,8 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, /* same as calculating * (fault_rel_pfn - initial_commit + 1) */ - size_t pages_after_initial = minimum_extra + - reg_current_size - initial_commit; + size_t pages_after_initial = + minimum_extra + reg_current_size - initial_commit; remainder = pages_after_initial % multiple; } @@ -577,9 +584,8 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, #ifdef CONFIG_MALI_CINSTR_GWT static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, - struct kbase_as *faulting_as, - u64 start_pfn, size_t nr, - u32 kctx_id, u64 dirty_pgds) + struct kbase_as *faulting_as, u64 start_pfn, + size_t nr, u32 kctx_id, u64 dirty_pgds) { /* Calls to this function are inherently synchronous, with respect to * MMU operations. 
@@ -590,8 +596,7 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, mutex_lock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); /* flush L2 and unlock the VA (resumes the MMU) */ op_param.vpfn = start_pfn; @@ -603,8 +608,7 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, unsigned long irq_flags; spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - op_param.flush_skip_levels = - pgd_level_to_skip_flush(dirty_pgds); + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } else { @@ -619,13 +623,11 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, dev_err(kbdev->dev, "Flush for GPU page fault due to write access did not complete"); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); } -static void set_gwt_element_page_addr_and_size( - struct kbasep_gwt_list_element *element, - u64 fault_page_addr, struct tagged_addr fault_phys) +static void set_gwt_element_page_addr_and_size(struct kbasep_gwt_list_element *element, + u64 fault_page_addr, struct tagged_addr fault_phys) { u64 fault_pfn = fault_page_addr >> PAGE_SHIFT; unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1); @@ -643,7 +645,7 @@ static void set_gwt_element_page_addr_and_size( } static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, - struct kbase_as *faulting_as) + struct kbase_as *faulting_as) { struct kbasep_gwt_list_element *pos; struct kbase_va_region *region; @@ -662,21 +664,20 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); /* Find region and check if it should 
be writable. */ - region = kbase_region_tracker_find_region_enclosing_address(kctx, - fault->addr); + region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr); if (kbase_is_region_invalid_or_free(region)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not mapped on the GPU", - &faulting_as->pf_data); + "Memory is not mapped on the GPU", + &faulting_as->pf_data); return; } if (!(region->flags & KBASE_REG_GPU_WR)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Region does not have write permissions", - &faulting_as->pf_data); + "Region does not have write permissions", + &faulting_as->pf_data); return; } @@ -701,8 +702,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, pos = kmalloc(sizeof(*pos), GFP_KERNEL); if (pos) { pos->region = region; - set_gwt_element_page_addr_and_size(pos, - fault_page_addr, *fault_phys_addr); + set_gwt_element_page_addr_and_size(pos, fault_page_addr, + *fault_phys_addr); list_add(&pos->link, &kctx->gwt_current_list); } else { dev_warn(kbdev->dev, "kmalloc failure"); @@ -714,14 +715,14 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, region->flags, region->gpu_alloc->group_id, &dirty_pgds); - kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, - kctx->id, dirty_pgds); + kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id, + dirty_pgds); kbase_gpu_vm_unlock(kctx); } static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, - struct kbase_as *faulting_as) + struct kbase_as *faulting_as) { struct kbase_fault *fault = &faulting_as->pf_data; @@ -730,17 +731,16 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); break; - 
case AS_FAULTSTATUS_ACCESS_TYPE_EX: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Execute Permission fault", fault); + case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Execute Permission fault", + fault); break; case AS_FAULTSTATUS_ACCESS_TYPE_READ: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Read Permission fault", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Read Permission fault", fault); break; default: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown Permission fault", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Unknown Permission fault", + fault); break; } } @@ -811,10 +811,9 @@ static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const si * * Return: true if successful, false on failure */ -static bool page_fault_try_alloc(struct kbase_context *kctx, - struct kbase_va_region *region, size_t new_pages, - int *pages_to_grow, bool *grow_2mb_pool, - struct kbase_sub_alloc **prealloc_sas) +static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_region *region, + size_t new_pages, int *pages_to_grow, bool *grow_2mb_pool, + struct kbase_sub_alloc **prealloc_sas) { size_t total_gpu_pages_alloced = 0; size_t total_cpu_pages_alloced = 0; @@ -826,8 +825,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, lockdep_assert_held(&kctx->reg_lock); lockdep_assert_held(&kctx->mem_partials_lock); - if (WARN_ON(region->gpu_alloc->group_id >= - MEMORY_GROUP_MANAGER_NR_GROUPS)) { + if (WARN_ON(region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { /* Do not try to grow the memory pool */ *pages_to_grow = 0; return false; @@ -879,10 +877,9 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; if (pages_to_alloc_4k) { - struct tagged_addr *gpu_pages = - kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool, - 
pages_to_alloc_4k_per_alloc, - &prealloc_sas[0]); + struct tagged_addr *gpu_pages = kbase_alloc_phy_pages_helper_locked( + region->gpu_alloc, pool, pages_to_alloc_4k_per_alloc, + &prealloc_sas[0]); if (!gpu_pages) alloc_failed = true; @@ -1030,60 +1027,122 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #endif if (unlikely(fault->protected_mode)) { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Protected mode fault", fault); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); goto fault_done; } fault_status = fault->status; - switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { - - case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: + switch (AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status)) { + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_3: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_4: +#if !MALI_USE_CSF + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_IDENTITY: +#endif /* need to check against the region to handle this one */ break; - case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: + case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_3: #ifdef CONFIG_MALI_CINSTR_GWT /* If GWT was ever enabled then we need to handle * write fault pages even if the feature was disabled later. 
*/ if (kctx->gwt_was_enabled) { - kbase_gpu_mmu_handle_permission_fault(kctx, - faulting_as); + kbase_gpu_mmu_handle_permission_fault(kctx, faulting_as); goto fault_done; } #endif - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault); goto fault_done; - case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Translation table bus fault", fault); +#if !MALI_USE_CSF + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_3: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Translation table bus fault", + fault); goto fault_done; +#endif - case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: +#if !MALI_USE_CSF + case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_0: + fallthrough; +#endif + case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_3: /* nothing to do, but we don't expect this fault currently */ dev_warn(kbdev->dev, "Access flag unexpectedly set"); goto fault_done; - case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault", fault); +#if MALI_USE_CSF + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN: + fallthrough; +#else + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN3: + fallthrough; +#endif + case 
AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT3: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Address size fault", fault); goto fault_done; - case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault", fault); + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3: +#if !MALI_USE_CSF + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_0: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_1: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_2: + fallthrough; + case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_3: +#endif + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory attributes fault", + fault); goto fault_done; default: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Unknown fault code", fault); goto fault_done; } @@ -1110,46 +1169,42 @@ page_fault_retry: */ kbase_gpu_vm_lock(kctx); - region = kbase_region_tracker_find_region_enclosing_address(kctx, - fault->addr); + region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr); if (kbase_is_region_invalid_or_free(region)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not mapped on the GPU", fault); + "Memory is not mapped on the GPU", fault); goto fault_done; } if 
(region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "DMA-BUF is not mapped on the GPU", fault); + "DMA-BUF is not mapped on the GPU", fault); goto fault_done; } if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Bad physical memory group ID", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Bad physical memory group ID", + fault); goto fault_done; } - if ((region->flags & GROWABLE_FLAGS_REQUIRED) - != GROWABLE_FLAGS_REQUIRED) { + if ((region->flags & GROWABLE_FLAGS_REQUIRED) != GROWABLE_FLAGS_REQUIRED) { kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not growable", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory is not growable", fault); goto fault_done; } if ((region->flags & KBASE_REG_DONT_NEED)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Don't need memory can't be grown", fault); + "Don't need memory can't be grown", fault); goto fault_done; } - if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == - AS_FAULTSTATUS_ACCESS_TYPE_READ) + if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == AS_FAULTSTATUS_ACCESS_TYPE_READ) dev_warn(kbdev->dev, "Grow on pagefault while reading"); /* find the size we need to grow it by @@ -1165,15 +1220,12 @@ page_fault_retry: struct kbase_mmu_hw_op_param op_param; dev_dbg(kbdev->dev, - "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", - fault->addr, region->start_pfn, - region->start_pfn + - current_backed_size); + "Page fault @ VA 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", + fault->addr, region->start_pfn, region->start_pfn + current_backed_size); mutex_lock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + 
kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); /* [1] in case another page fault occurred while we were * handling the (duplicate) page fault we need to ensure we * don't loose the other page fault as result of us clearing @@ -1186,8 +1238,7 @@ page_fault_retry: op_param.kctx_id = kctx->id; if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, - &op_param); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } else { /* Can safely skip the invalidate for all levels in case @@ -1196,20 +1247,18 @@ page_fault_retry: op_param.flush_skip_levels = 0xF; op_param.vpfn = fault_pfn; op_param.nr = 1; - err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, - &op_param); + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param); } if (err) { dev_err(kbdev->dev, - "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + "Invalidation for MMU did not complete on handling page fault @ VA 0x%llx", fault->addr); } mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); goto fault_done; @@ -1227,16 +1276,14 @@ page_fault_retry: mutex_lock(&kbdev->mmu_hw_mutex); /* Duplicate of a fault we've already handled, nothing to do */ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); /* See comment [1] about UNLOCK usage */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, - &op_param); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } else { /* Can safely skip the 
invalidate for all levels in case @@ -1245,20 +1292,18 @@ page_fault_retry: op_param.flush_skip_levels = 0xF; op_param.vpfn = fault_pfn; op_param.nr = 1; - err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, - &op_param); + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param); } if (err) { dev_err(kbdev->dev, - "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + "Invalidation for MMU did not complete on handling page fault @ VA 0x%llx", fault->addr); } mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); goto fault_done; } @@ -1273,8 +1318,8 @@ page_fault_retry: #endif spin_lock(&kctx->mem_partials_lock); - grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, - &grow_2mb_pool, prealloc_sas); + grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool, + prealloc_sas); spin_unlock(&kctx->mem_partials_lock); if (grown) { @@ -1283,8 +1328,7 @@ page_fault_retry: struct kbase_mmu_hw_op_param op_param; /* alloc success */ - WARN_ON(kbase_reg_current_backed_size(region) > - region->nr_pages); + WARN_ON(kbase_reg_current_backed_size(region) > region->nr_pages); /* set up the new pages */ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; @@ -1296,35 +1340,35 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. 
*/ - err = kbase_mmu_insert_pages_no_flush( - kbdev, &kctx->mmu, region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds, region, false); + err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, + false); if (err) { - kbase_free_phy_pages_helper(region->gpu_alloc, - new_pages); + kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); if (region->gpu_alloc != region->cpu_alloc) - kbase_free_phy_pages_helper(region->cpu_alloc, - new_pages); + kbase_free_phy_pages_helper(region->cpu_alloc, new_pages); kbase_gpu_vm_unlock(kctx); /* The locked VA region will be unlocked and the cache * invalidated in here */ kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Page table update failure", fault); + "Page table update failure", fault); goto fault_done; } - KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, - (u64)new_pages); - trace_mali_mmu_page_fault_grow(region, fault, new_pages); + KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages); + if (kbase_reg_is_valid(kbdev, MMU_AS_OFFSET(as_no, FAULTEXTRA))) + trace_mali_mmu_page_fault_extra_grow(region, fault, new_pages); + else + trace_mali_mmu_page_fault_grow(region, fault, new_pages); #if MALI_INCREMENTAL_RENDERING_JM /* Switch to incremental rendering if we have nearly run out of * memory in a JIT memory allocation. */ if (region->threshold_pages && - kbase_reg_current_backed_size(region) > - region->threshold_pages) { + kbase_reg_current_backed_size(region) > region->threshold_pages) { dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", new_pages + current_backed_size, region->threshold_pages); @@ -1346,8 +1390,7 @@ page_fault_retry: * this stage a new IRQ might not be raised when the GPU finds * a MMU IRQ is already pending. 
*/ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); op_param.vpfn = region->start_pfn + pfn_offset; op_param.nr = new_pages; @@ -1356,21 +1399,18 @@ page_fault_retry: op_param.mmu_sync_info = mmu_sync_info; if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { /* Unlock to invalidate the TLB (and resume the MMU) */ - op_param.flush_skip_levels = - pgd_level_to_skip_flush(dirty_pgds); - err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, - &op_param); + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param); } else { /* flush L2 and unlock the VA (resumes the MMU) */ mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_flush(kbdev, faulting_as, - &op_param); + err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } if (err) { dev_err(kbdev->dev, - "Flush for GPU page table update did not complete on handling page fault @ 0x%llx", + "Flush for GPU page table update did not complete on handling page fault @ VA 0x%llx", fault->addr); } @@ -1378,8 +1418,7 @@ page_fault_retry: /* AS transaction end */ /* reenable this in the mask */ - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) { @@ -1389,12 +1428,9 @@ page_fault_retry: pos = kmalloc(sizeof(*pos), GFP_KERNEL); if (pos) { pos->region = region; - pos->page_addr = (region->start_pfn + - pfn_offset) << - PAGE_SHIFT; + pos->page_addr = (region->start_pfn + pfn_offset) << PAGE_SHIFT; pos->num_pages = new_pages; - list_add(&pos->link, - &kctx->gwt_current_list); + list_add(&pos->link, &kctx->gwt_current_list); } else { dev_warn(kbdev->dev, "kmalloc failure"); } @@ -1410,6 +1446,7 @@ page_fault_retry: kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; + const u8 group_id = 
region->gpu_alloc->group_id; kbase_gpu_vm_unlock(kctx); @@ -1420,28 +1457,23 @@ page_fault_retry: if (kbdev->pagesize_2mb && grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = - &kctx->mem_pools.large[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.large[group_id]; - pages_to_grow = (pages_to_grow + - ((1 << lp_mem_pool->order) - 1)) - >> lp_mem_pool->order; + pages_to_grow = (pages_to_grow + ((1 << lp_mem_pool->order) - 1)) >> + lp_mem_pool->order; - ret = kbase_mem_pool_grow(lp_mem_pool, - pages_to_grow, kctx->task); + ret = kbase_mem_pool_grow(lp_mem_pool, pages_to_grow, kctx->task); } else { struct kbase_mem_pool *const mem_pool = - &kctx->mem_pools.small[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.small[group_id]; - ret = kbase_mem_pool_grow(mem_pool, - pages_to_grow, kctx->task); + ret = kbase_mem_pool_grow(mem_pool, pages_to_grow, kctx->task); } } if (ret < 0) { /* failed to extend, handle as a normal PF */ kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Page allocation failure", fault); + "Page allocation failure", fault); } else { dev_dbg(kbdev->dev, "Try again after pool_grow"); goto page_fault_retry; @@ -1473,8 +1505,7 @@ fault_done: dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } -static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut) +static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { u64 *page; struct page *p; @@ -1482,9 +1513,10 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); if (!p) - return KBASE_MMU_INVALID_PGD_ADDRESS; + return KBASE_INVALID_PHYSICAL_ADDRESS; + + page = kbase_kmap(p); - page = kmap(p); if (page == NULL) goto alloc_free; @@ -1497,12 +1529,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, if (mmut->kctx) { int new_page_count; - 
new_page_count = atomic_add_return(1, - &mmut->kctx->used_pages); - KBASE_TLSTREAM_AUX_PAGESALLOC( - kbdev, - mmut->kctx->id, - (u64)new_page_count); + new_page_count = atomic_add_return(1, &mmut->kctx->used_pages); + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, (u64)new_page_count); kbase_process_page_usage_inc(mmut->kctx, 1); } @@ -1517,13 +1545,13 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, */ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); - kunmap(p); + kbase_kunmap(p, page); return pgd; alloc_free: kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); - return KBASE_MMU_INVALID_PGD_ADDRESS; + return KBASE_INVALID_PHYSICAL_ADDRESS; } /** @@ -1557,7 +1585,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * vpfn &= 0x1FF; p = pfn_to_page(PFN_DOWN(*pgd)); - page = kmap(p); + page = kbase_kmap(p); if (page == NULL) { dev_err(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; @@ -1566,7 +1594,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, vpfn); - kunmap(p); + kbase_kunmap(p, page); return -EFAULT; } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( @@ -1574,7 +1602,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); } - kunmap(p); + kbase_kunmap(p, page); *pgd = target_pgd; return 0; @@ -1700,14 +1728,13 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, count = left; /* need to check if this is a 2MB page or a 4kB */ - for (level = MIDGARD_MMU_TOPLEVEL; - level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { + for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; pgds[level] = pgd; - page = kmap(p); + 
page = kbase_kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(p); + kbase_kunmap(p, page); pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); p = phys_to_page(pgd); @@ -1740,7 +1767,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode->entries_invalidate(&page[idx], pcount); if (!num_of_valid_entries) { - kunmap(p); + kbase_kunmap(p, page); kbase_mmu_add_to_free_pgds_list(mmut, p); @@ -1758,7 +1785,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, KBASE_MMU_OP_NONE); - kunmap(p); + kbase_kunmap(p, page); next: vpfn += count; } @@ -1768,8 +1795,8 @@ next: * going to happen to these pages at this stage. They might return * movable once they are returned to a memory pool. */ - if (kbase_page_migration_enabled && !ignore_page_migration && phys) { - const u64 num_pages = to_vpfn - from_vpfn + 1; + if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { + const u64 num_pages = to_vpfn - from_vpfn; u64 i; for (i = 0; i < num_pages; i++) { @@ -1835,7 +1862,6 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * The bottom PGD level. * @insert_level: The level of MMU page table where the chain of newly allocated * PGDs needs to be linked-in/inserted. - * The top-most PDG level to be updated. * @insert_vpfn: The virtual page frame number for the ATE. * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains * the physical addresses of newly allocated PGDs from index @@ -1843,7 +1869,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * insert_level. 
* * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD - * at insert_level which already exists in the MMU Page Tables.Migration status is also + * at insert_level which already exists in the MMU Page Tables. Migration status is also * updated for all the newly allocated PGD pages. * * Return: @@ -1872,12 +1898,13 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd)); u64 *parent_page_va; - if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) { + if (WARN_ON_ONCE(target_pgd == KBASE_INVALID_PHYSICAL_ADDRESS)) { err = -EFAULT; goto failure_recovery; } - parent_page_va = kmap(parent_page); + parent_page_va = kbase_kmap(parent_page); + if (unlikely(parent_page_va == NULL)) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -EINVAL; @@ -1890,7 +1917,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); - kunmap(parent_page); + kbase_kunmap(parent_page, parent_page_va); if (parent_index != insert_level) { /* Newly allocated PGDs */ @@ -1909,7 +1936,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table } /* Update the new target_pgd page to its stable state */ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(phys_to_page(target_pgd)); @@ -1938,11 +1965,11 @@ failure_recovery: for (; pgd_index < cur_level; pgd_index++) { phys_addr_t pgd = pgds_to_insert[pgd_index]; struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); - u64 *pgd_page_va = kmap(pgd_page); + u64 *pgd_page_va = kbase_kmap(pgd_page); u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; 
kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); - kunmap(pgd_page); + kbase_kunmap(pgd_page, pgd_page_va); } return err; @@ -1978,7 +2005,7 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta for (i = level_low; i <= level_high; i++) { do { new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); - if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) + if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) break; mutex_unlock(&mmut->mmu_lock); @@ -1993,7 +2020,7 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta * from (i-1) to level_low */ for (i = (i - 1); i >= level_low; i--) { - if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) + if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); } @@ -2005,10 +2032,11 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta return 0; } -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, - struct tagged_addr phys, size_t nr, unsigned long flags, - int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - bool ignore_page_migration) +static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2038,7 +2066,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, /* If page migration is enabled, pages involved in multiple GPU mappings * are always treated as not movable. 
*/ - if (kbase_page_migration_enabled && !ignore_page_migration) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration) { struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -2103,7 +2131,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + + pgd_page = kbase_kmap(p); if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2111,8 +2140,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, goto fail_unlock_free_pgds; } - num_of_valid_entries = - kbdev->mmu_mode->get_num_valid_entries(pgd_page); + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); for (i = 0; i < count; i++) { unsigned int ofs = vindex + i; @@ -2120,12 +2148,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, /* Fail if the current page is a valid ATE entry */ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); - pgd_page[ofs] = kbase_mmu_create_ate(kbdev, - phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); + pgd_page[ofs] = kbase_mmu_create_ate(kbdev, phys, flags, + MIDGARD_MMU_BOTTOMLEVEL, group_id); } - kbdev->mmu_mode->set_num_valid_entries( - pgd_page, num_of_valid_entries + count); + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries + count); dirty_pgds |= 1ULL << (newly_created_pgd ? 
insert_level : MIDGARD_MMU_BOTTOMLEVEL); @@ -2151,14 +2178,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kunmap(p); + kbase_kunmap(p, pgd_page); goto fail_unlock_free_pgds; } } insert_vpfn += count; remain -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } mutex_unlock(&mmut->mmu_lock); @@ -2215,6 +2242,9 @@ static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + spin_lock(&page_md->migrate_lock); /* If no GPU va region is given: the metadata provided are @@ -2249,6 +2279,9 @@ static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < requested_nr; i++) { struct page *phys_page = as_page(phys[i]); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -2276,7 +2309,7 @@ static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, * status will subsequently be freed in either * kbase_page_migrate() or kbase_page_putback() */ - phys[i] = as_tagged(0); + phys[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); } else page_md->status = PAGE_STATUS_SET(page_md->status, (u8)FREE_IN_PROGRESS); @@ -2287,21 +2320,19 @@ static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, } } -u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, - struct tagged_addr const phy, unsigned long const flags, - int const level, int const group_id) +u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, + unsigned long const flags, int const level, int const group_id) { u64 entry; kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); - return 
kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, - group_id, level, entry); + return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, group_id, level, entry); } -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds, - struct kbase_va_region *reg, bool ignore_page_migration) +static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2312,6 +2343,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu unsigned int i; phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; int l, cur_level, insert_level; + struct tagged_addr *start_phys = phys; /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -2382,7 +2414,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + pgd_page = kbase_kmap(p); + if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2390,8 +2423,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu goto fail_unlock_free_pgds; } - num_of_valid_entries = - mmu_mode->get_num_valid_entries(pgd_page); + num_of_valid_entries = mmu_mode->get_num_valid_entries(pgd_page); if (cur_level == MIDGARD_MMU_LEVEL(2)) { int level_index = (insert_vpfn >> 9) & 0x1FF; @@ -2413,13 +2445,13 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu */ WARN_ON((*target & 1UL) != 0); - *target = kbase_mmu_create_ate(kbdev, - phys[i], flags, cur_level, group_id); + *target = kbase_mmu_create_ate(kbdev, phys[i], flags, cur_level, + 
group_id); /* If page migration is enabled, this is the right time * to update the status of the page. */ - if (kbase_page_migration_enabled && !ignore_page_migration && + if (kbase_is_page_migration_enabled() && !ignore_page_migration && !is_huge(phys[i]) && !is_partial(phys[i])) kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, insert_vpfn + i); @@ -2454,7 +2486,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kunmap(p); + kbase_kunmap(p, pgd_page); goto fail_unlock_free_pgds; } } @@ -2462,7 +2494,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu phys += count; insert_vpfn += count; remain -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } mutex_unlock(&mmut->mmu_lock); @@ -2478,7 +2510,7 @@ fail_unlock: if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - phys, ignore_page_migration); + start_phys, ignore_page_migration); } mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, @@ -2489,6 +2521,23 @@ fail_unlock: return err; } +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg) +{ + int err; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id, + dirty_pgds, reg, false); + + return err; +} + /* * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. 
@@ -2496,17 +2545,19 @@ fail_unlock: int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg, bool ignore_page_migration) + struct kbase_va_region *reg) { int err; u64 dirty_pgds = 0; + CSTD_UNUSED(as_nr); + /* Early out if there is nothing to do */ if (nr == 0) return 0; - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, ignore_page_migration); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, false); if (err) return err; @@ -2517,15 +2568,18 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg) +int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) { int err; u64 dirty_pgds = 0; + CSTD_UNUSED(as_nr); + /* Early out if there is nothing to do */ if (nr == 0) return 0; @@ -2533,8 +2587,8 @@ int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu /* Imported allocations don't have metadata and therefore always ignore the * page migration logic. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, true); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, true); if (err) return err; @@ -2552,6 +2606,8 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ int err; u64 dirty_pgds = 0; + CSTD_UNUSED(as_nr); + /* Early out if there is nothing to do */ if (nr == 0) return 0; @@ -2559,8 +2615,8 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ /* Memory aliases are always built on top of existing allocations, * therefore the state of physical pages shall be updated. */ - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, false); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, false); if (err) return err; @@ -2569,9 +2625,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ return 0; } -void kbase_mmu_update(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr) +void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) { lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->mmu_hw_mutex); @@ -2619,7 +2673,6 @@ void kbase_mmu_disable(struct kbase_context *kctx) /* 0xF value used to prevent skipping of any levels when flushing */ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); -#endif /* lock MMU to prevent existing jobs on GPU from executing while the AS is * not yet disabled @@ -2651,8 +2704,26 @@ void kbase_mmu_disable(struct kbase_context *kctx) dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, kctx->id); } +#else + CSTD_UNUSED(lock_err); + + /* + * The address space is being disabled, drain all knowledge of it out + * from the caches as pages and page 
tables might be freed after this. + * + * The job scheduler code will already be holding the locks and context + * so just do the flush. + */ + flush_err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (flush_err) { + dev_err(kbdev->dev, + "Flush for GPU page table update did not complete to disable AS %d for ctx %d_%d", + kctx->as_nr, kctx->tgid, kctx->id); + /* GPU reset would have been triggered by the flush function */ + } + + kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); -#if !MALI_USE_CSF /* * JM GPUs has some L1 read only caches that need to be invalidated * with START_FLUSH configuration. Purge the MMU disabled kctx from @@ -2673,11 +2744,11 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, lockdep_assert_held(&mmut->mmu_lock); - for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); - current_level--) { + for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); current_level--) { phys_addr_t current_pgd = pgds[current_level]; struct page *p = phys_to_page(current_pgd); - u64 *current_page = kmap(p); + + u64 *current_page = kbase_kmap(p); unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page); int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; @@ -2687,26 +2758,24 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, *dirty_pgds |= 1ULL << current_level; kbdev->mmu_mode->entries_invalidate(&current_page[index], 1); - if (current_valid_entries == 1 && - current_level != MIDGARD_MMU_LEVEL(0)) { - kunmap(p); + if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { + kbase_kunmap(p, current_page); /* Ensure the cacheline containing the last valid entry * of PGD is invalidated from the GPU cache, before the * PGD page is freed.
*/ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, - current_pgd + (index * sizeof(u64)), - sizeof(u64), flush_op); + current_pgd + (index * sizeof(u64)), sizeof(u64), + flush_op); kbase_mmu_add_to_free_pgds_list(mmut, p); } else { current_valid_entries--; - kbdev->mmu_mode->set_num_valid_entries( - current_page, current_valid_entries); + kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries); - kunmap(p); + kbase_kunmap(p, current_page); kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), @@ -2762,7 +2831,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, KBASE_MMU_OP_FLUSH_MEM); else @@ -2770,6 +2839,9 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } } +#else + CSTD_UNUSED(phys); + CSTD_UNUSED(phys_page_nr); #endif } @@ -2780,6 +2852,8 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase { struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; + CSTD_UNUSED(free_pgds_list); + lockdep_assert_held(&mmut->mmu_lock); kbase_mmu_reset_free_pgds_list(mmut); @@ -2798,12 +2872,11 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase count = nr; /* need to check if this is a 2MB page or a 4kB */ - for (level = MIDGARD_MMU_TOPLEVEL; - level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { + for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(p); + page = kbase_kmap(p); if (mmu_mode->ate_is_valid(page[index], 
level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { @@ -2829,7 +2902,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase next_pgd = mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); - kunmap(p); + kbase_kunmap(p, page); pgds[level] = pgd; pgd = next_pgd; p = phys_to_page(pgd); @@ -2840,7 +2913,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase case MIDGARD_MMU_LEVEL(1): dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, level); - kunmap(p); + kbase_kunmap(p, page); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -2878,15 +2951,14 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->entries_invalidate(&page[index], pcount); if (!num_of_valid_entries) { - kunmap(p); + kbase_kunmap(p, page); /* Ensure the cacheline(s) containing the last valid entries * of PGD is invalidated from the GPU cache, before the * PGD page is freed. 
*/ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, - pgd + (index * sizeof(u64)), - pcount * sizeof(u64), flush_op); + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + pcount * sizeof(u64), flush_op); kbase_mmu_add_to_free_pgds_list(mmut, p); @@ -2904,7 +2976,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); next: - kunmap(p); + kbase_kunmap(p, page); vpfn += count; nr -= count; } @@ -2912,9 +2984,40 @@ out: return 0; } -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, - int as_nr, bool ignore_page_migration) +/** + * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table + * + * @kbdev: Pointer to kbase device. + * @mmut: Pointer to GPU MMU page table. + * @vpfn: Start page frame number of the GPU virtual pages to unmap. + * @phys: Array of physical pages currently mapped to the virtual + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. + * @nr_phys_pages: Number of physical pages to flush. + * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. + * + * We actually discard the ATE and free the page table pages if no valid entries + * exist in the PGD. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. 
+ * + * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, + * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, + * GPU cache maintenance will be done as usual; that is, invalidating the whole GPU caches + * instead of specific physical address ranges. + * + * Return: 0 on success, otherwise an error code. + */ +static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration) { u64 start_vpfn = vpfn; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; @@ -2974,7 +3077,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table .mmu_sync_info = mmu_sync_info, .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT : - KBASE_MMU_OP_FLUSH_MEM, + KBASE_MMU_OP_FLUSH_MEM, .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages, @@ -2985,7 +3088,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table * updated before releasing the lock to protect against concurrent * requests to migrate the pages, if they have been isolated. 
*/ - if (kbase_page_migration_enabled && phys && !ignore_page_migration) + if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration) kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); kbase_mmu_free_pgds_list(kbdev, mmut); @@ -2994,7 +3097,22 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table return err; } -KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr) +{ + return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, + false); +} + +int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, + size_t nr_virt_pages, int as_nr) +{ + return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, + true); +} /** * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU @@ -3058,28 +3176,25 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb goto fail_unlock; p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + pgd_page = kbase_kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kmap failure on update_pages"); err = -ENOMEM; goto fail_unlock; } - num_of_valid_entries = - kbdev->mmu_mode->get_num_valid_entries(pgd_page); + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); if (cur_level == MIDGARD_MMU_LEVEL(2)) { int level_index = (vpfn >> 9) & 0x1FF; - struct tagged_addr *target_phys = - phys - index_in_large_page(*phys); + struct tagged_addr *target_phys = phys - index_in_large_page(*phys); #ifdef CONFIG_MALI_BIFROST_DEBUG - WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( - pgd_page[level_index], MIDGARD_MMU_LEVEL(2))); + WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(pgd_page[level_index], + 
MIDGARD_MMU_LEVEL(2))); #endif - pgd_page[level_index] = kbase_mmu_create_ate(kbdev, - *target_phys, flags, MIDGARD_MMU_LEVEL(2), - group_id); + pgd_page[level_index] = kbase_mmu_create_ate( + kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id); kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), kbase_dma_addr(p) + (level_index * sizeof(u64)), sizeof(u64), KBASE_MMU_OP_NONE); @@ -3087,12 +3202,10 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb for (i = 0; i < count; i++) { #ifdef CONFIG_MALI_BIFROST_DEBUG WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( - pgd_page[index + i], - MIDGARD_MMU_BOTTOMLEVEL)); + pgd_page[index + i], MIDGARD_MMU_BOTTOMLEVEL)); #endif - pgd_page[index + i] = kbase_mmu_create_ate(kbdev, - phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, - group_id); + pgd_page[index + i] = kbase_mmu_create_ate( + kbdev, phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); } /* MMU cache flush strategy is NONE because GPU cache maintenance @@ -3103,8 +3216,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb count * sizeof(u64), KBASE_MMU_OP_NONE); } - kbdev->mmu_mode->set_num_valid_entries(pgd_page, - num_of_valid_entries); + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); if (dirty_pgds && count > 0) *dirty_pgds |= 1ULL << cur_level; @@ -3113,7 +3225,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb vpfn += count; nr -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } mutex_unlock(&mmut->mmu_lock); @@ -3216,8 +3328,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); struct kbase_mmu_hw_op_param op_param; struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? 
- page_md->data.mapped.mmut : - page_md->data.pt_mapped.mmut; + page_md->data.mapped.mmut : + page_md->data.pt_mapped.mmut; struct kbase_device *kbdev; phys_addr_t pgd; u64 *old_page, *new_page, *pgd_page, *target, vpfn; @@ -3226,6 +3338,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p unsigned int num_of_valid_entries; u8 vmap_count = 0; + /* If page migration support is not compiled in, return with fault */ + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, * here we skip the no kctx case, which is only used with MCU's mmut. */ @@ -3243,21 +3358,21 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p index = (vpfn >> ((3 - level) * 9)) & 0x1FF; /* Create all mappings before copying content. - * This is done as early as possible because is the only operation that may + * This is done as early as possible because it is the only operation that may * fail. It is possible to do this before taking any locks because the * pages to migrate are not going to change and even the parent PGD is not * going to be affected by any other concurrent operation, since the page * has been isolated before migration and therefore it cannot disappear in * the middle of this function. 
*/ - old_page = kmap(as_page(old_phys)); + old_page = kbase_kmap(as_page(old_phys)); if (!old_page) { dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); ret = -EINVAL; goto old_page_map_error; } - new_page = kmap(as_page(new_phys)); + new_page = kbase_kmap(as_page(new_phys)); if (!new_page) { dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); ret = -EINVAL; @@ -3295,8 +3410,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p op_param.op = KBASE_MMU_OP_FLUSH_PT; /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? - pgd_level_to_skip_flush(1ULL << level) : - pgd_level_to_skip_flush(3ULL << level); + pgd_level_to_skip_flush(1ULL << level) : + pgd_level_to_skip_flush(3ULL << level); mutex_lock(&mmut->mmu_lock); @@ -3344,14 +3459,13 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p goto get_pgd_at_level_error; } - pgd_page = kmap(phys_to_page(pgd)); + pgd_page = kbase_kmap(phys_to_page(pgd)); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); ret = -EINVAL; goto pgd_page_map_error; } - mutex_lock(&kbdev->pm.lock); mutex_lock(&kbdev->mmu_hw_mutex); /* Lock MMU region and flush GPU cache by using GPU control, @@ -3362,19 +3476,25 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p /* Defer the migration as L2 is in a transitional phase */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); mutex_unlock(&kbdev->mmu_hw_mutex); - mutex_unlock(&kbdev->pm.lock); dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__); ret = -EAGAIN; goto l2_state_defer_out; } /* Prevent transitional phases in L2 by starting the transaction */ mmu_page_migration_transaction_begin(kbdev); - if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 
0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); if (!ret) { +#if MALI_USE_CSF + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + ret = kbase_gpu_cache_flush_pa_range_and_busy_wait( + kbdev, as_phys_addr_t(old_phys), PAGE_SIZE, + GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC); + else +#endif ret = kbase_gpu_cache_flush_and_busy_wait( kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); } @@ -3385,7 +3505,6 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p if (ret < 0) { mutex_unlock(&kbdev->mmu_hw_mutex); - mutex_unlock(&kbdev->pm.lock); dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); goto undo_mappings; } @@ -3427,16 +3546,17 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p if (level == MIDGARD_MMU_BOTTOMLEVEL) { WARN_ON_ONCE((*target & 1UL) == 0); - *target = - kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, - level, page_md->data.mapped.reg->gpu_alloc->group_id); + *target = kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, + level, + page_md->data.mapped.reg->gpu_alloc->group_id); } else { u64 managed_pte; #ifdef CONFIG_MALI_BIFROST_DEBUG /* The PTE should be pointing to the page being migrated */ - WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( - kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + WARN_ON_ONCE( + as_phys_addr_t(old_phys) != + kbdev->mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); #endif kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); @@ -3461,7 +3581,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p * won't have any effect on them. 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); - if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; @@ -3477,7 +3597,6 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); /* Releasing locks before checking the migration transaction error state */ mutex_unlock(&kbdev->mmu_hw_mutex); - mutex_unlock(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); /* Release the transition prevention in L2 by ending the transaction */ @@ -3510,24 +3629,24 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p set_page_private(as_page(old_phys), 0); l2_state_defer_out: - kunmap(phys_to_page(pgd)); + kbase_kunmap(phys_to_page(pgd), pgd_page); pgd_page_map_error: get_pgd_at_level_error: page_state_change_out: mutex_unlock(&mmut->mmu_lock); - kunmap(as_page(new_phys)); + kbase_kunmap(as_page(new_phys), new_page); new_page_map_error: - kunmap(as_page(old_phys)); + kbase_kunmap(as_page(old_phys), old_page); old_page_map_error: return ret; undo_mappings: /* Unlock the MMU table and undo mappings. */ mutex_unlock(&mmut->mmu_lock); - kunmap(phys_to_page(pgd)); - kunmap(as_page(new_phys)); - kunmap(as_page(old_phys)); + kbase_kunmap(phys_to_page(pgd), pgd_page); + kbase_kunmap(as_page(new_phys), new_page); + kbase_kunmap(as_page(old_phys), old_page); return ret; } @@ -3544,7 +3663,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(p); + pgd_page = kbase_kmap_atomic(p); /* kmap_atomic should NEVER fail. 
*/ if (WARN_ON_ONCE(pgd_page == NULL)) return; @@ -3560,11 +3679,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl * there are no pages left mapped on the GPU for a context. Hence the count * of valid entries is expected to be zero here. */ - if (kbase_page_migration_enabled && mmut->kctx) + if (kbase_is_page_migration_enabled() && mmut->kctx) WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); - kunmap_atomic(pgd_page); + kbase_kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; if (level < MIDGARD_MMU_BOTTOMLEVEL) { @@ -3583,12 +3702,28 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mmu_free_pgd(kbdev, mmut, pgd); } -int kbase_mmu_init(struct kbase_device *const kbdev, - struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, - int const group_id) +static void kbase_mmu_mark_non_movable(struct page *page) { - if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || - WARN_ON(group_id < 0)) + struct kbase_page_metadata *page_md; + + if (!kbase_is_page_migration_enabled()) + return; + + page_md = kbase_page_private(page); + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + + if (IS_PAGE_MOVABLE(page_md->status)) + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + + spin_unlock(&page_md->migrate_lock); +} + +int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, + struct kbase_context *const kctx, int const group_id) +{ + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || WARN_ON(group_id < 0)) return -EINVAL; compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)), @@ -3599,28 +3734,26 @@ int kbase_mmu_init(struct kbase_device *const kbdev, mmut->group_id = group_id; mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; - mmut->pgd = 
KBASE_MMU_INVALID_PGD_ADDRESS; + mmut->pgd = KBASE_INVALID_PHYSICAL_ADDRESS; /* We allocate pages into the kbdev memory pool, then * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. */ - while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { + while (mmut->pgd == KBASE_INVALID_PHYSICAL_ADDRESS) { int err; - err = kbase_mem_pool_grow( - &kbdev->mem_pools.small[mmut->group_id], - MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL); + err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], + MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL); if (err) { kbase_mmu_term(kbdev, mmut); return -ENOMEM; } - mutex_lock(&mmut->mmu_lock); mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - mutex_unlock(&mmut->mmu_lock); } + kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd))); return 0; } @@ -3630,7 +3763,7 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", mmut->kctx->tgid, mmut->kctx->id); - if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { + if (mmut->pgd != KBASE_INVALID_PHYSICAL_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); mutex_unlock(&mmut->mmu_lock); @@ -3648,23 +3781,28 @@ void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i) } void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, - phys_addr_t phys, size_t size, - enum kbase_mmu_op_type flush_op) + phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op) { #if MALI_USE_CSF unsigned long irq_flags; spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && - kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, phys, size, 
KBASE_MMU_OP_FLUSH_PT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + CSTD_UNUSED(phys); + CSTD_UNUSED(size); + CSTD_UNUSED(flush_op); #endif } #ifdef CONFIG_MALI_VECTOR_DUMP -static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, - int level, char ** const buffer, size_t *size_left) +static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, + char **const buffer, size_t *size_left) { phys_addr_t target_pgd; u64 *pgd_page; @@ -3681,7 +3819,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; - pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; @@ -3709,14 +3847,13 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, if (mmu_mode->pte_is_valid(pgd_page[i], level)) { target_pgd = mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, - level, pgd_page[i])); + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, + pgd_page[i])); - dump_size = kbasep_mmu_dump_level(kctx, - target_pgd, level + 1, - buffer, size_left); + dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, + buffer, size_left); if (!dump_size) { - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); return 0; } size += dump_size; @@ -3724,7 +3861,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, } } - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); return size; } @@ -3760,8 +3897,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) buffer = (char *)kaddr; mmu_dump_buffer = buffer; - kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, - &as_setup); + 
kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, &as_setup); config[0] = as_setup.transtab; config[1] = as_setup.memattr; config[2] = as_setup.transcfg; @@ -3770,11 +3906,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) size_left -= sizeof(config); size += sizeof(config); - dump_size = kbasep_mmu_dump_level(kctx, - kctx->mmu.pgd, - MIDGARD_MMU_TOPLEVEL, - &mmu_dump_buffer, - &size_left); + dump_size = kbasep_mmu_dump_level(kctx, kctx->mmu.pgd, MIDGARD_MMU_TOPLEVEL, + &mmu_dump_buffer, &size_left); if (!dump_size) goto fail_free; @@ -3845,14 +3978,11 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) #endif if (unlikely(fault->protected_mode)) { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure", fault); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; - } #if MALI_USE_CSF @@ -3866,7 +3996,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) * we don't need to switch to unmapped */ if (!kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); kbase_pm_context_idle(kbdev); } diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h index 699b1f340482..73f41b4d94cf 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,6 @@ #include #define KBASE_MMU_PAGE_ENTRIES 512 -#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0) struct kbase_context; struct kbase_mmu_table; @@ -36,8 +35,8 @@ struct kbase_va_region; * A pointer to this type is passed down from the outer-most callers in the kbase * module - where the information resides as to the synchronous / asynchronous * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to - * existing GPU work does it come from requests (like ioctl) from user-space, power management, - * etc. + * existing GPU work or does it come from requests (like ioctl) from user-space, power + * management, etc. * * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice * of a 'valid' value @@ -109,7 +108,7 @@ void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i); * Return: 0 if successful, otherwise a negative error code. */ int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - struct kbase_context *kctx, int group_id); + struct kbase_context *kctx, int group_id); /** * kbase_mmu_interrupt - Process an MMU interrupt. @@ -148,31 +147,49 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); * Return: An address translation entry, either in LPAE or AArch64 format * (depending on the driver's configuration). 
*/ -u64 kbase_mmu_create_ate(struct kbase_device *kbdev, - struct tagged_addr phy, unsigned long flags, int level, int group_id); +u64 kbase_mmu_create_ate(struct kbase_device *kbdev, struct tagged_addr phy, unsigned long flags, + int level, int group_id); int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int group_id, u64 *dirty_pgds, - struct kbase_va_region *reg, bool ignore_page_migration); + struct kbase_va_region *reg); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg, bool ignore_page_migration); -int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg); + struct kbase_va_region *reg); + +/** + * kbase_mmu_insert_pages_skip_status_update - Map 'nr' pages pointed to by 'phys' + * at GPU PFN 'vpfn' for GPU address space number 'as_nr'. + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables. + * @vpfn: Start page frame number of the GPU virtual pages to map. + * @phys: Physical address of the page to be mapped. + * @nr: The number of pages to map. + * @flags: Bitmask of attributes of the GPU memory region being mapped. + * @as_nr: The GPU address space number. + * @group_id: The physical memory group in which the page was allocated. + * @mmu_sync_info: MMU-synchronous caller info. + * @reg: The region whose physical allocation is to be mapped. + * + * Similar to kbase_mmu_insert_pages() but skips updating each pages metadata + * for page migration. 
+ * + * Return: 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, struct kbase_va_region *reg); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, - size_t nr, unsigned long flags, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - bool ignore_page_migration); int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, @@ -182,44 +199,19 @@ int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); -/** - * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table - * - * @kbdev: Pointer to kbase device. - * @mmut: Pointer to GPU MMU page table. - * @vpfn: Start page frame number of the GPU virtual pages to unmap. - * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is used for GPU cache maintenance - * and page migration support. - * @nr_phys_pages: Number of physical pages to flush. - * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @ignore_page_migration: Whether page migration metadata should be ignored. 
- * - * We actually discard the ATE and free the page table pages if no valid entries - * exist in PGD. - * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. - * - * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, - * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, - * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches - * instead of specific physical address ranges. - * - * Return: 0 on success, otherwise an error code. - */ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, - int as_nr, bool ignore_page_migration); + int as_nr); +int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, + size_t nr_virt_pages, int as_nr); +#define kbase_mmu_teardown_firmware_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, \ + as_nr) \ + kbase_mmu_teardown_imported_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, \ + as_nr) -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id); +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id); #if MALI_USE_CSF /** * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags @@ -287,8 +279,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p * This function is basically a wrapper for 
kbase_gpu_cache_flush_pa_range_and_busy_wait(). */ void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, - phys_addr_t phys, size_t size, - enum kbase_mmu_op_type flush_op); + phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op); /** * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. @@ -302,8 +293,7 @@ void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context * * * Return: zero if the operation was successful, non-zero otherwise. */ -int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, - u32 as_nr); +int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr); /** * kbase_mmu_gpu_fault_interrupt() - Report a GPU fault. @@ -317,8 +307,8 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, * This function builds GPU fault information to submit a work * for reporting the details of the fault. */ -void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, - u32 as_nr, u64 address, bool as_valid); +void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr, u64 address, + bool as_valid); /** * kbase_context_mmu_group_id_get - Decode a memory group ID from @@ -330,11 +320,9 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, * * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). 
*/ -static inline int -kbase_context_mmu_group_id_get(base_context_create_flags const flags) +static inline int kbase_context_mmu_group_id_get(base_context_create_flags const flags) { - KBASE_DEBUG_ASSERT(flags == - (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); + KBASE_DEBUG_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); return (int)BASE_CONTEXT_MMU_GROUP_ID_GET(flags); } diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h index 50d2ea5d07c8..b0b1837ae18b 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ enum kbase_mmu_fault_type { * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions * @vpfn: MMU Virtual Page Frame Number to start the operation on. * @nr: Number of pages to work on. - * @op: Operation type (written to ASn_COMMAND). + * @op: Operation type (written to AS_COMMAND). * @kctx_id: Kernel context ID for MMU command tracepoint. * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. * @flush_skip_levels: Page table levels to skip flushing. (Only @@ -81,8 +81,7 @@ struct kbase_mmu_hw_op_param { * Configure the MMU using the address space details setup in the * kbase_context structure. 
*/ -void kbase_mmu_hw_configure(struct kbase_device *kbdev, - struct kbase_as *as); +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); /** * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program @@ -195,7 +194,7 @@ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_a * Clear a bus error or page fault that has been reported by the MMU. */ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type); + enum kbase_mmu_fault_type type); /** * kbase_mmu_hw_enable_fault - Enable fault that has been previously reported by @@ -209,6 +208,6 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, * called to enable the page fault or bus error fault again. */ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type); + enum kbase_mmu_fault_type type); -#endif /* _KBASE_MMU_HW_H_ */ +#endif /* _KBASE_MMU_HW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index 3f6da35d80f2..a2f55b847f71 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -43,10 +43,7 @@ */ static bool mmu_has_flush_skip_pgd_levels(struct kbase_gpu_props const *gpu_props) { - u32 const signature = - gpu_props->props.raw_props.gpu_id & (GPU_ID2_ARCH_MAJOR | GPU_ID2_ARCH_REV); - - return signature >= (u32)GPU_ID2_PRODUCT_MAKE(12, 0, 4, 0); + return gpu_props->gpu_id.arch_id >= GPU_ID_ARCH_MAKE(12, 0, 4); } #endif @@ -170,10 +167,10 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) { const ktime_t wait_loop_start = ktime_get_raw(); - const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + const u32 mmu_as_inactive_wait_time_ms = 
kbdev->mmu_or_gpu_cache_op_wait_time_ms; s64 diff; - if (unlikely(kbdev->as[as_nr].is_unresponsive)) + if (unlikely(kbdev->mmu_unresponsive)) return -EBUSY; do { @@ -181,8 +178,8 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) for (i = 0; i < 1000; i++) { /* Wait for the MMU status to indicate there is no active command */ - if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) & - AS_STATUS_AS_ACTIVE)) + if (!(kbase_reg_read32(kbdev, MMU_AS_OFFSET(as_nr, STATUS)) & + AS_STATUS_AS_ACTIVE_EXT_MASK)) return 0; } @@ -192,7 +189,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) dev_err(kbdev->dev, "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system", as_nr); - kbdev->as[as_nr].is_unresponsive = true; + kbdev->mmu_unresponsive = true; if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu_locked(kbdev); @@ -205,7 +202,8 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) const int status = wait_ready(kbdev, as_nr); if (likely(status == 0)) - kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); + kbase_reg_write32(kbdev, MMU_AS_OFFSET(as_nr, COMMAND), + AS_COMMAND_COMMAND_SET(0, cmd)); else if (status == -EBUSY) { dev_dbg(kbdev->dev, "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u", @@ -222,7 +220,7 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) static int wait_cores_power_trans_complete(struct kbase_device *kbdev) { -#define WAIT_TIMEOUT 1000 /* 1ms timeout */ +#define WAIT_TIMEOUT 50000 /* 50ms timeout */ #define DELAY_TIME_IN_US 1 const int max_iterations = WAIT_TIMEOUT; int loop; @@ -230,19 +228,17 @@ static int wait_cores_power_trans_complete(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); for (loop = 0; loop < max_iterations; loop++) { - u32 lo = - kbase_reg_read(kbdev, 
GPU_CONTROL_REG(SHADER_PWRTRANS_LO)); - u32 hi = - kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI)); + u64 val = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS)); - if (!lo && !hi) + if (!val) break; udelay(DELAY_TIME_IN_US); } if (loop == max_iterations) { - dev_warn(kbdev->dev, "SHADER_PWRTRANS set for too long"); + dev_warn(kbdev->dev, "SHADER_PWRTRANS %016llx set for too long", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS))); return -ETIMEDOUT; } @@ -275,9 +271,8 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c * the workaround can be safely skipped. */ if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { - if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) { - dev_warn(kbdev->dev, - "Unexpected mmu command received"); + if (unlikely(*mmu_cmd != AS_COMMAND_COMMAND_FLUSH_MEM)) { + dev_warn(kbdev->dev, "Unexpected MMU command(%u) received", *mmu_cmd); return -EINVAL; } @@ -286,8 +281,7 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c if (unlikely(ret)) return ret; - ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, - GPU_COMMAND_CACHE_CLN_INV_LSC); + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, GPU_COMMAND_CACHE_CLN_INV_LSC); if (unlikely(ret)) return ret; @@ -302,7 +296,7 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c /* As LSC is guaranteed to have been flushed we can use FLUSH_PT * MMU command to only flush the L2. 
*/ - *mmu_cmd = AS_COMMAND_FLUSH_PT; + *mmu_cmd = AS_COMMAND_COMMAND_FLUSH_PT; } return ret; @@ -319,47 +313,27 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) transcfg = current_setup->transcfg; - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK - * Clear PTW_MEMATTR bits - */ - transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; - /* Enable correct PTW_MEMATTR bits */ - transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ + transcfg = AS_TRANSCFG_PTW_MEMATTR_SET(transcfg, AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK); + /* Ensure page-tables reads use read-allocate cache-policy in * the L2 */ - transcfg |= AS_TRANSCFG_R_ALLOCATE; + transcfg |= AS_TRANSCFG_R_ALLOCATE_MASK; if (kbdev->system_coherency != COHERENCY_NONE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) - * Clear PTW_SH bits - */ - transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); - /* Enable correct PTW_SH bits */ - transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + /* Set flag AS_TRANSCFG_PTW_SH_OUTER_SHAREABLE */ + transcfg = AS_TRANSCFG_PTW_SH_SET(transcfg, AS_TRANSCFG_PTW_SH_OUTER_SHAREABLE); } - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (transcfg >> 32) & 0xFFFFFFFFUL); + kbase_reg_write64(kbdev, MMU_AS_OFFSET(as->number, TRANSCFG), transcfg); + kbase_reg_write64(kbdev, MMU_AS_OFFSET(as->number, TRANSTAB), current_setup->transtab); + kbase_reg_write64(kbdev, MMU_AS_OFFSET(as->number, MEMATTR), current_setup->memattr); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), - current_setup->transtab & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), - (current_setup->transtab >> 32) & 0xFFFFFFFFUL); + KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, current_setup->transtab, + current_setup->memattr, transcfg); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), - current_setup->memattr 
& 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), - (current_setup->memattr >> 32) & 0xFFFFFFFFUL); - - KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, - current_setup->transtab, - current_setup->memattr, - transcfg); - - write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); + write_cmd(kbdev, as->number, AS_COMMAND_COMMAND_UPDATE); #if MALI_USE_CSF /* Wait for UPDATE command to complete */ wait_ready(kbdev, as->number); @@ -376,7 +350,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. */ static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr, - enum kbase_caller_mmu_sync_info mmu_sync_info) + enum kbase_caller_mmu_sync_info mmu_sync_info) { u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr); u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr); @@ -399,10 +373,7 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock if (!ret) { /* Set the region that needs to be updated */ - kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO), - *lock_addr & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI), - (*lock_addr >> 32) & 0xFFFFFFFFUL); + kbase_reg_write64(kbdev, MMU_AS_OFFSET(as_nr, LOCKADDR), *lock_addr); } return ret; } @@ -426,7 +397,7 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); if (likely(!ret)) - ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + ret = write_cmd(kbdev, as->number, AS_COMMAND_COMMAND_LOCK); return ret; } @@ -455,7 +426,7 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, ret = wait_ready(kbdev, as->number); if (!ret) - mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr, + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_COMMAND_LOCK, lock_addr, 
op_param->mmu_sync_info); return ret; @@ -477,21 +448,18 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as * if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) return -EINVAL; - ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + ret = write_cmd(kbdev, as->number, AS_COMMAND_COMMAND_UNLOCK); /* Wait for UNLOCK command to complete */ if (likely(!ret)) ret = wait_ready(kbdev, as->number); if (likely(!ret)) { - u64 lock_addr = 0x0; /* read MMU_AS_CONTROL.LOCKADDR register */ - lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) - << 32; - lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO)); + u64 lock_addr = kbase_reg_read64(kbdev, MMU_AS_OFFSET(as->number, LOCKADDR)); - mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, - lock_addr, op_param->mmu_sync_info); + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_COMMAND_UNLOCK, lock_addr, + op_param->mmu_sync_info); } return ret; @@ -509,8 +477,7 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param); if (!ret) - ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, - op_param); + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); return ret; } @@ -526,11 +493,11 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * Return: 0 if flushing MMU was successful, otherwise an error code. 
*/ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, - const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) + const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) { int ret; u64 lock_addr = 0x0; - u32 mmu_cmd = AS_COMMAND_FLUSH_MEM; + u32 mmu_cmd = AS_COMMAND_COMMAND_FLUSH_MEM; if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) return -EINVAL; @@ -538,8 +505,7 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at * this point would be unexpected. */ - if (op_param->op != KBASE_MMU_OP_FLUSH_PT && - op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && op_param->op != KBASE_MMU_OP_FLUSH_MEM) { dev_err(kbdev->dev, "Unexpected flush operation received"); return -EINVAL; } @@ -547,7 +513,7 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, lockdep_assert_held(&kbdev->mmu_hw_mutex); if (op_param->op == KBASE_MMU_OP_FLUSH_PT) - mmu_cmd = AS_COMMAND_FLUSH_PT; + mmu_cmd = AS_COMMAND_COMMAND_FLUSH_PT; /* Lock the region that needs to be updated */ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); @@ -557,7 +523,7 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) /* WA for the BASE_HW_ISSUE_GPU2019_3901. 
*/ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) && - mmu_cmd == AS_COMMAND_FLUSH_MEM) { + mmu_cmd == AS_COMMAND_COMMAND_FLUSH_MEM) { if (!hwaccess_locked) { unsigned long flags = 0; @@ -568,9 +534,17 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); } - if (ret) - return ret; + if (ret) { + dev_warn( + kbdev->dev, + "Failed to apply WA for HW issue when doing MMU flush op on VA range %llx-%llx for AS %u", + op_param->vpfn << PAGE_SHIFT, + ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1, as->number); + /* Continue with the MMU flush operation */ + } } +#else + CSTD_UNUSED(hwaccess_locked); #endif ret = write_cmd(kbdev, as->number, mmu_cmd); @@ -612,8 +586,7 @@ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_a /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at * this point would be unexpected. */ - if (op_param->op != KBASE_MMU_OP_FLUSH_PT && - op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && op_param->op != KBASE_MMU_OP_FLUSH_MEM) { dev_err(kbdev->dev, "Unexpected flush operation received"); return -EINVAL; } @@ -639,11 +612,13 @@ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_a } void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type) + enum kbase_mmu_fault_type type) { unsigned long flags; u32 pf_bf_mask; + CSTD_UNUSED(type); + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); /* @@ -656,22 +631,23 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, /* Clear the page (and bus fault IRQ as well in case one occurred) */ pf_bf_mask = MMU_PAGE_FAULT(as->number); #if !MALI_USE_CSF - if (type == KBASE_MMU_FAULT_TYPE_BUS || - type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + if (type == KBASE_MMU_FAULT_TYPE_BUS || type == 
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) pf_bf_mask |= MMU_BUS_ERROR(as->number); #endif - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), pf_bf_mask); unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type) + enum kbase_mmu_fault_type type) { unsigned long flags; u32 irq_mask; + CSTD_UNUSED(type); + /* Enable the page fault IRQ * (and bus fault IRQ as well in case one occurred) */ @@ -684,15 +660,13 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, if (kbdev->irq_reset_flush) goto unlock; - irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | - MMU_PAGE_FAULT(as->number); + irq_mask = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK)) | MMU_PAGE_FAULT(as->number); #if !MALI_USE_CSF - if (type == KBASE_MMU_FAULT_TYPE_BUS || - type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + if (type == KBASE_MMU_FAULT_TYPE_BUS || type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) irq_mask |= MMU_BUS_ERROR(as->number); #endif - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), irq_mask); unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h index 9d7ce48568e4..4c2c1a64ca41 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,8 +22,7 @@ #ifndef _KBASE_MMU_INTERNAL_H_ #define _KBASE_MMU_INTERNAL_H_ -void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup); +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup); /** * kbase_mmu_report_mcu_as_fault_and_reset - Report page fault for all @@ -31,15 +30,13 @@ void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, * @kbdev: The kbase_device the fault happened on * @fault: Data relating to the fault */ -void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, - struct kbase_fault *fault); +void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, struct kbase_fault *fault); -void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, struct kbase_fault *fault); +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault); -void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault); +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, + const char *reason_str, struct kbase_fault *fault); /** * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible @@ -52,8 +49,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, * * Return: 0 if successful, otherwise a negative error code. */ -int kbase_mmu_switch_to_ir(struct kbase_context *kctx, - struct kbase_va_region *reg); +int kbase_mmu_switch_to_ir(struct kbase_context *kctx, struct kbase_va_region *reg); /** * kbase_mmu_page_fault_worker() - Process a page fault. 
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c index f2c627482c18..3e0fab6e64f8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2014, 2016-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,22 +20,22 @@ */ #include "mali_kbase.h" -#include +#include #include "mali_kbase_defs.h" #include #include -#define ENTRY_TYPE_MASK 3ULL +#define ENTRY_TYPE_MASK 3ULL /* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). * Valid ATE entries at level 3 are flagged with the value 3. * Valid ATE entries at level 0-2 are flagged with the value 1. 
*/ -#define ENTRY_IS_ATE_L3 3ULL -#define ENTRY_IS_ATE_L02 1ULL -#define ENTRY_IS_INVAL 2ULL -#define ENTRY_IS_PTE 3ULL +#define ENTRY_IS_ATE_L3 3ULL +#define ENTRY_IS_ATE_L02 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL -#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ +#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ #define ENTRY_ACCESS_RO (3ULL << 6) #define ENTRY_ACCESS_BIT (1ULL << 10) #define ENTRY_NX_BIT (1ULL << 54) @@ -51,8 +51,7 @@ static inline void page_table_entry_set(u64 *pte, u64 phy) WRITE_ONCE(*pte, phy); } -static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - int as_nr) +static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) { struct kbase_as *as; struct kbase_mmu_setup *current_setup; @@ -71,11 +70,11 @@ static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) { - struct kbase_as * const as = &kbdev->as[as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; + struct kbase_as *const as = &kbdev->as[as_nr]; + struct kbase_mmu_setup *const current_setup = &as->current_setup; current_setup->transtab = 0ULL; - current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; + current_setup->transcfg = AS_TRANSCFG_MODE_SET(0, AS_TRANSCFG_MODE_UNMAPPED); /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as); @@ -138,34 +137,26 @@ static u64 get_mmu_flags(unsigned long flags) return mmu_flags; } -static void entry_set_ate(u64 *entry, - struct tagged_addr phy, - unsigned long flags, - int const level) +static void entry_set_ate(u64 *entry, struct tagged_addr phy, unsigned long flags, int const level) { if (level == MIDGARD_MMU_BOTTOMLEVEL) - page_table_entry_set(entry, as_phys_addr_t(phy) | - get_mmu_flags(flags) | - ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3); + page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | 
ENTRY_IS_ATE_L3); else - page_table_entry_set(entry, as_phys_addr_t(phy) | - get_mmu_flags(flags) | - ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); + page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); } static unsigned int get_num_valid_entries(u64 *pgd) { register unsigned int num_of_valid_entries; - num_of_valid_entries = - (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> - (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); - num_of_valid_entries |= - (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> - (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); - num_of_valid_entries |= - (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> - (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); + num_of_valid_entries = (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); + num_of_valid_entries |= (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); + num_of_valid_entries |= (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); return num_of_valid_entries; } @@ -175,8 +166,7 @@ static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES); pgd[0] &= ~VALID_ENTRY_MASK; - pgd[0] |= ((u64)(num_of_valid_entries & 0xF) - << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + pgd[0] |= ((u64)(num_of_valid_entries & 0xF) << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); pgd[1] &= ~VALID_ENTRY_MASK; pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF) diff --git a/drivers/gpu/arm/bifrost/platform/Kconfig b/drivers/gpu/arm/bifrost/platform/Kconfig index 3e1bd235b842..fa51160e049d 100644 --- a/drivers/gpu/arm/bifrost/platform/Kconfig +++ b/drivers/gpu/arm/bifrost/platform/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2023 ARM Limited. 
All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,7 @@ # Add your platform specific Kconfig file here # -# "drivers/gpu/arm/bifrost/platform/xxx/Kconfig" +# "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/platform/xxx/Kconfig" # # Where xxx is the platform name is the name set in MALI_PLATFORM_NAME # diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c index 4bcd5854d3a3..f0995a4cb22e 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c +++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015, 2017-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,8 +28,7 @@ #include #endif -static void *enumerate_gpu_clk(struct kbase_device *kbdev, - unsigned int index) +static void *enumerate_gpu_clk(struct kbase_device *kbdev, unsigned int index) { if (index >= kbdev->nr_clocks) return NULL; @@ -42,9 +41,9 @@ static void *enumerate_gpu_clk(struct kbase_device *kbdev, return kbdev->clocks[index]; } -static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, - void *gpu_clk_handle) +static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, void *gpu_clk_handle) { + CSTD_UNUSED(kbdev); #if MALI_USE_CSF /* On Juno fpga platforms, the GPU clock rate is reported as 600 MHZ at * the boot time. 
Then after the first call to kbase_devfreq_target() @@ -66,16 +65,19 @@ static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, return clk_get_rate((struct clk *)gpu_clk_handle); } -static int gpu_clk_notifier_register(struct kbase_device *kbdev, - void *gpu_clk_handle, struct notifier_block *nb) +static int gpu_clk_notifier_register(struct kbase_device *kbdev, void *gpu_clk_handle, + struct notifier_block *nb) { - compiletime_assert(offsetof(struct clk_notifier_data, clk) == - offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), - "mismatch in the offset of clk member"); + CSTD_UNUSED(kbdev); - compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == - sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), - "mismatch in the size of clk member"); + compiletime_assert(offsetof(struct clk_notifier_data, clk) == + offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), + "mismatch in the offset of clk member"); + + compiletime_assert( + sizeof(((struct clk_notifier_data *)0)->clk) == + sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), + "mismatch in the size of clk member"); #if MALI_USE_CSF /* Frequency is fixed on Juno platforms */ @@ -86,9 +88,11 @@ static int gpu_clk_notifier_register(struct kbase_device *kbdev, return clk_notifier_register((struct clk *)gpu_clk_handle, nb); } -static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, - void *gpu_clk_handle, struct notifier_block *nb) +static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, void *gpu_clk_handle, + struct notifier_block *nb) { + CSTD_UNUSED(kbdev); + #if MALI_USE_CSF if (of_machine_is_compatible("arm,juno")) return; diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c index a0b4a434e2a7..6edc02b394dd 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c +++ 
b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,8 @@ void kbase_platform_unregister(void) #if MALI_USE_CSF int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) #else -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]) #endif { return 1; diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c index 2687bee96ec9..2a5030745586 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,7 +61,6 @@ static void disable_gpu_power_control(struct kbase_device *kbdev) clk_disable_unprepare(kbdev->clocks[i]); WARN_ON(__clk_is_enabled(kbdev->clocks[i])); } - } #if defined(CONFIG_REGULATOR) @@ -135,7 +134,7 @@ static void pm_callback_power_off(struct kbase_device *kbdev) /* Power down the GPU immediately */ disable_gpu_power_control(kbdev); -#else /* MALI_USE_CSF */ +#else /* MALI_USE_CSF */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #ifdef KBASE_PM_RUNTIME @@ -217,9 +216,8 @@ static int kbase_device_runtime_init(struct kbase_device *kbdev) dev_warn(kbdev->dev, "pm_runtime not enabled"); ret = -EINVAL; } else if (atomic_read(&kbdev->dev->power.usage_count)) { - dev_warn(kbdev->dev, - "%s: Device runtime usage count unexpectedly non zero %d", - __func__, atomic_read(&kbdev->dev->power.usage_count)); + dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", + __func__, atomic_read(&kbdev->dev->power.usage_count)); ret = -EINVAL; } @@ -231,9 +229,8 @@ static void kbase_device_runtime_disable(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "%s\n", __func__); if (atomic_read(&kbdev->dev->power.usage_count)) - dev_warn(kbdev->dev, - "%s: Device runtime usage count unexpectedly non zero %d", - __func__, atomic_read(&kbdev->dev->power.usage_count)); + dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", + __func__, atomic_read(&kbdev->dev->power.usage_count)); pm_runtime_disable(kbdev->dev); } @@ -281,12 +278,12 @@ struct kbase_pm_callback_conf pm_callbacks = { .power_runtime_term_callback = kbase_device_runtime_disable, .power_runtime_on_callback = pm_callback_runtime_on, .power_runtime_off_callback = pm_callback_runtime_off, -#else /* KBASE_PM_RUNTIME */ +#else /* KBASE_PM_RUNTIME */ .power_runtime_init_callback = NULL, 
.power_runtime_term_callback = NULL, .power_runtime_on_callback = NULL, .power_runtime_off_callback = NULL, -#endif /* KBASE_PM_RUNTIME */ +#endif /* KBASE_PM_RUNTIME */ #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) .power_runtime_gpu_idle_callback = pm_callback_runtime_gpu_idle, diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c index 7b896b602e96..6edc02b394dd 100644 --- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c +++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015, 2017, 2019, 2021, 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,8 @@ void kbase_platform_unregister(void) #if MALI_USE_CSF int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) #else -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]) #endif { return 1; diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h index 06279e2f62ca..866a7de16bff 100644 --- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,7 @@ */ /** - * Power management configuration + * POWER_MANAGEMENT_CALLBACKS - Power management configuration * * Attached value: pointer to @ref kbase_pm_callback_conf * Default value: See @ref kbase_pm_callback_conf @@ -28,7 +28,7 @@ #define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) /** - * Platform specific configuration functions + * PLATFORM_FUNCS - Platform specific configuration functions * * Attached value: pointer to @ref kbase_platform_funcs_conf * Default value: See @ref kbase_platform_funcs_conf @@ -38,7 +38,7 @@ extern struct kbase_pm_callback_conf pm_callbacks; /** - * Autosuspend delay + * AUTO_SUSPEND_DELAY - Autosuspend delay * * The delay time (in milliseconds) to be used for autosuspend */ diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c index 910d4b4fd3e1..45f7638ad904 100644 --- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,6 @@ #include "mali_kbase_config_platform.h" - static struct reset_control **resets; static int nr_resets; @@ -50,14 +49,12 @@ static int resets_init(struct kbase_device *kbdev) return nr_resets; } - resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets), - GFP_KERNEL); + resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets), GFP_KERNEL); if (!resets) return -ENOMEM; for (i = 0; i < nr_resets; ++i) { - resets[i] = devm_reset_control_get_exclusive_by_index( - kbdev->dev, i); + resets[i] = devm_reset_control_get_exclusive_by_index(kbdev->dev, i); if (IS_ERR(resets[i])) { err = PTR_ERR(resets[i]); nr_resets = i; @@ -89,9 +86,8 @@ static int pm_callback_soft_reset(struct kbase_device *kbdev) udelay(10); /* Override Power Management Settings, values from manufacturer's defaults */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), - 0xfff | (0x20 << 16)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PWR_KEY), 0x2968A819); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE1), 0xfff | (0x20 << 16)); /* * RESET_COMPLETED interrupt will be raised, so continue with @@ -203,6 +199,10 @@ static int kbase_device_runtime_init(struct kbase_device *kbdev) ret = -EINVAL; } + /* allocate resources for reset */ + if (!ret) + ret = resets_init(kbdev); + return ret; } @@ -256,10 +256,10 @@ struct kbase_pm_callback_conf pm_callbacks = { .power_runtime_term_callback = kbase_device_runtime_disable, .power_runtime_on_callback = pm_callback_runtime_on, .power_runtime_off_callback = pm_callback_runtime_off, -#else /* KBASE_PM_RUNTIME */ +#else /* KBASE_PM_RUNTIME */ .power_runtime_init_callback = NULL, .power_runtime_term_callback = NULL, .power_runtime_on_callback = NULL, .power_runtime_off_callback = NULL, -#endif /* KBASE_PM_RUNTIME */ 
+#endif /* KBASE_PM_RUNTIME */ }; diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c index 8add708d0f8a..4cfd7ffb09f9 100644 --- a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c +++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,10 +32,7 @@ static struct kbase_io_resources io_resources = { .job_irq_number = 68, .mmu_irq_number = 69, .gpu_irq_number = 70, - .io_memory_region = { - .start = 0xFC010000, - .end = 0xFC010000 + (4096 * 4) - 1 - } + .io_memory_region = { .start = 0xFC010000, .end = 0xFC010000 + (4096 * 4) - 1 } }; #endif /* CONFIG_OF */ @@ -49,12 +46,10 @@ static void pm_callback_power_off(struct kbase_device *kbdev) { } -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = NULL, - .power_resume_callback = NULL -}; +struct kbase_pm_callback_conf pm_callbacks = { .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL }; static struct kbase_platform_config versatile_platform_config = { #ifndef CONFIG_OF @@ -71,7 +66,8 @@ struct kbase_platform_config *kbase_get_platform_config(void) #if MALI_USE_CSF int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) #else -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +int 
kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]) #endif { return 1; diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c index 835b7587cfbf..980682c85693 100644 --- a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,9 +31,7 @@ static struct kbase_io_resources io_resources = { .job_irq_number = 68, .mmu_irq_number = 69, .gpu_irq_number = 70, - .io_memory_region = { - .start = 0x2f010000, - .end = 0x2f010000 + (4096 * 4) - 1} + .io_memory_region = { .start = 0x2f010000, .end = 0x2f010000 + (4096 * 4) - 1 } }; #endif @@ -47,12 +45,10 @@ static void pm_callback_power_off(struct kbase_device *kbdev) { } -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = NULL, - .power_resume_callback = NULL -}; +struct kbase_pm_callback_conf pm_callbacks = { .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL }; static struct kbase_platform_config versatile_platform_config = { #ifndef CONFIG_OF @@ -69,7 +65,8 @@ struct kbase_platform_config *kbase_get_platform_config(void) #if MALI_USE_CSF int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) #else -int 
kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]) #endif { return 1; diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c index 8be30fb25bba..c80242184edb 100644 --- a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,9 +31,7 @@ static struct kbase_io_resources io_resources = { .job_irq_number = 75, .mmu_irq_number = 76, .gpu_irq_number = 77, - .io_memory_region = { - .start = 0x2F000000, - .end = 0x2F000000 + (4096 * 4) - 1} + .io_memory_region = { .start = 0x2F000000, .end = 0x2F000000 + (4096 * 4) - 1 } }; #endif @@ -47,12 +45,10 @@ static void pm_callback_power_off(struct kbase_device *kbdev) { } -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = NULL, - .power_resume_callback = NULL -}; +struct kbase_pm_callback_conf pm_callbacks = { .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL }; static struct kbase_platform_config versatile_platform_config = { #ifndef CONFIG_OF @@ -69,7 +65,8 @@ struct kbase_platform_config 
*kbase_get_platform_config(void) #if MALI_USE_CSF int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) #else -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]) #endif { return 1; diff --git a/drivers/gpu/arm/bifrost/protected_mode_switcher.h b/drivers/gpu/arm/bifrost/protected_mode_switcher.h index 9dd9253c7e95..88def63d4e2e 100644 --- a/drivers/gpu/arm/bifrost/protected_mode_switcher.h +++ b/drivers/gpu/arm/bifrost/protected_mode_switcher.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,10 +34,8 @@ struct protected_mode_device; * Returns 0 on success, non-zero on error */ struct protected_mode_ops { - int (*protected_mode_enable)( - struct protected_mode_device *protected_dev); - int (*protected_mode_disable)( - struct protected_mode_device *protected_dev); + int (*protected_mode_enable)(struct protected_mode_device *protected_dev); + int (*protected_mode_disable)(struct protected_mode_device *protected_dev); }; /** diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild index 38e4dd4d712a..72ca70ac8779 100644 --- a/drivers/gpu/arm/bifrost/tests/Kbuild +++ b/drivers/gpu/arm/bifrost/tests/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -17,6 +17,7 @@ # http://www.gnu.org/licenses/gpl-2.0.html. # # +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) ccflags-y += -I$(src)/include \ -I$(src) @@ -29,3 +30,4 @@ obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/ obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/ obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/ + diff --git a/drivers/gpu/arm/bifrost/tests/Kconfig b/drivers/gpu/arm/bifrost/tests/Kconfig index e9fe22771416..aa011bac8990 100644 --- a/drivers/gpu/arm/bifrost/tests/Kconfig +++ b/drivers/gpu/arm/bifrost/tests/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -65,5 +65,6 @@ config MALI_KUTF_MGM_INTEGRATION_TEST - mali_kutf_mgm_integration_test.ko + comment "Enable MALI_BIFROST_DEBUG for KUTF modules support" depends on MALI_BIFROST && !MALI_BIFROST_DEBUG && MALI_KUTF diff --git a/drivers/gpu/arm/bifrost/tests/build.bp b/drivers/gpu/arm/bifrost/tests/build.bp index 5581ba934cd3..c9c59afbf01f 100644 --- a/drivers/gpu/arm/bifrost/tests/build.bp +++ b/drivers/gpu/arm/bifrost/tests/build.bp @@ -37,10 +37,20 @@ bob_defaults { unit_test_kernel_modules: { kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"], }, + cflags: [ + "-Wno-sign-compare", + "-Wno-unused-but-set-variable", + "-Wno-unused-parameter", + ], } bob_defaults { name: "kernel_unit_tests", add_to_alias: ["unit_tests"], srcs: [".*_unit_test/"], + cflags: [ + "-Wno-sign-compare", + "-Wno-unused-but-set-variable", + "-Wno-unused-parameter", + ], } diff 
--git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h index 3f68efa4257d..e70d2b197ba7 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -70,8 +70,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size); * Return: 0 on success, -EFAULT if the line cannot be copied from user space, * -ENOMEM if out of memory. */ -int kutf_helper_input_enqueue(struct kutf_context *context, - const char __user *str, size_t size); +int kutf_helper_input_enqueue(struct kutf_context *context, const char __user *str, size_t size); /** * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent @@ -106,4 +105,4 @@ void kutf_helper_ignore_dmesg(struct device *dev); */ void kutf_helper_stop_ignoring_dmesg(struct device *dev); -#endif /* _KERNEL_UTF_HELPERS_H_ */ +#endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h index e147cbb90154..4a05ba9c3ddb 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,6 @@ #include #include - #define KUTF_HELPER_MAX_VAL_NAME_LEN 255 enum kutf_helper_valtype { @@ -86,7 +85,6 @@ enum kutf_helper_err { KUTF_HELPER_ERR_INVALID_VALUE, }; - /* Send named NAME=value pair, u64 value * * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long @@ -95,8 +93,7 @@ enum kutf_helper_err { * * Returns 0 on success, non-zero on failure */ -int kutf_helper_send_named_u64(struct kutf_context *context, - const char *val_name, u64 val); +int kutf_helper_send_named_u64(struct kutf_context *context, const char *val_name, u64 val); /* Get the maximum length of a string that can be represented as a particular * NAME="value" pair without string-value truncation in the kernel's buffer @@ -126,8 +123,8 @@ int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); * * Returns 0 on success, non-zero on failure */ -int kutf_helper_send_named_str(struct kutf_context *context, - const char *val_name, const char *val_str); +int kutf_helper_send_named_str(struct kutf_context *context, const char *val_name, + const char *val_str); /* Receive named NAME=value pair * @@ -143,9 +140,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, * file, positive value indicates an enum kutf_helper_err value for correct * reception of data but invalid parsing */ -int kutf_helper_receive_named_val( - struct kutf_context *context, - struct kutf_helper_named_val *named_val); +int kutf_helper_receive_named_val(struct kutf_context *context, + struct kutf_helper_named_val *named_val); /* Receive and validate NAME=value pair * @@ -171,14 +167,11 @@ int kutf_helper_receive_named_val( * The rationale behind this is that we'd prefer to continue the rest of the * test with failures propagated, rather than hitting a timeout */ -int kutf_helper_receive_check_val( - struct 
kutf_helper_named_val *named_val, - struct kutf_context *context, - const char *expect_val_name, - enum kutf_helper_valtype expect_val_type); +int kutf_helper_receive_check_val(struct kutf_helper_named_val *named_val, + struct kutf_context *context, const char *expect_val_name, + enum kutf_helper_valtype expect_val_type); /* Output a named value to kmsg */ void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val); - -#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ +#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h index 5d4d96ef39b1..14035ccc3b93 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,4 +69,4 @@ void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); * @pool: The memory pool to free */ void kutf_mempool_destroy(struct kutf_mempool *pool); -#endif /* _KERNEL_UTF_MEM_H_ */ +#endif /* _KERNEL_UTF_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h index 2fb1a47a59b4..6d70cb1544e9 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,18 +55,18 @@ */ enum kutf_result_status { KUTF_RESULT_BENCHMARK = -3, - KUTF_RESULT_SKIP = -2, + KUTF_RESULT_SKIP = -2, KUTF_RESULT_UNKNOWN = -1, - KUTF_RESULT_PASS = 0, - KUTF_RESULT_DEBUG = 1, - KUTF_RESULT_INFO = 2, - KUTF_RESULT_WARN = 3, - KUTF_RESULT_FAIL = 4, - KUTF_RESULT_FATAL = 5, - KUTF_RESULT_ABORT = 6, + KUTF_RESULT_PASS = 0, + KUTF_RESULT_DEBUG = 1, + KUTF_RESULT_INFO = 2, + KUTF_RESULT_WARN = 3, + KUTF_RESULT_FAIL = 4, + KUTF_RESULT_FATAL = 5, + KUTF_RESULT_ABORT = 6, - KUTF_RESULT_USERDATA = 7, + KUTF_RESULT_USERDATA = 7, KUTF_RESULT_USERDATA_WAIT = 8, KUTF_RESULT_TEST_FINISHED = 9 }; @@ -90,9 +90,9 @@ struct kutf_context; * @message: A more verbose status message. */ struct kutf_result { - struct list_head node; - enum kutf_result_status status; - const char *message; + struct list_head node; + enum kutf_result_status status; + const char *message; }; /** @@ -112,9 +112,9 @@ struct kutf_result { * @flags: Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT */ struct kutf_result_set { - struct list_head results; - wait_queue_head_t waitq; - int flags; + struct list_head results; + wait_queue_head_t waitq; + int flags; }; /** @@ -134,8 +134,8 @@ struct kutf_result_set *kutf_create_result_set(void); * * Return: 0 if the result is successfully added. -ENOMEM if allocation fails. */ -int kutf_add_result(struct kutf_context *context, - enum kutf_result_status status, const char *message); +int kutf_add_result(struct kutf_context *context, enum kutf_result_status status, + const char *message); /** * kutf_remove_result() - Remove a result from the head of a result set. 
@@ -146,8 +146,7 @@ int kutf_add_result(struct kutf_context *context, * * Return: result or ERR_PTR if interrupted */ -struct kutf_result *kutf_remove_result( - struct kutf_result_set *set); +struct kutf_result *kutf_remove_result(struct kutf_result_set *set); /** * kutf_destroy_result_set() - Free a previously created result set. @@ -175,6 +174,6 @@ void kutf_set_waiting_for_input(struct kutf_result_set *set); */ void kutf_clear_waiting_for_input(struct kutf_result_set *set); -#endif /* __KERNEL__ */ +#endif /* __KERNEL__ */ -#endif /* _KERNEL_UTF_RESULTSET_H_ */ +#endif /* _KERNEL_UTF_RESULTSET_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h index 9e459c556013..c7702550d346 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,28 +48,28 @@ * value; tests without a more specific class must be marked with the flag * KUTF_F_TEST_GENERIC. */ -#define KUTF_F_TEST_NONE ((unsigned int)(0)) +#define KUTF_F_TEST_NONE ((unsigned int)(0)) /** * KUTF_F_TEST_SMOKETEST - Class indicating this test is a smoke test. * A given set of smoke tests should be quick to run, enabling rapid turn-around * of "regress-on-commit" test runs. */ -#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) +#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) /** * KUTF_F_TEST_PERFORMANCE - Class indicating this test is a performance test. 
* These tests typically produce a performance metric, such as "time to run" or * "frames per second", */ -#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) +#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) /** * KUTF_F_TEST_DEPRECATED - Class indicating that this test is a deprecated test. * These tests have typically been replaced by an alternative test which is * more efficient, or has better coverage. */ -#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) +#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) /** * KUTF_F_TEST_EXPECTED_FAILURE - Class indicating that this test is a known failure. @@ -80,7 +80,7 @@ * results database and web UI, as this means there is no need to modify the * test code. */ -#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) +#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) /** * KUTF_F_TEST_GENERIC - Class indicating that this test is a generic test, @@ -88,14 +88,14 @@ * Tests which are not created with a specific set * of filter flags by the user are assigned this test class by default. */ -#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) +#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) /** * KUTF_F_TEST_RESFAIL - Class indicating this test is a resource allocation failure test. * A resource allocation failure test will test that an error code is * correctly propagated when an allocation fails. */ -#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) +#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) /** * KUTF_F_TEST_EXPECTED_FAILURE_RF - Additional flag indicating that this test @@ -147,7 +147,7 @@ /** * KUTF_F_TEST_ALL - Pseudo-flag indicating that all test classes should be executed. 
*/ -#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) +#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) /** * union kutf_callback_data - Union used to store test callback data @@ -157,7 +157,7 @@ */ union kutf_callback_data { void *ptr_value; - u32 u32_value; + u32 u32_value; }; /** @@ -183,7 +183,7 @@ struct kutf_userdata_line { * the warning followed by 'cat' exiting due to EOF - which is much more user * friendly than blocking indefinitely waiting for user data. */ -#define KUTF_USERDATA_WARNING_OUTPUT 1 +#define KUTF_USERDATA_WARNING_OUTPUT 1 /** * struct kutf_userdata - Structure holding user data @@ -201,38 +201,40 @@ struct kutf_userdata { /** * struct kutf_context - Structure representing a kernel test context - * @kref: Refcount for number of users of this context - * @suite: Convenience pointer to the suite this context - * is running - * @test_fix: The fixture that is being run in this context - * @fixture_pool: The memory pool used for the duration of - * the fixture/text context. - * @fixture: The user provided fixture structure. - * @fixture_index: The index (id) of the current fixture. - * @fixture_name: The name of the current fixture (or NULL if unnamed). - * @test_data: Any user private data associated with this test - * @result_set: All the results logged by this test context - * @status: The status of the currently running fixture. - * @expected_status: The expected status on exist of the currently - * running fixture. - * @work: Work item to enqueue onto the work queue to run the test - * @userdata: Structure containing the user data for the test to read + * @kref: Refcount for number of users of this context + * @suite: Convenience pointer to the suite this context + * is running + * @test_fix: The fixture that is being run in this context + * @fixture_pool: The memory pool used for the duration of + * the fixture/text context. + * @fixture: The user provided fixture structure. 
+ * @fixture_index: The index (id) of the current fixture. + * @fixture_name: The name of the current fixture (or NULL if unnamed). + * @test_data: Any user private data associated with this test + * @result_set: All the results logged by this test context + * @output_sync: Mutex to serialize test failure output. + * @status: The status of the currently running fixture. + * @expected_status: The expected status on exit of the currently + * running fixture. + * @work: Work item to enqueue onto the work queue to run the test + * @userdata: Structure containing the user data for the test to read */ struct kutf_context { - struct kref kref; - struct kutf_suite *suite; - struct kutf_test_fixture *test_fix; - struct kutf_mempool fixture_pool; - void *fixture; - unsigned int fixture_index; - const char *fixture_name; - union kutf_callback_data test_data; - struct kutf_result_set *result_set; - enum kutf_result_status status; - enum kutf_result_status expected_status; + struct kref kref; + struct kutf_suite *suite; + struct kutf_test_fixture *test_fix; + struct kutf_mempool fixture_pool; + void *fixture; + unsigned int fixture_index; + const char *fixture_name; + union kutf_callback_data test_data; + struct kutf_result_set *result_set; + struct mutex output_sync; + enum kutf_result_status status; + enum kutf_result_status expected_status; - struct work_struct work; - struct kutf_userdata userdata; + struct work_struct work; + struct kutf_userdata userdata; }; /** @@ -252,16 +254,16 @@ struct kutf_context { * part of this suite */ struct kutf_suite { - struct kutf_application *app; - const char *name; - union kutf_callback_data suite_data; + struct kutf_application *app; + const char *name; + union kutf_callback_data suite_data; void *(*create_fixture)(struct kutf_context *context); - void (*remove_fixture)(struct kutf_context *context); - unsigned int fixture_variants; - unsigned int suite_default_flags; - struct list_head node; - struct dentry *dir; - struct list_head
test_list; + void (*remove_fixture)(struct kutf_context *context); + unsigned int fixture_variants; + unsigned int suite_default_flags; + struct list_head node; + struct dentry *dir; + struct list_head test_list; }; /** =========================================================================== @@ -309,12 +311,10 @@ void kutf_destroy_application(struct kutf_application *app); * Return: pointer to the created kutf_suite on success or NULL * on failure */ -struct kutf_suite *kutf_create_suite( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context)); +struct kutf_suite *kutf_create_suite(struct kutf_application *app, const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context)); /** * kutf_create_suite_with_filters() - Create a kernel test suite with user @@ -337,12 +337,9 @@ struct kutf_suite *kutf_create_suite( * Return: pointer to the created kutf_suite on success or NULL on failure */ struct kutf_suite *kutf_create_suite_with_filters( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters); + struct kutf_application *app, const char *name, unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), unsigned int filters); /** * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with @@ -362,14 +359,12 @@ struct kutf_suite *kutf_create_suite_with_filters( * Return: pointer to the created kutf_suite on success or NULL * on failure */ -struct kutf_suite *kutf_create_suite_with_filters_and_data( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, 
- void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data suite_data); +struct kutf_suite * +kutf_create_suite_with_filters_and_data(struct kutf_application *app, const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters, union kutf_callback_data suite_data); /** * kutf_add_test() - Add a test to a kernel test suite. @@ -380,10 +375,8 @@ struct kutf_suite *kutf_create_suite_with_filters_and_data( * * Note: As no filters are provided the test will use the suite filters instead */ -void kutf_add_test(struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context)); +void kutf_add_test(struct kutf_suite *suite, unsigned int id, const char *name, + void (*execute)(struct kutf_context *context)); /** * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters @@ -393,11 +386,9 @@ void kutf_add_test(struct kutf_suite *suite, * @execute: Callback to the test function to run. * @filters: A set of filtering flags, assigning test categories. 
*/ -void kutf_add_test_with_filters(struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters); +void kutf_add_test_with_filters(struct kutf_suite *suite, unsigned int id, const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters); /** * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite @@ -410,13 +401,10 @@ void kutf_add_test_with_filters(struct kutf_suite *suite, * @test_data: Test specific callback data, provided during the * running of the test in the kutf_context */ -void kutf_add_test_with_filters_and_data( - struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data test_data); +void kutf_add_test_with_filters_and_data(struct kutf_suite *suite, unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters, union kutf_callback_data test_data); /** =========================================================================== * Test functions @@ -430,10 +418,8 @@ void kutf_add_test_with_filters_and_data( * @message: The message for this result * @new_status: The result status of this log message */ -void kutf_test_log_result_external( - struct kutf_context *context, - const char *message, - enum kutf_result_status new_status); +void kutf_test_log_result_external(struct kutf_context *context, const char *message, + enum kutf_result_status new_status); /** * kutf_test_expect_abort() - Tell the kernel that you expect the current @@ -525,6 +511,21 @@ void kutf_test_debug(struct kutf_context *context, char const *message); */ void kutf_test_info(struct kutf_context *context, char const *message); +/** + * kutf_test_info_msg() - Send a formatted information message. + * + * @context: The test context this test is running in. + * @msg: A format string with the information message.
+ * @...: Additional parameters corresponding to the format flags of the + * format string. + * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a prebaked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_info_msg(struct kutf_context *context, char const *msg, ...) __printf(2, 3); + /** * kutf_test_warn() - Send a warning message * @context: The test context this test is running in. @@ -549,6 +550,16 @@ void kutf_test_warn(struct kutf_context *context, char const *message); */ void kutf_test_fail(struct kutf_context *context, char const *message); +/** + * kutf_test_fail_msg() - Send a formatted failure message. + * + * @context: The test context this test is running in. + * @msg: A format string with the failure message. + * @...: Additional parameters corresponding to the format flags of the + * format string. + */ +void kutf_test_fail_msg(struct kutf_context *context, char const *msg, ...) __printf(2, 3); + /** * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error * @context: The test context this test is running in. @@ -568,4 +579,4 @@ void kutf_test_fatal(struct kutf_context *context, char const *message); */ void kutf_test_abort(struct kutf_context *context); -#endif /* _KERNEL_UTF_SUITE_H_ */ +#endif /* _KERNEL_UTF_SUITE_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h index f6e758b80d98..67dac7b516a6 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,7 @@ * KUTF_MAX_DSPRINTF_LEN - Maximum size of the message strings within * kernel UTF, messages longer then this will be truncated. */ -#define KUTF_MAX_DSPRINTF_LEN 1024 +#define KUTF_MAX_DSPRINTF_LEN 1024 /** * kutf_dsprintf() - dynamic sprintf @@ -53,8 +53,6 @@ * * Return: Returns pointer to allocated string, or NULL on error. */ -const char *kutf_dsprintf(struct kutf_mempool *pool, - const char *fmt, ...) __printf(2, 3); +const char *kutf_dsprintf(struct kutf_mempool *pool, const char *fmt, ...) __printf(2, 3); - -#endif /* _KERNEL_UTF_UTILS_H_ */ +#endif /* _KERNEL_UTF_UTILS_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c index 42736195e071..e0fe59865c93 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,7 +59,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) spin_unlock(&kutf_input_lock); err = wait_event_interruptible(context->userdata.input_waitq, - kutf_helper_pending_input(context)); + kutf_helper_pending_input(context)); if (err) return ERR_PTR(-EINTR); @@ -67,8 +67,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) spin_lock(&kutf_input_lock); } - line = list_first_entry(&context->userdata.input_head, - struct kutf_userdata_line, node); + line = list_first_entry(&context->userdata.input_head, struct kutf_userdata_line, node); if (line->str) { /* * Unless it is the end-of-input marker, @@ -84,13 +83,11 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) return line->str; } -int kutf_helper_input_enqueue(struct kutf_context *context, - const char __user *str, size_t size) +int kutf_helper_input_enqueue(struct kutf_context *context, const char __user *str, size_t size) { struct kutf_userdata_line *line; - line = kutf_mempool_alloc(&context->fixture_pool, - sizeof(*line) + size + 1); + line = kutf_mempool_alloc(&context->fixture_pool, sizeof(*line) + size + 1); if (!line) return -ENOMEM; if (str) { diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c index c4e294325262..8654fd503960 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -60,7 +60,8 @@ static int validate_val_name(const char *val_str, int str_len) { int i = 0; - for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) { + for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; + ++i, --str_len) { char val_chr = val_str[i]; if (val_chr >= 'A' && val_chr <= 'Z') @@ -78,7 +79,8 @@ static int validate_val_name(const char *val_str, int str_len) if (i == 0) return 1; /* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */ - if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0')) + if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || + (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0')) return 1; return 0; @@ -97,19 +99,18 @@ static int find_quoted_string_valid_len(const char *str) ptr = strpbrk(str, check_chars); if (ptr) - return (int)(ptr-str); + return (int)(ptr - str); return (int)strlen(str); } -static int kutf_helper_userdata_enqueue(struct kutf_context *context, - const char *str) +static int kutf_helper_userdata_enqueue(struct kutf_context *context, const char *str) { char *str_copy; size_t len; int err; - len = strlen(str)+1; + len = strlen(str) + 1; str_copy = kutf_mempool_alloc(&context->fixture_pool, len); if (!str_copy) @@ -126,8 +127,7 @@ static int kutf_helper_userdata_enqueue(struct kutf_context *context, /* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */ #define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1) -int kutf_helper_send_named_u64(struct kutf_context *context, - const char *val_name, u64 val) +int kutf_helper_send_named_u64(struct kutf_context *context, const char *val_name, u64 val) { int ret = 1; char msgbuf[NAMED_U64_VAL_BUF_SZ]; @@ -135,23 +135,25 @@ int kutf_helper_send_named_u64(struct 
kutf_context *context, if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': Invalid value name", val_name); + "Failed to send u64 value named '%s': Invalid value name", + val_name); goto out_err; } ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val); if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", - val_name, NAMED_U64_VAL_BUF_SZ, ret); + errmsg = kutf_dsprintf( + &context->fixture_pool, + "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", + val_name, NAMED_U64_VAL_BUF_SZ, ret); goto out_err; } ret = kutf_helper_userdata_enqueue(context, msgbuf); if (ret) { errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': send returned %d", - val_name, ret); + "Failed to send u64 value named '%s': send returned %d", + val_name, ret); goto out_err; } @@ -166,8 +168,7 @@ EXPORT_SYMBOL(kutf_helper_send_named_u64); #define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\"" #define NAMED_STR_END_DELIM "\"" -int kutf_helper_max_str_len_for_kern(const char *val_name, - int kern_buf_sz) +int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz) { const int val_name_len = strlen(val_name); const int start_delim_len = strlen(NAMED_STR_START_DELIM); @@ -175,16 +176,14 @@ int kutf_helper_max_str_len_for_kern(const char *val_name, int max_msg_len = kern_buf_sz; int max_str_len; - max_str_len = max_msg_len - val_name_len - start_delim_len - - end_delim_len; + max_str_len = max_msg_len - val_name_len - start_delim_len - end_delim_len; return max_str_len; } EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern); -int kutf_helper_send_named_str(struct kutf_context *context, - const char *val_name, - const char *val_str) +int kutf_helper_send_named_str(struct kutf_context 
*context, const char *val_name, + const char *val_str) { int val_str_len; int str_buf_sz; @@ -198,7 +197,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': Invalid value name", val_name); + "Failed to send u64 value named '%s': Invalid value name", + val_name); goto out_err; } val_name_len = strlen(val_name); @@ -213,9 +213,10 @@ int kutf_helper_send_named_str(struct kutf_context *context, */ str_buf = kmalloc(str_buf_sz, GFP_KERNEL); if (!str_buf) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d", - val_name, str_buf_sz); + errmsg = kutf_dsprintf( + &context->fixture_pool, + "Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d", + val_name, str_buf_sz); goto out_err; } copy_ptr = str_buf; @@ -247,8 +248,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, if (ret) { errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send str value named '%s': send returned %d", - val_name, ret); + "Failed to send str value named '%s': send returned %d", + val_name, ret); goto out_err; } @@ -262,9 +263,8 @@ out_err: } EXPORT_SYMBOL(kutf_helper_send_named_str); -int kutf_helper_receive_named_val( - struct kutf_context *context, - struct kutf_helper_named_val *named_val) +int kutf_helper_receive_named_val(struct kutf_context *context, + struct kutf_helper_named_val *named_val) { size_t recv_sz; char *recv_str; @@ -298,8 +298,7 @@ int kutf_helper_receive_named_val( } } if (!name_str) { - pr_err("Invalid name part for received string '%s'\n", - recv_str); + pr_err("Invalid name part for received string '%s'\n", recv_str); return KUTF_HELPER_ERR_INVALID_NAME; } @@ -324,11 +323,13 @@ int kutf_helper_receive_named_val( recv_sz -= (strval_len + 1); type = KUTF_HELPER_VALTYPE_STR; } else { - pr_err("String value 
contains invalid characters in rest of received string '%s'\n", recv_str); + pr_err("String value contains invalid characters in rest of received string '%s'\n", + recv_str); err = KUTF_HELPER_ERR_CHARS_AFTER_VAL; } } else { - pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str); + pr_err("End of string delimiter not found in rest of received string '%s'\n", + recv_str); err = KUTF_HELPER_ERR_NO_END_DELIMITER; } } else { @@ -345,7 +346,8 @@ int kutf_helper_receive_named_val( recv_sz -= len_remain; } else { /* special case: not a number, report as such */ - pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str); + pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", + recv_str); } } @@ -354,8 +356,8 @@ int kutf_helper_receive_named_val( /* Any remaining characters - error */ if (strnlen(recv_str, recv_sz) != 0) { - pr_err("Characters remain after value of type %s: '%s'\n", - get_val_type_name(type), recv_str); + pr_err("Characters remain after value of type %s: '%s'\n", get_val_type_name(type), + recv_str); return KUTF_HELPER_ERR_CHARS_AFTER_VAL; } @@ -381,44 +383,41 @@ int kutf_helper_receive_named_val( EXPORT_SYMBOL(kutf_helper_receive_named_val); #define DUMMY_MSG "" -int kutf_helper_receive_check_val( - struct kutf_helper_named_val *named_val, - struct kutf_context *context, - const char *expect_val_name, - enum kutf_helper_valtype expect_val_type) +int kutf_helper_receive_check_val(struct kutf_helper_named_val *named_val, + struct kutf_context *context, const char *expect_val_name, + enum kutf_helper_valtype expect_val_type) { int err; err = kutf_helper_receive_named_val(context, named_val); if (err < 0) { const char *msg = kutf_dsprintf(&context->fixture_pool, - "Failed to receive value named '%s'", - expect_val_name); + "Failed to receive value named '%s'", + expect_val_name); kutf_test_fail(context, msg); return err; } else if (err > 0) { - 
const char *msg = kutf_dsprintf(&context->fixture_pool, - "Named-value parse error when expecting value named '%s'", - expect_val_name); + const char *msg = kutf_dsprintf( + &context->fixture_pool, + "Named-value parse error when expecting value named '%s'", expect_val_name); kutf_test_fail(context, msg); goto out_fail_and_fixup; } - if (named_val->val_name != NULL && - strcmp(named_val->val_name, expect_val_name) != 0) { - const char *msg = kutf_dsprintf(&context->fixture_pool, - "Expecting to receive value named '%s' but got '%s'", - expect_val_name, named_val->val_name); + if (named_val->val_name != NULL && strcmp(named_val->val_name, expect_val_name) != 0) { + const char *msg = + kutf_dsprintf(&context->fixture_pool, + "Expecting to receive value named '%s' but got '%s'", + expect_val_name, named_val->val_name); kutf_test_fail(context, msg); goto out_fail_and_fixup; } - if (named_val->type != expect_val_type) { - const char *msg = kutf_dsprintf(&context->fixture_pool, - "Expecting value named '%s' to be of type %s but got %s", - expect_val_name, get_val_type_name(expect_val_type), - get_val_type_name(named_val->type)); + const char *msg = kutf_dsprintf( + &context->fixture_pool, + "Expecting value named '%s' to be of type %s but got %s", expect_val_name, + get_val_type_name(expect_val_type), get_val_type_name(named_val->type)); kutf_test_fail(context, msg); goto out_fail_and_fixup; } @@ -431,17 +430,16 @@ out_fail_and_fixup: case KUTF_HELPER_VALTYPE_U64: named_val->u.val_u64 = 0ull; break; - case KUTF_HELPER_VALTYPE_STR: - { - char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); + case KUTF_HELPER_VALTYPE_STR: { + char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); - if (!str) - return -1; + if (!str) + return -1; - strcpy(str, DUMMY_MSG); - named_val->u.val_str = str; - break; - } + strcpy(str, DUMMY_MSG); + named_val->u.val_str = str; + break; + } default: break; } diff --git 
a/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c index 716970abb8c4..527ddaa3cfc4 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,6 @@ #include - /** * struct kutf_alloc_entry - Structure representing an allocation. * @node: List node for use with kutf_mempool. @@ -71,7 +70,6 @@ void kutf_mempool_destroy(struct kutf_mempool *pool) kfree(remove_alloc); } mutex_unlock(&pool->lock); - } EXPORT_SYMBOL(kutf_mempool_destroy); diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c index 3a7ade2831b3..4f62d046eb7e 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,9 +54,8 @@ fail_alloc: return NULL; } -int kutf_add_result(struct kutf_context *context, - enum kutf_result_status status, - const char *message) +int kutf_add_result(struct kutf_context *context, enum kutf_result_status status, + const char *message) { struct kutf_mempool *mempool = &context->fixture_pool; struct kutf_result_set *set = context->result_set; @@ -115,8 +114,7 @@ struct kutf_result *kutf_remove_result(struct kutf_result_set *set) int ret; do { - ret = wait_event_interruptible(set->waitq, - kutf_has_result(set)); + ret = wait_event_interruptible(set->waitq, kutf_has_result(set)); if (ret) return ERR_PTR(ret); @@ -124,15 +122,11 @@ struct kutf_result *kutf_remove_result(struct kutf_result_set *set) spin_lock(&kutf_result_lock); if (!list_empty(&set->results)) { - result = list_first_entry(&set->results, - struct kutf_result, - node); + result = list_first_entry(&set->results, struct kutf_result, node); list_del(&result->node); } else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) { /* Return a fake result */ - static struct kutf_result waiting = { - .status = KUTF_RESULT_USERDATA_WAIT - }; + static struct kutf_result waiting = { .status = KUTF_RESULT_USERDATA_WAIT }; result = &waiting; } /* If result == NULL then there was a race with the event diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c index 4468066f1b27..9e57c10befdf 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,9 +49,9 @@ * application */ struct kutf_application { - const char *name; - struct dentry *dir; - struct list_head suite_list; + const char *name; + struct dentry *dir; + struct list_head suite_list; }; /** @@ -68,14 +68,14 @@ struct kutf_application { * @dir: debugfs directory for this test function */ struct kutf_test_function { - struct kutf_suite *suite; - unsigned int filters; - unsigned int test_id; + struct kutf_suite *suite; + unsigned int filters; + unsigned int test_id; void (*execute)(struct kutf_context *context); union kutf_callback_data test_data; - struct list_head node; - struct list_head variant_list; - struct dentry *dir; + struct list_head node; + struct list_head variant_list; + struct dentry *dir; }; /** @@ -88,9 +88,9 @@ struct kutf_test_function { */ struct kutf_test_fixture { struct kutf_test_function *test_func; - unsigned int fixture_index; - struct list_head node; - struct dentry *dir; + unsigned int fixture_index; + struct list_head node; + struct dentry *dir; }; static struct dentry *base_dir; @@ -102,14 +102,14 @@ static struct workqueue_struct *kutf_workq; * @result: Status value for a single test */ struct kutf_convert_table { - char result_name[50]; + char result_name[50]; enum kutf_result_status result; }; static const struct kutf_convert_table kutf_convert[] = { -#define ADD_UTF_RESULT(_name) \ - { \ -#_name, _name, \ +#define ADD_UTF_RESULT(_name) \ + { \ +#_name, _name, \ } ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK), ADD_UTF_RESULT(KUTF_RESULT_SKIP), ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN), ADD_UTF_RESULT(KUTF_RESULT_PASS), @@ -130,8 +130,7 @@ static const struct kutf_convert_table kutf_convert[] = { * * Return: Returns the created test context on success or NULL on failure */ -static struct kutf_context *kutf_create_context( - struct kutf_test_fixture *test_fix); +static struct 
kutf_context *kutf_create_context(struct kutf_test_fixture *test_fix); /** * kutf_destroy_context() - Destroy a previously created test context, only @@ -166,8 +165,7 @@ static void kutf_context_put(struct kutf_context *context); * @context: Test context * @status: Result status */ -static void kutf_set_result(struct kutf_context *context, - enum kutf_result_status status); +static void kutf_set_result(struct kutf_context *context, enum kutf_result_status status); /** * kutf_set_expected_result() - Set the expected test result for the specified @@ -176,7 +174,7 @@ static void kutf_set_result(struct kutf_context *context, * @expected_status: Expected result status */ static void kutf_set_expected_result(struct kutf_context *context, - enum kutf_result_status expected_status); + enum kutf_result_status expected_status); /** * kutf_result_to_string() - Converts a KUTF result into a string @@ -187,7 +185,7 @@ static void kutf_set_expected_result(struct kutf_context *context, */ static int kutf_result_to_string(const char **result_str, enum kutf_result_status result) { - int i; + size_t i; int ret = 0; for (i = 0; i < UTF_CONVERT_SIZE; i++) { @@ -210,8 +208,8 @@ static int kutf_result_to_string(const char **result_str, enum kutf_result_statu * Return: On success, the number of bytes read and offset @ppos advanced by * this number; on error, negative value */ -static ssize_t kutf_debugfs_const_string_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) +static ssize_t kutf_debugfs_const_string_read(struct file *file, char __user *buf, size_t len, + loff_t *ppos) { char *str = file->private_data; @@ -222,7 +220,7 @@ static const struct file_operations kutf_debugfs_const_string_ops = { .owner = THIS_MODULE, .open = simple_open, .read = kutf_debugfs_const_string_read, - .llseek = default_llseek, + .llseek = default_llseek, }; /** @@ -237,41 +235,33 @@ static void kutf_add_explicit_result(struct kutf_context *context) case KUTF_RESULT_WARN: if 
(context->status == KUTF_RESULT_WARN) - kutf_test_pass(context, - "Pass (expected warn occurred)"); + kutf_test_pass(context, "Pass (expected warn occurred)"); else if (context->status != KUTF_RESULT_SKIP) - kutf_test_fail(context, - "Fail (expected warn missing)"); + kutf_test_fail(context, "Fail (expected warn missing)"); break; case KUTF_RESULT_FAIL: if (context->status == KUTF_RESULT_FAIL) - kutf_test_pass(context, - "Pass (expected fail occurred)"); + kutf_test_pass(context, "Pass (expected fail occurred)"); else if (context->status != KUTF_RESULT_SKIP) { /* Force the expected status so the fail gets logged */ context->expected_status = KUTF_RESULT_PASS; - kutf_test_fail(context, - "Fail (expected fail missing)"); + kutf_test_fail(context, "Fail (expected fail missing)"); } break; case KUTF_RESULT_FATAL: if (context->status == KUTF_RESULT_FATAL) - kutf_test_pass(context, - "Pass (expected fatal occurred)"); + kutf_test_pass(context, "Pass (expected fatal occurred)"); else if (context->status != KUTF_RESULT_SKIP) - kutf_test_fail(context, - "Fail (expected fatal missing)"); + kutf_test_fail(context, "Fail (expected fatal missing)"); break; case KUTF_RESULT_ABORT: if (context->status == KUTF_RESULT_ABORT) - kutf_test_pass(context, - "Pass (expected abort occurred)"); + kutf_test_pass(context, "Pass (expected abort occurred)"); else if (context->status != KUTF_RESULT_SKIP) - kutf_test_fail(context, - "Fail (expected abort missing)"); + kutf_test_fail(context, "Fail (expected abort missing)"); break; default: break; @@ -280,8 +270,7 @@ static void kutf_add_explicit_result(struct kutf_context *context) static void kutf_run_test(struct work_struct *data) { - struct kutf_context *test_context = container_of(data, - struct kutf_context, work); + struct kutf_context *test_context = container_of(data, struct kutf_context, work); struct kutf_suite *suite = test_context->suite; struct kutf_test_function *test_func; @@ -295,10 +284,13 @@ static void kutf_run_test(struct 
work_struct *data) test_context->fixture = suite->create_fixture(test_context); /* Only run the test if the fixture was created (if required) */ - if ((suite->create_fixture && test_context->fixture) || - (!suite->create_fixture)) { - /* Run this fixture */ - test_func->execute(test_context); + if ((suite->create_fixture && test_context->fixture) || (!suite->create_fixture)) { + if (test_func->filters & KUTF_F_TEST_EXPECTED_FAILURE) { + kutf_test_fail(test_context, + "KUTF: Test marked as 'Expected Failure' not run."); + } else { + test_func->execute(test_context); + } if (suite->remove_fixture) suite->remove_fixture(test_context); @@ -368,8 +360,7 @@ finish: * * Return: Number of bytes read. */ -static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) +static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct kutf_context *test_context = file->private_data; struct kutf_result *res; @@ -394,8 +385,7 @@ static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, case KUTF_RESULT_TEST_FINISHED: return 0; case KUTF_RESULT_USERDATA_WAIT: - if (test_context->userdata.flags & - KUTF_USERDATA_WARNING_OUTPUT) { + if (test_context->userdata.flags & KUTF_USERDATA_WARNING_OUTPUT) { /* * Warning message already output, * signal end-of-file @@ -403,37 +393,33 @@ static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, return 0; } - message_len = sizeof(USERDATA_WARNING_MESSAGE)-1; + message_len = sizeof(USERDATA_WARNING_MESSAGE) - 1; if (message_len > len) message_len = len; - bytes_not_copied = copy_to_user(buf, - USERDATA_WARNING_MESSAGE, - message_len); + bytes_not_copied = copy_to_user(buf, USERDATA_WARNING_MESSAGE, message_len); if (bytes_not_copied != 0) return -EFAULT; test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT; return message_len; case KUTF_RESULT_USERDATA: message_len = strlen(res->message); - if (message_len > len-1) { - 
message_len = len-1; + if (message_len > len - 1) { + message_len = len - 1; pr_warn("User data truncated, read not long enough\n"); } - bytes_not_copied = copy_to_user(buf, res->message, - message_len); + bytes_not_copied = copy_to_user(buf, res->message, message_len); if (bytes_not_copied != 0) { pr_warn("Failed to copy data to user space buffer\n"); return -EFAULT; } /* Finally the terminator */ - bytes_not_copied = copy_to_user(&buf[message_len], - &terminator, 1); + bytes_not_copied = copy_to_user(&buf[message_len], &terminator, 1); if (bytes_not_copied != 0) { pr_warn("Failed to copy data to user space buffer\n"); return -EFAULT; } - return message_len+1; + return message_len + 1; default: /* Fall through - this is a test result */ break; @@ -454,32 +440,28 @@ static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, /* First copy the result string */ if (kutf_str_ptr) { - bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, - kutf_str_len); + bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, kutf_str_len); bytes_copied += kutf_str_len - bytes_not_copied; if (bytes_not_copied) goto exit; } /* Then the separator */ - bytes_not_copied = copy_to_user(&buf[bytes_copied], - &separator, 1); + bytes_not_copied = copy_to_user(&buf[bytes_copied], &separator, 1); bytes_copied += 1 - bytes_not_copied; if (bytes_not_copied) goto exit; /* Finally Next copy the result string */ if (res->message) { - bytes_not_copied = copy_to_user(&buf[bytes_copied], - res->message, message_len); + bytes_not_copied = copy_to_user(&buf[bytes_copied], res->message, message_len); bytes_copied += message_len - bytes_not_copied; if (bytes_not_copied) goto exit; } /* Finally the terminator */ - bytes_not_copied = copy_to_user(&buf[bytes_copied], - &terminator, 1); + bytes_not_copied = copy_to_user(&buf[bytes_copied], &terminator, 1); bytes_copied += 1 - bytes_not_copied; exit: @@ -500,8 +482,8 @@ exit: * * Return: Number of bytes written */ -static ssize_t 
kutf_debugfs_run_write(struct file *file, - const char __user *buf, size_t len, loff_t *ppos) +static ssize_t kutf_debugfs_run_write(struct file *file, const char __user *buf, size_t len, + loff_t *ppos) { int ret = 0; struct kutf_context *test_context = file->private_data; @@ -544,7 +526,7 @@ static const struct file_operations kutf_debugfs_run_ops = { .read = kutf_debugfs_run_read, .write = kutf_debugfs_run_write, .release = kutf_debugfs_run_release, - .llseek = default_llseek, + .llseek = default_llseek, }; /** @@ -556,11 +538,10 @@ static const struct file_operations kutf_debugfs_run_ops = { * * Return: 0 on success, negative value corresponding to error code in failure */ -static int create_fixture_variant(struct kutf_test_function *test_func, - unsigned int fixture_index) +static int create_fixture_variant(struct kutf_test_function *test_func, unsigned int fixture_index) { struct kutf_test_fixture *test_fix; - char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ + char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ struct dentry *tmp; int err; @@ -592,10 +573,8 @@ static int create_fixture_variant(struct kutf_test_function *test_func, goto fail_file; } - tmp = debugfs_create_file_unsafe( - "run", 0600, test_fix->dir, - test_fix, - &kutf_debugfs_run_ops); + tmp = debugfs_create_file_unsafe("run", 0600, test_fix->dir, test_fix, + &kutf_debugfs_run_ops); if (IS_ERR_OR_NULL(tmp)) { pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); /* Might not be the right error, we don't get it passed back to us */ @@ -635,13 +614,10 @@ static int ktufp_u32_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n"); #endif -void kutf_add_test_with_filters_and_data( - struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data test_data) +void 
kutf_add_test_with_filters_and_data(struct kutf_suite *suite, unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters, union kutf_callback_data test_data) { struct kutf_test_function *test_func; struct dentry *tmp; @@ -670,11 +646,10 @@ void kutf_add_test_with_filters_and_data( test_func->filters = filters; #if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE - tmp = debugfs_create_file_unsafe("filters", 0004, test_func->dir, - &test_func->filters, &kutfp_fops_x32_ro); + tmp = debugfs_create_file_unsafe("filters", 0004, test_func->dir, &test_func->filters, + &kutfp_fops_x32_ro); #else - tmp = debugfs_create_x32("filters", 0004, test_func->dir, - &test_func->filters); + tmp = debugfs_create_x32("filters", 0004, test_func->dir, &test_func->filters); #endif if (IS_ERR_OR_NULL(tmp)) { pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); @@ -683,11 +658,9 @@ void kutf_add_test_with_filters_and_data( test_func->test_id = id; #if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE - debugfs_create_u32("test_id", 0004, test_func->dir, - &test_func->test_id); + debugfs_create_u32("test_id", 0004, test_func->dir, &test_func->test_id); #else - tmp = debugfs_create_u32("test_id", 0004, test_func->dir, - &test_func->test_id); + tmp = debugfs_create_u32("test_id", 0004, test_func->dir, &test_func->test_id); if (IS_ERR_OR_NULL(tmp)) { pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); goto fail_file; @@ -717,40 +690,26 @@ fail_alloc: } EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); -void kutf_add_test_with_filters( - struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters) +void kutf_add_test_with_filters(struct kutf_suite *suite, unsigned int id, const char *name, + void (*execute)(struct kutf_context *context), unsigned int filters) { union kutf_callback_data data; data.ptr_value = NULL; - 
kutf_add_test_with_filters_and_data(suite, - id, - name, - execute, - suite->suite_default_flags, - data); + kutf_add_test_with_filters_and_data(suite, id, name, execute, + suite->suite_default_flags & filters, data); } EXPORT_SYMBOL(kutf_add_test_with_filters); -void kutf_add_test(struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context)) +void kutf_add_test(struct kutf_suite *suite, unsigned int id, const char *name, + void (*execute)(struct kutf_context *context)) { union kutf_callback_data data; data.ptr_value = NULL; - kutf_add_test_with_filters_and_data(suite, - id, - name, - execute, - suite->suite_default_flags, + kutf_add_test_with_filters_and_data(suite, id, name, execute, suite->suite_default_flags, data); } EXPORT_SYMBOL(kutf_add_test); @@ -776,14 +735,12 @@ static void kutf_remove_test(struct kutf_test_function *test_func) kfree(test_func); } -struct kutf_suite *kutf_create_suite_with_filters_and_data( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data suite_data) +struct kutf_suite * +kutf_create_suite_with_filters_and_data(struct kutf_application *app, const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters, union kutf_callback_data suite_data) { struct kutf_suite *suite; struct dentry *tmp; @@ -830,43 +787,28 @@ fail_kmalloc: EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); struct kutf_suite *kutf_create_suite_with_filters( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters) + struct kutf_application *app, 
const char *name, unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), unsigned int filters) { union kutf_callback_data data; data.ptr_value = NULL; - return kutf_create_suite_with_filters_and_data(app, - name, - fixture_count, - create_fixture, - remove_fixture, - filters, - data); + return kutf_create_suite_with_filters_and_data(app, name, fixture_count, create_fixture, + remove_fixture, filters, data); } EXPORT_SYMBOL(kutf_create_suite_with_filters); -struct kutf_suite *kutf_create_suite( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context)) +struct kutf_suite *kutf_create_suite(struct kutf_application *app, const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context)) { union kutf_callback_data data; data.ptr_value = NULL; - return kutf_create_suite_with_filters_and_data(app, - name, - fixture_count, - create_fixture, - remove_fixture, - KUTF_F_TEST_GENERIC, - data); + return kutf_create_suite_with_filters_and_data(app, name, fixture_count, create_fixture, + remove_fixture, KUTF_F_TEST_GENERIC, data); } EXPORT_SYMBOL(kutf_create_suite); @@ -911,7 +853,8 @@ struct kutf_application *kutf_create_application(const char *name) tmp = debugfs_create_file("type", 0004, app->dir, "application\n", &kutf_debugfs_const_string_ops); if (IS_ERR_OR_NULL(tmp)) { - pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); + pr_err("Failed to create debugfs file \"type\" when creating application %s\n", + name); goto fail_file; } @@ -946,15 +889,14 @@ void kutf_destroy_application(struct kutf_application *app) } EXPORT_SYMBOL(kutf_destroy_application); -static struct kutf_context *kutf_create_context( - struct kutf_test_fixture 
*test_fix) +static struct kutf_context *kutf_create_context(struct kutf_test_fixture *test_fix) { struct kutf_context *new_context; new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); if (!new_context) { pr_err("Failed to allocate test context"); - goto fail_alloc; + goto fail_context_alloc; } new_context->result_set = kutf_create_result_set(); @@ -975,6 +917,8 @@ static struct kutf_context *kutf_create_context( new_context->fixture_name = NULL; new_context->test_data = test_fix->test_func->test_data; + mutex_init(&new_context->output_sync); + new_context->userdata.flags = 0; INIT_LIST_HEAD(&new_context->userdata.input_head); init_waitqueue_head(&new_context->userdata.input_waitq); @@ -987,7 +931,7 @@ static struct kutf_context *kutf_create_context( fail_result_set: kfree(new_context); -fail_alloc: +fail_context_alloc: return NULL; } @@ -1011,15 +955,13 @@ static void kutf_context_put(struct kutf_context *context) kref_put(&context->kref, kutf_destroy_context); } - -static void kutf_set_result(struct kutf_context *context, - enum kutf_result_status status) +static void kutf_set_result(struct kutf_context *context, enum kutf_result_status status) { context->status = status; } static void kutf_set_expected_result(struct kutf_context *context, - enum kutf_result_status expected_status) + enum kutf_result_status expected_status) { context->expected_status = expected_status; } @@ -1030,10 +972,8 @@ static void kutf_set_expected_result(struct kutf_context *context, * @message: Result string * @new_status: Result status */ -static void kutf_test_log_result( - struct kutf_context *context, - const char *message, - enum kutf_result_status new_status) +static void kutf_test_log_result(struct kutf_context *context, const char *message, + enum kutf_result_status new_status) { if (context->status < new_status) context->status = new_status; @@ -1042,10 +982,8 @@ static void kutf_test_log_result( kutf_add_result(context, new_status, message); } -void 
kutf_test_log_result_external( - struct kutf_context *context, - const char *message, - enum kutf_result_status new_status) +void kutf_test_log_result_external(struct kutf_context *context, const char *message, + enum kutf_result_status new_status) { kutf_test_log_result(context, message, new_status); } @@ -1095,8 +1033,9 @@ void kutf_test_skip_msg(struct kutf_context *context, const char *message) kutf_set_result(context, KUTF_RESULT_SKIP); kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); - kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, - "Test skipped: %s", message), KUTF_RESULT_SKIP); + kutf_test_log_result(context, + kutf_dsprintf(&context->fixture_pool, "Test skipped: %s", message), + KUTF_RESULT_SKIP); kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); } EXPORT_SYMBOL(kutf_test_skip_msg); @@ -1124,6 +1063,20 @@ void kutf_test_info(struct kutf_context *context, char const *message) } EXPORT_SYMBOL(kutf_test_info); +__printf(2, 3) void kutf_test_info_msg(struct kutf_context *context, char const *msg, ...) +{ + va_list args; + + mutex_lock(&context->output_sync); + + va_start(args, msg); + kutf_test_info(context, kutf_dsprintf(&context->fixture_pool, msg, args)); + va_end(args); + + mutex_unlock(&context->output_sync); +} +EXPORT_SYMBOL(kutf_test_info_msg); + void kutf_test_warn(struct kutf_context *context, char const *message) { kutf_test_log_result(context, message, KUTF_RESULT_WARN); @@ -1136,6 +1089,20 @@ void kutf_test_fail(struct kutf_context *context, char const *message) } EXPORT_SYMBOL(kutf_test_fail); +__printf(2, 3) void kutf_test_fail_msg(struct kutf_context *context, char const *msg, ...) 
+{ + va_list args; + + mutex_lock(&context->output_sync); + + va_start(args, msg); + kutf_test_fail(context, kutf_dsprintf(&context->fixture_pool, msg, args)); + va_end(args); + + mutex_unlock(&context->output_sync); +} +EXPORT_SYMBOL(kutf_test_fail_msg); + void kutf_test_fatal(struct kutf_context *context, char const *message) { kutf_test_log_result(context, message, KUTF_RESULT_FATAL); @@ -1185,7 +1152,7 @@ static void __exit exit_kutf_core(void) destroy_workqueue(kutf_workq); } -#else /* CONFIG_DEBUG_FS */ +#else /* CONFIG_DEBUG_FS */ /** * init_kutf_core - Module entry point @@ -1208,7 +1175,7 @@ static int __init init_kutf_core(void) static void __exit exit_kutf_core(void) { } -#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_DEBUG_FS */ MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c index 21f5fadcc5f6..6454a20b8ae9 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,8 +33,7 @@ static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; static DEFINE_MUTEX(buffer_lock); -const char *kutf_dsprintf(struct kutf_mempool *pool, - const char *fmt, ...) +const char *kutf_dsprintf(struct kutf_mempool *pool, const char *fmt, ...) 
{ va_list args; int len; @@ -51,7 +50,7 @@ const char *kutf_dsprintf(struct kutf_mempool *pool, goto fail_format; } - if (len >= sizeof(tmp_buffer)) { + if (len >= (int)sizeof(tmp_buffer)) { pr_warn("%s: Truncated dsprintf message %s\n", __func__, fmt); size = sizeof(tmp_buffer); } else { diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c index a6f54b61d4ad..a221aa75a191 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,7 +43,7 @@ #include "../mali_kutf_clk_rate_trace_test.h" -#define MINOR_FOR_FIRST_KBASE_DEV (-1) +#define MINOR_FOR_FIRST_KBASE_DEV (-1) /* KUTF test application pointer for this test */ static struct kutf_application *kutf_app; @@ -133,21 +133,19 @@ static struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; #define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) static char portal_msg_buf[PORTAL_MSG_LEN]; -static void kutf_portal_trace_write( - struct kbase_clk_rate_listener *listener, - u32 index, u32 new_rate) +static void kutf_portal_trace_write(struct kbase_clk_rate_listener *listener, u32 index, + u32 new_rate) { struct clk_trace_snapshot *snapshot; struct kutf_clk_rate_trace_fixture_data *data; if (listener == NULL) { - pr_err("%s - index: %u, new_rate: %u, listener is NULL\n", - __func__, index, new_rate); + pr_err("%s - index: %u, new_rate: %u, listener is 
NULL\n", __func__, index, + new_rate); return; } - data = container_of(listener, struct kutf_clk_rate_trace_fixture_data, - listener); + data = container_of(listener, struct kutf_clk_rate_trace_fixture_data, listener); lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); @@ -196,7 +194,7 @@ static void kutf_set_pm_ctx_idle(struct kutf_context *context) } static const char *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct clk_trace_portal_input *cmd) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; int seq = cmd->cmd_input.u.val_u64 & 0xFF; @@ -204,8 +202,7 @@ static const char *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; char const *errmsg = NULL; - WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && - req != PORTAL_CMD_DEC_PM_CTX_CNT); + WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && req != PORTAL_CMD_DEC_PM_CTX_CNT); if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { data->pm_ctx_cnt++; @@ -220,20 +217,19 @@ static const char *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, } /* Skip the length check, no chance of overflow for two ints */ - snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); errmsg = kutf_dsprintf(&context->fixture_pool, - "Error in sending ack for adjusting pm_ctx_cnt"); + "Error in sending ack for adjusting pm_ctx_cnt"); } return errmsg; } static const char *kutf_clk_trace_do_get_rate(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct clk_trace_portal_input *cmd) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; struct kbase_device *kbdev = data->kbdev; @@ -247,8 +243,7 @@ static 
const char *kutf_clk_trace_do_get_rate(struct kutf_context *context, WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); - ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, RATE:[", seq); + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, RATE:[", seq); for (i = 0; i < data->nclks; i++) { spin_lock(&kbdev->pm.clk_rtm.lock); @@ -260,24 +255,22 @@ static const char *kutf_clk_trace_do_get_rate(struct kutf_context *context, spin_unlock(&kbdev->pm.clk_rtm.lock); if ((i + 1) == data->nclks) - ret += snprintf(portal_msg_buf + ret, - PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}", - rate, idle); + ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, + "0x%lx], GPU_IDLE:%d}", rate, idle); else - ret += snprintf(portal_msg_buf + ret, - PORTAL_MSG_LEN - ret, "0x%lx, ", rate); + ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, "0x%lx, ", + rate); if (ret >= PORTAL_MSG_LEN) { pr_warn("Message buf overflow with rate array data\n"); return kutf_dsprintf(&context->fixture_pool, - "Message buf overflow with rate array data"); + "Message buf overflow with rate array data"); } } if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { pr_warn("Error in sending back rate array\n"); - errmsg = kutf_dsprintf(&context->fixture_pool, - "Error in sending rate array"); + errmsg = kutf_dsprintf(&context->fixture_pool, "Error in sending rate array"); } return errmsg; @@ -297,7 +290,7 @@ static const char *kutf_clk_trace_do_get_rate(struct kutf_context *context, * Return: generated string */ static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct clk_trace_portal_input *cmd) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; struct clk_trace_snapshot snapshot; @@ -309,8 +302,7 @@ static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, WARN_ON(cmd->portal_cmd != 
PORTAL_CMD_GET_TRACE_SNAPSHOT); - ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); for (i = 0; i < data->nclks; i++) { spin_lock(&data->kbdev->pm.clk_rtm.lock); @@ -327,20 +319,19 @@ static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, fmt = "(0x%lx, 0x%lx, %u, %u)]}"; else fmt = "(0x%lx, 0x%lx, %u, %u), "; - ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, - fmt, snapshot.previous_rate, snapshot.current_rate, - snapshot.rate_up_cnt, snapshot.rate_down_cnt); + ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, fmt, + snapshot.previous_rate, snapshot.current_rate, snapshot.rate_up_cnt, + snapshot.rate_down_cnt); if (ret >= PORTAL_MSG_LEN) { pr_warn("Message buf overflow with snapshot data\n"); return kutf_dsprintf(&context->fixture_pool, - "Message buf overflow with snapshot data"); + "Message buf overflow with snapshot data"); } } if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { pr_warn("Error in sending back snapshot array\n"); - errmsg = kutf_dsprintf(&context->fixture_pool, - "Error in sending snapshot array"); + errmsg = kutf_dsprintf(&context->fixture_pool, "Error in sending snapshot array"); } return errmsg; @@ -356,9 +347,8 @@ static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, * * Return: generated string */ -static const char *kutf_clk_trace_do_invoke_notify_42k( - struct kutf_context *context, - struct clk_trace_portal_input *cmd) +static const char *kutf_clk_trace_do_invoke_notify_42k(struct kutf_context *context, + struct clk_trace_portal_input *cmd) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; int seq = cmd->cmd_input.u.val_u64 & 0xFF; @@ -372,32 +362,30 @@ static const char *kutf_clk_trace_do_invoke_notify_42k( spin_lock(&clk_rtm->lock); data->invoke_notify = true; - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, 0, 
new_rate_hz); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, 0, new_rate_hz); data->invoke_notify = false; spin_unlock(&clk_rtm->lock); - ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); if (ret >= PORTAL_MSG_LEN) { pr_warn("Message buf overflow with invoked data\n"); return kutf_dsprintf(&context->fixture_pool, - "Message buf overflow with invoked data"); + "Message buf overflow with invoked data"); } if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ "request\n"); errmsg = kutf_dsprintf(&context->fixture_pool, - "Error in sending ack for " INVOKE_NOTIFY_42KHZ "request"); + "Error in sending ack for " INVOKE_NOTIFY_42KHZ "request"); } return errmsg; } static const char *kutf_clk_trace_do_close_portal(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct clk_trace_portal_input *cmd) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; int seq = cmd->cmd_input.u.val_u64 & 0xFF; @@ -408,13 +396,12 @@ static const char *kutf_clk_trace_do_close_portal(struct kutf_context *context, data->server_state = PORTAL_STATE_CLOSING; /* Skip the length check, no chance of overflow for two ints */ - snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); errmsg = kutf_dsprintf(&context->fixture_pool, - "Error in sending ack for " CLOSE_PORTAL "reuquest"); + "Error in sending ack for " CLOSE_PORTAL "reuquest"); } return errmsg; @@ -430,9 +417,8 @@ static const char *kutf_clk_trace_do_close_portal(struct kutf_context *context, * * Return: A string to indicate the platform 
(PV/PTM/GPU/UNKNOWN) */ -static const char *kutf_clk_trace_do_get_platform( - struct kutf_context *context, - struct clk_trace_portal_input *cmd) +static const char *kutf_clk_trace_do_get_platform(struct kutf_context *context, + struct clk_trace_portal_input *cmd) { int seq = cmd->cmd_input.u.val_u64 & 0xFF; char const *errmsg = NULL; @@ -442,17 +428,16 @@ static const char *kutf_clk_trace_do_get_platform( #if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) struct kutf_clk_rate_trace_fixture_data *data = context->fixture; - arbiter_if_node = - of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL); + arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL); #endif if (arbiter_if_node) { - power_node = of_find_compatible_node(NULL, NULL, - "arm,mali-gpu-power"); + power_node = of_find_compatible_node(NULL, NULL, "arm,mali-gpu-power"); if (power_node) { platform = "PV"; } else { - power_node = of_find_compatible_node(NULL, NULL, - "arm,mali-ptm"); + power_node = of_find_compatible_node(NULL, NULL, "arm,mali-ptm"); if (power_node) platform = "PTM"; else @@ -463,36 +448,33 @@ static const char *kutf_clk_trace_do_get_platform( } pr_debug("%s - platform is %s\n", __func__, platform); - snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, PLATFORM:%s}", seq, platform); + snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, PLATFORM:%s}", seq, platform); WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_PLATFORM); if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); errmsg = kutf_dsprintf(&context->fixture_pool, - "Error in sending ack for " GET_PLATFORM "request"); + "Error in sending ack for " GET_PLATFORM "request"); } return errmsg; } static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct 
clk_trace_portal_input *cmd) { int i; int err = kutf_helper_receive_named_val(context, &cmd->cmd_input); cmd->named_val_err = err; - if (err == KUTF_HELPER_ERR_NONE && - cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { + if (err == KUTF_HELPER_ERR_NONE && cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { /* All portal request commands are of format (named u64): * CMD_NAME=1234 * where, 1234 is a (variable) sequence number tag. */ for (i = 0; i < PORTAL_TOTAL_CMDS; i++) { - if (strcmp(cmd->cmd_input.val_name, - kbasep_portal_cmd_name_map[i].name)) + if (strcmp(cmd->cmd_input.val_name, kbasep_portal_cmd_name_map[i].name)) continue; cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd; @@ -504,8 +486,8 @@ static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, return false; } -static void kutf_clk_trace_flag_result(struct kutf_context *context, - enum kutf_result_status result, char const *msg) +static void kutf_clk_trace_flag_result(struct kutf_context *context, enum kutf_result_status result, + char const *msg) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; @@ -513,20 +495,18 @@ static void kutf_clk_trace_flag_result(struct kutf_context *context, data->test_status = result; if (msg) data->result_msg = msg; - if (data->server_state == PORTAL_STATE_LIVE && - result > KUTF_RESULT_WARN) { + if (data->server_state == PORTAL_STATE_LIVE && result > KUTF_RESULT_WARN) { data->server_state = PORTAL_STATE_CLOSING; } } } static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct clk_trace_portal_input *cmd) { char const *errmsg = NULL; - BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != - PORTAL_TOTAL_CMDS); + BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != PORTAL_TOTAL_CMDS); WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID); switch (cmd->portal_cmd) { @@ -554,10 +534,9 @@ static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, break; default: 
pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n", - cmd->portal_cmd); + cmd->portal_cmd); errmsg = kutf_dsprintf(&context->fixture_pool, - "Don't know how to handle portal_cmd: %d", - cmd->portal_cmd); + "Don't know how to handle portal_cmd: %d", cmd->portal_cmd); break; } @@ -578,7 +557,7 @@ static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, * Return: 0 on success, non-zero on failure */ static int kutf_clk_trace_do_nack_response(struct kutf_context *context, - struct clk_trace_portal_input *cmd) + struct clk_trace_portal_input *cmd) { int seq; int err; @@ -587,21 +566,19 @@ static int kutf_clk_trace_do_nack_response(struct kutf_context *context, WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID); if (cmd->named_val_err == KUTF_HELPER_ERR_NONE && - cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { + cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { /* Keep seq number as % 256 */ seq = cmd->cmd_input.u.val_u64 & 255; - snprintf(portal_msg_buf, PORTAL_MSG_LEN, - "{SEQ:%d, MSG: Unknown command '%s'.}", seq, - cmd->cmd_input.val_name); - err = kutf_helper_send_named_str(context, "NACK", - portal_msg_buf); + snprintf(portal_msg_buf, PORTAL_MSG_LEN, "{SEQ:%d, MSG: Unknown command '%s'.}", + seq, cmd->cmd_input.val_name); + err = kutf_helper_send_named_str(context, "NACK", portal_msg_buf); } else - err = kutf_helper_send_named_str(context, "NACK", - "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); + err = kutf_helper_send_named_str( + context, "NACK", "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); if (err) { errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send portal NACK response"); + "Failed to send portal NACK response"); kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); } @@ -642,8 +619,7 @@ static void kutf_clk_trace_barebone_check(struct kutf_context *context) } spin_unlock(&kbdev->pm.clk_rtm.lock); if (fail) { - msg = kutf_dsprintf(&context->fixture_pool, - "GPU Idle not yielding 
0-rate"); + msg = kutf_dsprintf(&context->fixture_pool, "GPU Idle not yielding 0-rate"); pr_err("Trace did not see idle rate\n"); } else { /* Make local PM active if not done so yet */ @@ -658,7 +634,7 @@ static void kutf_clk_trace_barebone_check(struct kutf_context *context) for (i = 0; i < data->nclks; i++) { /* Rate match between the manager and the trace */ if (kbdev->pm.clk_rtm.clks[i]->clock_val != - data->snapshot[i].current_rate) { + data->snapshot[i].current_rate) { fail = true; break; } @@ -667,20 +643,19 @@ static void kutf_clk_trace_barebone_check(struct kutf_context *context) if (idle[1]) { msg = kutf_dsprintf(&context->fixture_pool, - "GPU still idle after set_pm_ctx_active"); + "GPU still idle after set_pm_ctx_active"); pr_err("GPU still idle after set_pm_ctx_active\n"); } if (!msg && fail) { msg = kutf_dsprintf(&context->fixture_pool, - "Trace rate not matching Clk manager's read"); + "Trace rate not matching Clk manager's read"); pr_err("Trace rate not matching Clk manager's read\n"); } } if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) { - msg = kutf_dsprintf(&context->fixture_pool, - "Trace update did not occur"); + msg = kutf_dsprintf(&context->fixture_pool, "Trace update did not occur"); pr_err("Trace update did not occur\n"); } if (msg) @@ -705,19 +680,17 @@ static void kutf_clk_trace_no_clks_dummy(struct kutf_context *context) while (time_before(jiffies, timeout)) { if (kutf_helper_pending_input(context)) { - has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, - &cmd); + has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, &cmd); if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd)) break; kutf_helper_send_named_str(context, "NACK", - "Fatal! No clocks visible, aborting"); + "Fatal! 
No clocks visible, aborting"); } msleep(20); } - kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, - "No clocks visble to the portal"); + kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, "No clocks visble to the portal"); } /** @@ -755,9 +728,8 @@ static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) if (data->server_state == PORTAL_STATE_CLOSING) { while (kutf_helper_pending_input(context) && (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) || - !kutf_clk_trace_end_of_stream(&new_cmd))) { - kutf_helper_send_named_str(context, "NACK", - "Portal closing down"); + !kutf_clk_trace_end_of_stream(&new_cmd))) { + kutf_helper_send_named_str(context, "NACK", "Portal closing down"); } } @@ -803,8 +775,7 @@ static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) * * Return: Fixture data created on success or NULL on failure */ -static void *mali_kutf_clk_rate_trace_create_fixture( - struct kutf_context *context) +static void *mali_kutf_clk_rate_trace_create_fixture(struct kutf_context *context) { struct kutf_clk_rate_trace_fixture_data *data; struct kbase_device *kbdev; @@ -821,7 +792,7 @@ static void *mali_kutf_clk_rate_trace_create_fixture( pr_debug("Creating fixture\n"); data = kutf_mempool_alloc(&context->fixture_pool, - sizeof(struct kutf_clk_rate_trace_fixture_data)); + sizeof(struct kutf_clk_rate_trace_fixture_data)); if (!data) return NULL; @@ -855,8 +826,7 @@ static void *mali_kutf_clk_rate_trace_create_fixture( data->listener.notify = kutf_portal_trace_write; data->invoke_notify = false; - kbase_clk_rate_trace_manager_subscribe( - &kbdev->pm.clk_rtm, &data->listener); + kbase_clk_rate_trace_manager_subscribe(&kbdev->pm.clk_rtm, &data->listener); /* Update the kutf_server_portal fixture_data pointer */ g_ptr_portal_data = data; } @@ -882,8 +852,7 @@ static void *mali_kutf_clk_rate_trace_create_fixture( * * @context: KUTF context. 
*/ -static void mali_kutf_clk_rate_trace_remove_fixture( - struct kutf_context *context) +static void mali_kutf_clk_rate_trace_remove_fixture(struct kutf_context *context) { struct kutf_clk_rate_trace_fixture_data *data = context->fixture; struct kbase_device *kbdev = data->kbdev; @@ -892,8 +861,7 @@ static void mali_kutf_clk_rate_trace_remove_fixture( /* Clean up the portal trace write arrangement */ g_ptr_portal_data = NULL; - kbase_clk_rate_trace_manager_unsubscribe( - &kbdev->pm.clk_rtm, &data->listener); + kbase_clk_rate_trace_manager_unsubscribe(&kbdev->pm.clk_rtm, &data->listener); } pr_debug("Destroying fixture\n"); kbase_release_device(kbdev); @@ -917,31 +885,25 @@ static int __init mali_kutf_clk_rate_trace_test_module_init(void) kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME); if (!kutf_app) { - pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME - " failed!\n"); + pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME " failed!\n"); return -ENOMEM; } pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME); - suite = kutf_create_suite_with_filters_and_data( - kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, - mali_kutf_clk_rate_trace_create_fixture, - mali_kutf_clk_rate_trace_remove_fixture, - KUTF_F_TEST_GENERIC, - suite_data); + suite = kutf_create_suite_with_filters_and_data(kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, + mali_kutf_clk_rate_trace_create_fixture, + mali_kutf_clk_rate_trace_remove_fixture, + KUTF_F_TEST_GENERIC, suite_data); if (!suite) { - pr_warn("Creation of suite %s failed!\n", - CLK_RATE_TRACE_SUITE_NAME); + pr_warn("Creation of suite %s failed!\n", CLK_RATE_TRACE_SUITE_NAME); kutf_destroy_application(kutf_app); return -ENOMEM; } filters = suite->suite_default_flags; - kutf_add_test_with_filters( - suite, 0x0, CLK_RATE_TRACE_PORTAL, - mali_kutf_clk_rate_trace_test_portal, - filters); + kutf_add_test_with_filters(suite, 0x0, CLK_RATE_TRACE_PORTAL, + mali_kutf_clk_rate_trace_test_portal, filters); pr_debug("Init complete\n"); return 0; @@ 
-958,7 +920,6 @@ static void __exit mali_kutf_clk_rate_trace_test_module_exit(void) pr_debug("Exit complete\n"); } - module_init(mali_kutf_clk_rate_trace_test_module_init); module_exit(mali_kutf_clk_rate_trace_test_module_exit); diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h index a716b9f70135..93b18a500b41 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -126,14 +126,14 @@ enum kbasep_clk_rate_trace_req { * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be * dropped down to 0 as part of the portal close clean up. */ -#define GET_PLATFORM "GET_PLATFORM" -#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" -#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" -#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" -#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" -#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" -#define CLOSE_PORTAL "CLOSE_PORTAL" -#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" +#define GET_PLATFORM "GET_PLATFORM" +#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" +#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" +#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" +#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" +#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" +#define CLOSE_PORTAL "CLOSE_PORTAL" +#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" /** * DOC: Portal service response tag names. 
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c index f2a014d9b5ca..112aef433759 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -64,9 +64,8 @@ struct kutf_irq_fixture_data { #define IRQ_TIMEOUT HZ /* Kernel API for setting irq throttle hook callback and irq time in us*/ -extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type); +extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + int irq_type); extern irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); static DECLARE_WAIT_QUEUE_HEAD(wait); @@ -75,7 +74,7 @@ static u64 irq_time; static void *kbase_untag(void *ptr) { - return (void *)(((uintptr_t) ptr) & ~3); + return (void *)(((uintptr_t)ptr) & ~3); } /** @@ -88,7 +87,7 @@ static void *kbase_untag(void *ptr) static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) { struct kbase_device *kbdev = kbase_untag(data); - u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_STATUS)); irqreturn_t result; u64 tval; bool has_test_irq = val & TEST_IRQ; @@ -96,8 +95,7 @@ static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) if (has_test_irq) { tval = ktime_get_real_ns(); /* Clear the test source only here */ - kbase_reg_write(kbdev, 
GPU_CONTROL_REG(GPU_IRQ_CLEAR), - TEST_IRQ); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), TEST_IRQ); /* Remove the test IRQ status bit */ val = val ^ TEST_IRQ; } @@ -121,13 +119,11 @@ static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) * * Return: Fixture data created on success or NULL on failure */ -static void *mali_kutf_irq_default_create_fixture( - struct kutf_context *context) +static void *mali_kutf_irq_default_create_fixture(struct kutf_context *context) { struct kutf_irq_fixture_data *data; - data = kutf_mempool_alloc(&context->fixture_pool, - sizeof(struct kutf_irq_fixture_data)); + data = kutf_mempool_alloc(&context->fixture_pool, sizeof(struct kutf_irq_fixture_data)); if (!data) goto fail; @@ -151,8 +147,7 @@ fail: * * @context: KUTF context. */ -static void mali_kutf_irq_default_remove_fixture( - struct kutf_context *context) +static void mali_kutf_irq_default_remove_fixture(struct kutf_context *context) { struct kutf_irq_fixture_data *data = context->fixture; struct kbase_device *kbdev = data->kbdev; @@ -185,8 +180,7 @@ static void mali_kutf_irq_latency(struct kutf_context *context) kbase_pm_context_active(kbdev); kbase_pm_wait_for_desired_state(kbdev); - kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, - GPU_IRQ_HANDLER); + kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, GPU_IRQ_HANDLER); for (i = 1; i <= NR_TEST_IRQS; i++) { u64 start_time = ktime_get_real_ns(); @@ -194,8 +188,7 @@ static void mali_kutf_irq_latency(struct kutf_context *context) triggered = false; /* Trigger fake IRQ */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - TEST_IRQ); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT), TEST_IRQ); if (wait_event_timeout(wait, triggered, IRQ_TIMEOUT) == 0) { /* Wait extra time to see if it would come */ @@ -224,14 +217,16 @@ static void mali_kutf_irq_latency(struct kutf_context *context) if (i > NR_TEST_IRQS) { do_div(average_time, NR_TEST_IRQS); - 
results = kutf_dsprintf(&context->fixture_pool, - "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", - min_time, max_time, average_time); + results = kutf_dsprintf( + &context->fixture_pool, + "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", + min_time, max_time, average_time); kutf_test_pass(context, results); } else { - results = kutf_dsprintf(&context->fixture_pool, - "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", - i, NR_TEST_IRQS, triggered); + results = kutf_dsprintf( + &context->fixture_pool, + "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", i, + NR_TEST_IRQS, triggered); kutf_test_fail(context, results); } } @@ -252,9 +247,8 @@ static int __init mali_kutf_irq_test_main_init(void) return -ENOMEM; } - suite = kutf_create_suite(irq_app, "irq_default", - 1, mali_kutf_irq_default_create_fixture, - mali_kutf_irq_default_remove_fixture); + suite = kutf_create_suite(irq_app, "irq_default", 1, mali_kutf_irq_default_create_fixture, + mali_kutf_irq_default_remove_fixture); if (suite == NULL) { pr_warn("Creation of test suite failed!\n"); @@ -262,8 +256,7 @@ static int __init mali_kutf_irq_test_main_init(void) return -ENOMEM; } - kutf_add_test(suite, 0x0, "irq_latency", - mali_kutf_irq_latency); + kutf_add_test(suite, 0x0, "irq_latency", mali_kutf_irq_latency); return 0; } diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c index 5a42bd675c2a..6b0c0ffbe6c4 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -85,11 +85,11 @@ static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context) u64 original_pte; if (mmu_level == MIDGARD_MMU_LEVEL(3)) - original_pte = - (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3; + original_pte = (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | + ENTRY_IS_ATE_L3; else - original_pte = - (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02; + original_pte = (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | + ENTRY_IS_ATE_L02; dev_dbg(kbdev->dev, "Testing group_id=%u, mmu_level=%u, pte=0x%llx\n", data->group_id, mmu_level, original_pte); diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c index 1e636b9a7759..1abcb8fd98a0 100644 --- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c @@ -12,6 +12,7 @@ #include "linux/mman.h" #include #include +#include /* mali_kbase_mmap.c * @@ -19,7 +20,6 @@ * kbase_context_get_unmapped_area() interface. */ - /** * align_and_check() - Align the specified pointer to the provided alignment and * check that it is still in range. @@ -37,8 +37,8 @@ * false otherwise */ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, - struct vm_unmapped_area_info *info, bool is_shader_code, - bool is_same_4gb_page) + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) { /* Compute highest gap address at the desired alignment */ (*gap_end) -= info->length; @@ -47,14 +47,12 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, if (is_shader_code) { /* Check for 4GB boundary */ if (0 == (*gap_end & BASE_MEM_MASK_4GB)) - (*gap_end) -= (info->align_offset ? 
info->align_offset : - info->length); + (*gap_end) -= (info->align_offset ? info->align_offset : info->length); if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) - (*gap_end) -= (info->align_offset ? info->align_offset : - info->length); + (*gap_end) -= (info->align_offset ? info->align_offset : info->length); - if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + - info->length) & BASE_MEM_MASK_4GB)) + if (!(*gap_end & BASE_MEM_MASK_4GB) || + !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) return false; } else if (is_same_4gb_page) { unsigned long start = *gap_end; @@ -70,8 +68,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, * allocation size is > 2MB and there is enough CPU & * GPU virtual space. */ - unsigned long rounded_offset = - ALIGN(offset, info->align_mask + 1); + unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); start -= rounded_offset; end -= rounded_offset; @@ -87,7 +84,6 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, } } - if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) return false; @@ -129,8 +125,8 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, * -ENOMEM if search is unsuccessful */ -static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info - *info, bool is_shader_code, bool is_same_4gb_page) +static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info *info, + bool is_shader_code, bool is_same_4gb_page) { #if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) struct mm_struct *mm = current->mm; @@ -158,8 +154,7 @@ static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info /* Check highest gap, which does not precede any rbtree node */ gap_start = mm->highest_vm_end; if (gap_start <= high_limit) { - if (align_and_check(&gap_end, gap_start, info, - is_shader_code, is_same_4gb_page)) + if (align_and_check(&gap_end, gap_start, info, is_shader_code, 
is_same_4gb_page)) return gap_end; } @@ -175,8 +170,7 @@ static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; if (gap_start <= high_limit && vma->vm_rb.rb_right) { struct vm_area_struct *right = - rb_entry(vma->vm_rb.rb_right, - struct vm_area_struct, vm_rb); + rb_entry(vma->vm_rb.rb_right, struct vm_area_struct, vm_rb); if (right->rb_subtree_gap >= length) { vma = right; continue; @@ -195,16 +189,15 @@ check_current: if (gap_end > info->high_limit) gap_end = info->high_limit; - if (align_and_check(&gap_end, gap_start, info, - is_shader_code, is_same_4gb_page)) + if (align_and_check(&gap_end, gap_start, info, is_shader_code, + is_same_4gb_page)) return gap_end; } /* Visit left subtree if it looks promising */ if (vma->vm_rb.rb_left) { struct vm_area_struct *left = - rb_entry(vma->vm_rb.rb_left, - struct vm_area_struct, vm_rb); + rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb); if (left->rb_subtree_gap >= length) { vma = left; continue; @@ -217,11 +210,9 @@ check_current: if (!rb_parent(prev)) return -ENOMEM; - vma = rb_entry(rb_parent(prev), - struct vm_area_struct, vm_rb); + vma = rb_entry(rb_parent(prev), struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_right) { - gap_start = vma->vm_prev ? - vma->vm_prev->vm_end : 0; + gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; goto check_current; } } @@ -260,14 +251,13 @@ check_current: return -ENOMEM; } - /* This function is based on Linux kernel's arch_get_unmapped_area, but * simplified slightly. Modifications come from the fact that some values * about the memory area are known in advance. 
*/ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, - const unsigned long addr, const unsigned long len, - const unsigned long pgoff, const unsigned long flags) + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_unmapped_area_info info; @@ -281,8 +271,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, unsigned long low_limit = PAGE_SIZE; #endif int cpu_va_bits = BITS_PER_LONG; - int gpu_pc_bits = - kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + int gpu_pc_bits = kctx->kbdev->gpu_props.log2_program_counter_size; bool is_shader_code = false; bool is_same_4gb_page = false; unsigned long ret; @@ -303,8 +292,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, * is no free region at the address found originally by too large a * same_va_end_addr here, and will fail the allocation gracefully. */ - struct kbase_reg_zone *zone = - kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA); + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, SAME_VA_ZONE); u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT; #if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); @@ -324,8 +312,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, return -ENOMEM; if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - high_limit = - min_t(unsigned long, high_limit, same_va_end_addr); + high_limit = min_t(unsigned long, high_limit, same_va_end_addr); /* If there's enough (> 33 bits) of GPU VA space, align * to 2MB boundaries. 
@@ -343,7 +330,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, } #endif /* CONFIG_64BIT */ if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && - (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { + (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); struct kbase_va_region *reg; @@ -363,21 +350,17 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, #if !MALI_USE_CSF } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { unsigned long extension_bytes = - (unsigned long)(reg->extension - << PAGE_SHIFT); + (unsigned long)(reg->extension << PAGE_SHIFT); /* kbase_check_alloc_sizes() already satisfies * these checks, but they're here to avoid * maintenance hazards due to the assumptions * involved */ - WARN_ON(reg->extension > - (ULONG_MAX >> PAGE_SHIFT)); + WARN_ON(reg->extension > (ULONG_MAX >> PAGE_SHIFT)); WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); WARN_ON(!is_power_of_2(extension_bytes)); align_mask = extension_bytes - 1; - align_offset = - extension_bytes - - (reg->initial_commit << PAGE_SHIFT); + align_offset = extension_bytes - (reg->initial_commit << PAGE_SHIFT); #endif /* !MALI_USE_CSF */ } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { is_same_4gb_page = true; @@ -385,8 +368,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, kbase_gpu_vm_unlock(kctx); #ifndef CONFIG_64BIT } else { - return current->mm->get_unmapped_area( - kctx->filp, addr, len, pgoff, flags); + return current->mm->get_unmapped_area(kctx->kfile->filp, addr, len, pgoff, flags); #endif } @@ -397,11 +379,9 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, info.align_offset = align_offset; info.align_mask = align_mask; - ret = kbase_unmapped_area_topdown(&info, is_shader_code, - is_same_4gb_page); + ret = kbase_unmapped_area_topdown(&info, is_shader_code, is_same_4gb_page); - if 
(IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && - high_limit < same_va_end_addr) { + if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && high_limit < same_va_end_addr) { #if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) /* Retry above TASK_UNMAPPED_BASE */ info.low_limit = TASK_UNMAPPED_BASE; @@ -412,8 +392,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr); #endif - ret = kbase_unmapped_area_topdown(&info, is_shader_code, - is_same_4gb_page); + ret = kbase_unmapped_area_topdown(&info, is_shader_code, is_same_4gb_page); } return ret; diff --git a/drivers/gpu/arm/bifrost/thirdparty/mm.h b/drivers/gpu/arm/bifrost/thirdparty/mm.h new file mode 100644 index 000000000000..bab407b18555 --- /dev/null +++ b/drivers/gpu/arm/bifrost/thirdparty/mm.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#include + +#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) + +#include + +static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ + int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); + + if (unlikely(err == -ENOMEM)) + return VM_FAULT_OOM; + if (unlikely(err < 0 && err != -EBUSY)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +#endif diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c index a6062f170ff9..77ebcb05e2b4 100644 --- a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c +++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,25 +25,14 @@ #include -#define GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull) - void kbase_create_timeline_objects(struct kbase_device *kbdev) { - unsigned int as_nr; + int as_nr; unsigned int slot_i; struct kbase_context *kctx; struct kbase_timeline *timeline = kbdev->timeline; - struct kbase_tlstream *summary = - &kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; - u32 const kbdev_has_cross_stream_sync = - (kbdev->gpu_props.props.raw_props.gpu_features & - GPU_FEATURES_CROSS_STREAM_SYNC_MASK) ? - 1 : - 0; - u32 const arch_maj = (kbdev->gpu_props.props.raw_props.gpu_id & - GPU_ID2_ARCH_MAJOR) >> - GPU_ID2_ARCH_MAJOR_SHIFT; - u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8; + struct kbase_tlstream *summary = &kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + u32 const num_sb_entries = kbdev->gpu_props.gpu_id.arch_major >= 11 ? 
16 : 8; u32 const supports_gpu_sleep = #ifdef KBASE_PM_RUNTIME kbdev->pm.backend.gpu_sleep_supported; @@ -56,23 +45,18 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); /* Create Legacy GPU object to track in AOM for dumping */ - __kbase_tlstream_tl_new_gpu(summary, - kbdev, - kbdev->gpu_props.props.raw_props.gpu_id, - kbdev->gpu_props.num_cores); - + __kbase_tlstream_tl_new_gpu(summary, kbdev, kbdev->id, kbdev->gpu_props.num_cores); for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - __kbase_tlstream_tl_lifelink_as_gpu(summary, - &kbdev->as[as_nr], - kbdev); + __kbase_tlstream_tl_lifelink_as_gpu(summary, &kbdev->as[as_nr], kbdev); /* Trace the creation of a new kbase device and set its properties. */ - __kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id, - kbdev->gpu_props.num_cores, - kbdev->csf.global_iface.group_num, - kbdev->nr_hw_address_spaces, num_sb_entries, - kbdev_has_cross_stream_sync, supports_gpu_sleep); + __kbase_tlstream_tl_kbase_new_device( + summary, kbdev->id, kbdev->gpu_props.num_cores, kbdev->csf.global_iface.group_num, + kbdev->nr_hw_address_spaces, num_sb_entries, + kbdev->gpu_props.gpu_features.cross_stream_sync, supports_gpu_sleep, + 0 + ); /* Lock the context list, to ensure no changes to the list are made * while we're summarizing the contexts and their contents. 
@@ -87,15 +71,12 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) mutex_lock(&kbdev->csf.scheduler.lock); for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) { - struct kbase_queue_group *group = kbdev->csf.scheduler.csg_slots[slot_i].resident_group; if (group) __kbase_tlstream_tl_kbase_device_program_csg( - summary, - kbdev->gpu_props.props.raw_props.gpu_id, - group->kctx->id, group->handle, slot_i, 0); + summary, kbdev->id, group->kctx->id, group->handle, slot_i, 0); } /* Reset body stream buffers while holding the kctx lock. @@ -110,8 +91,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) /* For each context in the device... */ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { size_t i; - struct kbase_tlstream *body = - &timeline->streams[TL_STREAM_TYPE_OBJ]; + struct kbase_tlstream *body = &timeline->streams[TL_STREAM_TYPE_OBJ]; /* Lock the context's KCPU queues, to ensure no KCPU-queue * related actions can occur in this context from now on. @@ -135,20 +115,14 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) * hasn't been traced yet. They may, however, cause benign * errors to be emitted. */ - __kbase_tlstream_tl_kbase_new_ctx(body, kctx->id, - kbdev->gpu_props.props.raw_props.gpu_id); + __kbase_tlstream_tl_kbase_new_ctx(body, kctx->id, kbdev->id); /* Also trace with the legacy AOM tracepoint for dumping */ - __kbase_tlstream_tl_new_ctx(body, - kctx, - kctx->id, - (u32)(kctx->tgid)); + __kbase_tlstream_tl_new_ctx(body, kctx, kctx->id, (u32)(kctx->tgid)); /* Trace the currently assigned address space */ if (kctx->as_nr != KBASEP_AS_NR_INVALID) - __kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id, - kctx->as_nr); - + __kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id, kctx->as_nr); /* Trace all KCPU queues in the context into the body stream. 
* As we acquired the KCPU lock after resetting the body stream, diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c index 9ba89f59f03b..628f29aab9ff 100644 --- a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c +++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,17 +28,16 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) { unsigned int lpu_id; - unsigned int as_nr; + int as_nr; struct kbase_context *kctx; struct kbase_timeline *timeline = kbdev->timeline; - struct kbase_tlstream *summary = - &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + struct kbase_tlstream *summary = &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; /* Summarize the LPU objects. */ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - u32 *lpu = - &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); + void *lpu = &kbdev->gpu_props.js_features[lpu_id]; + + __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, 0); } /* Summarize the Address Space objects. */ @@ -46,21 +45,15 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); /* Create GPU object and make it retain all LPUs and address spaces. 
*/ - __kbase_tlstream_tl_new_gpu(summary, - kbdev, - kbdev->gpu_props.props.raw_props.gpu_id, - kbdev->gpu_props.num_cores); + __kbase_tlstream_tl_new_gpu(summary, kbdev, kbdev->id, kbdev->gpu_props.num_cores); for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - void *lpu = - &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + void *lpu = &kbdev->gpu_props.js_features[lpu_id]; __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); } for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - __kbase_tlstream_tl_lifelink_as_gpu(summary, - &kbdev->as[as_nr], - kbdev); + __kbase_tlstream_tl_lifelink_as_gpu(summary, &kbdev->as[as_nr], kbdev); /* Lock the context list, to ensure no changes to the list are made * while we're summarizing the contexts and their contents. @@ -70,10 +63,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) /* For each context in the device... */ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { /* Summarize the context itself */ - __kbase_tlstream_tl_new_ctx(summary, - kctx, - kctx->id, - (u32)(kctx->tgid)); + __kbase_tlstream_tl_new_ctx(summary, kctx, kctx->id, (u32)(kctx->tgid)); } /* Reset body stream buffers while holding the kctx lock. diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c index 9ed59633c41b..ec38c70504b7 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c @@ -42,13 +42,11 @@ /* These values are used in mali_kbase_tracepoints.h * to retrieve the streams from a kbase_timeline instance. 
*/ -const size_t __obj_stream_offset = - offsetof(struct kbase_timeline, streams) - + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ; +const size_t __obj_stream_offset = offsetof(struct kbase_timeline, streams) + + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ; -const size_t __aux_stream_offset = - offsetof(struct kbase_timeline, streams) - + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX; +const size_t __aux_stream_offset = offsetof(struct kbase_timeline, streams) + + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX; /** * kbasep_timeline_autoflush_timer_callback - autoflush timer callback @@ -60,14 +58,13 @@ const size_t __aux_stream_offset = static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) { enum tl_stream_type stype; - int rcode; + int rcode; struct kbase_timeline *timeline = container_of(timer, struct kbase_timeline, autoflush_timer); CSTD_UNUSED(timer); - for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; - stype++) { + for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; stype++) { struct kbase_tlstream *stream = &timeline->streams[stype]; int af_cnt = atomic_read(&stream->autoflush_counter); @@ -77,10 +74,7 @@ static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) continue; /* Check if stream should be flushed now. 
*/ - if (af_cnt != atomic_cmpxchg( - &stream->autoflush_counter, - af_cnt, - af_cnt + 1)) + if (af_cnt != atomic_cmpxchg(&stream->autoflush_counter, af_cnt, af_cnt + 1)) continue; if (!af_cnt) continue; @@ -90,18 +84,14 @@ static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) } if (atomic_read(&timeline->autoflush_timer_active)) - rcode = mod_timer( - &timeline->autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + rcode = mod_timer(&timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); CSTD_UNUSED(rcode); } - - /*****************************************************************************/ -int kbase_timeline_init(struct kbase_timeline **timeline, - atomic_t *timeline_flags) +int kbase_timeline_init(struct kbase_timeline **timeline, atomic_t *timeline_flags) { enum tl_stream_type i; struct kbase_timeline *result; @@ -121,8 +111,7 @@ int kbase_timeline_init(struct kbase_timeline **timeline, /* Prepare stream structures. */ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) - kbase_tlstream_init(&result->streams[i], i, - &result->event_queue); + kbase_tlstream_init(&result->streams[i], i, &result->event_queue); /* Initialize the kctx list */ mutex_init(&result->tl_kctx_list_lock); @@ -130,8 +119,7 @@ int kbase_timeline_init(struct kbase_timeline **timeline, /* Initialize autoflush timer. */ atomic_set(&result->autoflush_timer_active, 0); - kbase_timer_setup(&result->autoflush_timer, - kbasep_timeline_autoflush_timer_callback); + kbase_timer_setup(&result->autoflush_timer, kbasep_timeline_autoflush_timer_callback); result->timeline_flags = timeline_flags; #if MALI_USE_CSF @@ -271,7 +259,7 @@ void kbase_timeline_release(struct kbase_timeline *timeline) elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time); elapsed_time_ms = ktime_to_ms(elapsed_time); time_to_sleep = (elapsed_time_ms < 0 ? 
TIMELINE_HYSTERESIS_TIMEOUT_MS : - TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); + TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); if (time_to_sleep > 0) msleep_interruptible(time_to_sleep); @@ -314,13 +302,10 @@ int kbase_timeline_streams_flush(struct kbase_timeline *timeline) void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) { - kbase_tlstream_reset( - &timeline->streams[TL_STREAM_TYPE_OBJ]); - kbase_tlstream_reset( - &timeline->streams[TL_STREAM_TYPE_AUX]); + kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_OBJ]); + kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_AUX]); #if MALI_USE_CSF - kbase_tlstream_reset( - &timeline->streams[TL_STREAM_TYPE_CSFFW]); + kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_CSFFW]); #endif } @@ -364,8 +349,7 @@ void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx) * duplicate creation tracepoints. */ #if MALI_USE_CSF - KBASE_TLSTREAM_TL_KBASE_NEW_CTX( - kbdev, kctx->id, kbdev->gpu_props.props.raw_props.gpu_id); + KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kbdev, kctx->id, kbdev->id); #endif /* Trace with the AOM tracepoint even in CSF for dumping */ KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, 0); @@ -393,8 +377,8 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx) } #if MALI_UNIT_TEST -void kbase_timeline_stats(struct kbase_timeline *timeline, - u32 *bytes_collected, u32 *bytes_generated) +void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, + u32 *bytes_generated) { enum tl_stream_type stype; @@ -402,10 +386,8 @@ void kbase_timeline_stats(struct kbase_timeline *timeline, /* Accumulate bytes generated per stream */ *bytes_generated = 0; - for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; - stype++) - *bytes_generated += atomic_read( - &timeline->streams[stype].bytes_generated); + for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; stype++) + *bytes_generated += 
atomic_read(&timeline->streams[stype].bytes_generated); *bytes_collected = atomic_read(&timeline->bytes_collected); } diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h index 62be6c64c850..47231c67877a 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,8 +37,7 @@ struct kbase_timeline; * while timeline instance is valid. * Return: zero on success, negative number on error */ -int kbase_timeline_init(struct kbase_timeline **timeline, - atomic_t *timeline_flags); +int kbase_timeline_init(struct kbase_timeline **timeline, atomic_t *timeline_flags); /** * kbase_timeline_term - terminate timeline infrastructure in kernel @@ -114,7 +113,8 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); * @bytes_collected: Will hold number of bytes read by the user * @bytes_generated: Will hold number of bytes generated by trace points */ -void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); +void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, + u32 *bytes_generated); #endif /* MALI_UNIT_TEST */ /** diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index ae570064e7d0..5f3b79b6ecbd 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -58,6 +58,8 @@ static int kbase_unprivileged_global_profiling_set(const char *val, const struct int new_val; int ret = 
kstrtoint(val, 0, &new_val); + CSTD_UNUSED(kp); + if (ret == 0) { if (new_val < 1) return -EINVAL; @@ -77,12 +79,11 @@ module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_ &kbase_unprivileged_global_profiling, 0600); /* The timeline stream file operations functions. */ -static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, - size_t size, loff_t *f_pos); +static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, + loff_t *f_pos); static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait); static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); -static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, - int datasync); +static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync); static bool timeline_is_permitted(void) { @@ -108,10 +109,9 @@ static bool timeline_is_permitted(void) * * Return: non-zero if any of timeline streams has at last one packet ready */ -static int -kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, - struct kbase_tlstream **ready_stream, - unsigned int *rb_idx_raw) +static int kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, + struct kbase_tlstream **ready_stream, + unsigned int *rb_idx_raw) { enum tl_stream_type i; @@ -126,7 +126,7 @@ kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, * loaded in correct order. */ smp_rmb(); - if (atomic_read(&stream->wbi) != *rb_idx_raw) { + if ((uint)atomic_read(&stream->wbi) != *rb_idx_raw) { *ready_stream = stream; return 1; } @@ -165,9 +165,8 @@ static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline) * * Return: 0 if success, -1 otherwise. 
*/ -static inline int copy_stream_header(char __user *buffer, size_t size, - ssize_t *copy_len, const char *hdr, - size_t hdr_size, size_t *hdr_btc) +static inline int copy_stream_header(char __user *buffer, size_t size, ssize_t *copy_len, + const char *hdr, size_t hdr_size, size_t *hdr_btc) { const size_t offset = hdr_size - *hdr_btc; const size_t copy_size = MIN(size - *copy_len, *hdr_btc); @@ -202,20 +201,18 @@ static inline int copy_stream_header(char __user *buffer, size_t size, * * Return: 0 if success, -1 if copy_to_user has failed. */ -static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, - char __user *buffer, size_t size, - ssize_t *copy_len) +static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, char __user *buffer, + size_t size, ssize_t *copy_len) { - if (copy_stream_header(buffer, size, copy_len, obj_desc_header, - obj_desc_header_size, &timeline->obj_header_btc)) + if (copy_stream_header(buffer, size, copy_len, obj_desc_header, obj_desc_header_size, + &timeline->obj_header_btc)) return -1; - if (copy_stream_header(buffer, size, copy_len, aux_desc_header, - aux_desc_header_size, &timeline->aux_header_btc)) + if (copy_stream_header(buffer, size, copy_len, aux_desc_header, aux_desc_header_size, + &timeline->aux_header_btc)) return -1; #if MALI_USE_CSF - if (copy_stream_header(buffer, size, copy_len, - timeline->csf_tl_reader.tl_header.data, + if (copy_stream_header(buffer, size, copy_len, timeline->csf_tl_reader.tl_header.data, timeline->csf_tl_reader.tl_header.size, &timeline->csf_tl_reader.tl_header.btc)) return -1; @@ -233,8 +230,8 @@ static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, * * Return: number of bytes stored in the buffer */ -static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, - size_t size, loff_t *f_pos) +static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, + loff_t *f_pos) { ssize_t copy_len 
= 0; struct kbase_timeline *timeline; @@ -255,15 +252,14 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, mutex_lock(&timeline->reader_lock); - while (copy_len < size) { + while (copy_len < (ssize_t)size) { struct kbase_tlstream *stream = NULL; unsigned int rb_idx_raw = 0; unsigned int wb_idx_raw; unsigned int rb_idx; size_t rb_size; - if (kbasep_timeline_copy_headers(timeline, buffer, size, - &copy_len)) { + if (kbasep_timeline_copy_headers(timeline, buffer, size, &copy_len)) { copy_len = -EFAULT; break; } @@ -274,14 +270,12 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, * submitted. */ if (copy_len > 0) { - if (!kbasep_timeline_io_packet_pending( - timeline, &stream, &rb_idx_raw)) + if (!kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx_raw)) break; } else { - if (wait_event_interruptible( - timeline->event_queue, - kbasep_timeline_io_packet_pending( - timeline, &stream, &rb_idx_raw))) { + if (wait_event_interruptible(timeline->event_queue, + kbasep_timeline_io_packet_pending( + timeline, &stream, &rb_idx_raw))) { copy_len = -ERESTARTSYS; break; } @@ -299,8 +293,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, rb_size = atomic_read(&stream->buffer[rb_idx].size); if (rb_size > size - copy_len) break; - if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data, - rb_size)) { + if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data, rb_size)) { copy_len = -EFAULT; break; } @@ -321,8 +314,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, #endif /* MALI_UNIT_TEST */ } else { - const unsigned int new_rb_idx_raw = - wb_idx_raw - PACKET_COUNT + 1; + const unsigned int new_rb_idx_raw = wb_idx_raw - PACKET_COUNT + 1; /* Adjust read buffer index to the next valid buffer */ atomic_set(&stream->rbi, new_rb_idx_raw); } @@ -454,8 +446,7 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) return 0;
} -static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, - int datasync) +static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { CSTD_UNUSED(start); CSTD_UNUSED(end); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h index de30bccc7cca..ab4651168247 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,19 +59,19 @@ */ struct kbase_timeline { struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; - struct list_head tl_kctx_list; - struct mutex tl_kctx_list_lock; + struct list_head tl_kctx_list; + struct mutex tl_kctx_list_lock; struct timer_list autoflush_timer; - atomic_t autoflush_timer_active; - struct mutex reader_lock; + atomic_t autoflush_timer_active; + struct mutex reader_lock; wait_queue_head_t event_queue; #if MALI_UNIT_TEST - atomic_t bytes_collected; + atomic_t bytes_collected; #endif /* MALI_UNIT_TEST */ - atomic_t *timeline_flags; - size_t obj_header_btc; - size_t aux_header_btc; - ktime_t last_acquire_time; + atomic_t *timeline_flags; + size_t obj_header_btc; + size_t aux_header_btc; + ktime_t last_acquire_time; #if MALI_USE_CSF struct kbase_csf_tl_reader csf_tl_reader; #endif diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h index b6aaadedc6b9..cefca4c7a4f9 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h @@ -1,7 +1,7 @@ /* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,7 @@ #include /* The number of nanoseconds in a second. */ -#define NSECS_IN_SEC 1000000000ull /* ns */ +#define NSECS_IN_SEC 1000000000ull /* ns */ /** * kbasep_serialize_bytes - serialize bytes to the message buffer @@ -41,11 +41,7 @@ * * Return: updated position in the buffer */ -static inline size_t kbasep_serialize_bytes( - char *buffer, - size_t pos, - const void *bytes, - size_t len) +static inline size_t kbasep_serialize_bytes(char *buffer, size_t pos, const void *bytes, size_t len) { KBASE_DEBUG_ASSERT(buffer); KBASE_DEBUG_ASSERT(bytes); @@ -68,11 +64,8 @@ static inline size_t kbasep_serialize_bytes( * * Return: updated position in the buffer */ -static inline size_t kbasep_serialize_string( - char *buffer, - size_t pos, - const char *string, - size_t max_write_size) +static inline size_t kbasep_serialize_string(char *buffer, size_t pos, const char *string, + size_t max_write_size) { u32 string_len; @@ -84,10 +77,7 @@ static inline size_t kbasep_serialize_string( KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); max_write_size -= sizeof(string_len); - string_len = strscpy( - &buffer[pos + sizeof(string_len)], - string, - max_write_size); + string_len = strscpy(&buffer[pos + sizeof(string_len)], string, max_write_size); string_len += sizeof(char); /* Make sure that the source string fit into the buffer. 
*/ @@ -112,12 +102,10 @@ static inline size_t kbasep_serialize_string( */ static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos) { - u64 timestamp; + u64 timestamp; timestamp = ktime_get_raw_ns(); - return kbasep_serialize_bytes( - buffer, pos, - &timestamp, sizeof(timestamp)); + return kbasep_serialize_bytes(buffer, pos, &timestamp, sizeof(timestamp)); } #endif /* _KBASE_TL_SERIALIZE_H */ diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c index 47059deb4da2..ddbddcbc5968 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,13 +34,9 @@ * * Function sets up immutable part of packet header in the given buffer. */ -static void kbasep_packet_header_setup( - char *buffer, - enum tl_packet_family pkt_family, - enum tl_packet_class pkt_class, - enum tl_packet_type pkt_type, - unsigned int stream_id, - int numbered) +static void kbasep_packet_header_setup(char *buffer, enum tl_packet_family pkt_family, + enum tl_packet_class pkt_class, enum tl_packet_type pkt_type, + unsigned int stream_id, int numbered) { u32 words[2] = { MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id), @@ -58,10 +54,7 @@ static void kbasep_packet_header_setup( * Function updates mutable part of packet header in the given buffer. * Note that value of data_size must not include size of the header.
*/ -static void kbasep_packet_header_update( - char *buffer, - size_t data_size, - int numbered) +static void kbasep_packet_header_update(char *buffer, size_t data_size, int numbered) { u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); @@ -92,10 +85,8 @@ void kbase_tlstream_reset(struct kbase_tlstream *stream) for (i = 0; i < PACKET_COUNT; i++) { if (stream->numbered) - atomic_set( - &stream->buffer[i].size, - PACKET_HEADER_SIZE + - PACKET_NUMBER_SIZE); + atomic_set(&stream->buffer[i].size, + PACKET_HEADER_SIZE + PACKET_NUMBER_SIZE); else atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); } @@ -107,9 +98,9 @@ void kbase_tlstream_reset(struct kbase_tlstream *stream) /* Configuration of timeline streams generated by kernel. */ static const struct { enum tl_packet_family pkt_family; - enum tl_packet_class pkt_class; - enum tl_packet_type pkt_type; - enum tl_stream_id stream_id; + enum tl_packet_class pkt_class; + enum tl_packet_type pkt_type; + enum tl_stream_id stream_id; } tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { { TL_PACKET_FAMILY_TL, @@ -139,10 +130,8 @@ static const struct { #endif }; -void kbase_tlstream_init( - struct kbase_tlstream *stream, - enum tl_stream_type stream_type, - wait_queue_head_t *ready_read) +void kbase_tlstream_init(struct kbase_tlstream *stream, enum tl_stream_type stream_type, + wait_queue_head_t *ready_read) { unsigned int i; @@ -158,13 +147,11 @@ void kbase_tlstream_init( stream->numbered = 0; for (i = 0; i < PACKET_COUNT; i++) - kbasep_packet_header_setup( - stream->buffer[i].data, - tl_stream_cfg[stream_type].pkt_family, - tl_stream_cfg[stream_type].pkt_class, - tl_stream_cfg[stream_type].pkt_type, - tl_stream_cfg[stream_type].stream_id, - stream->numbered); + kbasep_packet_header_setup(stream->buffer[i].data, + tl_stream_cfg[stream_type].pkt_family, + tl_stream_cfg[stream_type].pkt_class, + tl_stream_cfg[stream_type].pkt_type, + tl_stream_cfg[stream_type].stream_id, stream->numbered); #if MALI_UNIT_TEST 
atomic_set(&stream->bytes_generated, 0); @@ -193,25 +180,19 @@ void kbase_tlstream_term(struct kbase_tlstream *stream) * * Warning: the user must update the stream structure with returned value. */ -static size_t kbasep_tlstream_msgbuf_submit( - struct kbase_tlstream *stream, - unsigned int wb_idx_raw, - unsigned int wb_size) +static size_t kbasep_tlstream_msgbuf_submit(struct kbase_tlstream *stream, unsigned int wb_idx_raw, + unsigned int wb_size) { unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; /* Set stream as flushed. */ atomic_set(&stream->autoflush_counter, -1); - kbasep_packet_header_update( - stream->buffer[wb_idx].data, - wb_size - PACKET_HEADER_SIZE, - stream->numbered); + kbasep_packet_header_update(stream->buffer[wb_idx].data, wb_size - PACKET_HEADER_SIZE, + stream->numbered); if (stream->numbered) - kbasep_packet_number_update( - stream->buffer[wb_idx].data, - wb_idx_raw); + kbasep_packet_number_update(stream->buffer[wb_idx].data, wb_idx_raw); /* Increasing write buffer index will expose this packet to the reader. 
* As stream->lock is not taken on reader side we must make sure memory @@ -230,30 +211,25 @@ static size_t kbasep_tlstream_msgbuf_submit( return wb_size; } -char *kbase_tlstream_msgbuf_acquire( - struct kbase_tlstream *stream, - size_t msg_size, - unsigned long *flags) __acquires(&stream->lock) +char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, size_t msg_size, + unsigned long *flags) __acquires(&stream->lock) { - unsigned int wb_idx_raw; - unsigned int wb_idx; - size_t wb_size; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; - KBASE_DEBUG_ASSERT( - PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= - msg_size); + KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= msg_size); spin_lock_irqsave(&stream->lock, *flags); wb_idx_raw = atomic_read(&stream->wbi); - wb_idx = wb_idx_raw % PACKET_COUNT; - wb_size = atomic_read(&stream->buffer[wb_idx].size); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); /* Select next buffer if data will not fit into current one. */ if (wb_size + msg_size > PACKET_SIZE) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + wb_size = kbasep_tlstream_msgbuf_submit(stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; } /* Reserve space in selected buffer. */ @@ -266,9 +242,8 @@ char *kbase_tlstream_msgbuf_acquire( return &stream->buffer[wb_idx].data[wb_size]; } -void kbase_tlstream_msgbuf_release( - struct kbase_tlstream *stream, - unsigned long flags) __releases(&stream->lock) +void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, unsigned long flags) + __releases(&stream->lock) { /* Mark stream as containing unflushed data. 
*/ atomic_set(&stream->autoflush_counter, 0); @@ -276,15 +251,13 @@ void kbase_tlstream_msgbuf_release( spin_unlock_irqrestore(&stream->lock, flags); } -size_t kbase_tlstream_flush_stream( - struct kbase_tlstream *stream) +size_t kbase_tlstream_flush_stream(struct kbase_tlstream *stream) { - unsigned long flags; - unsigned int wb_idx_raw; - unsigned int wb_idx; - size_t wb_size; - size_t min_size = PACKET_HEADER_SIZE; - + unsigned long flags; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + size_t min_size = PACKET_HEADER_SIZE; if (stream->numbered) min_size += PACKET_NUMBER_SIZE; @@ -292,12 +265,11 @@ size_t kbase_tlstream_flush_stream( spin_lock_irqsave(&stream->lock, flags); wb_idx_raw = atomic_read(&stream->wbi); - wb_idx = wb_idx_raw % PACKET_COUNT; - wb_size = atomic_read(&stream->buffer[wb_idx].size); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); if (wb_size > min_size) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); + wb_size = kbasep_tlstream_msgbuf_submit(stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; atomic_set(&stream->buffer[wb_idx].size, wb_size); } else { diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h index c1428495b11c..fe3430ee5c6d 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,7 +80,7 @@ struct kbase_tlstream { atomic_t wbi; atomic_t rbi; - int numbered; + int numbered; atomic_t autoflush_counter; wait_queue_head_t *ready_read; #if MALI_UNIT_TEST @@ -107,9 +107,8 @@ enum tl_stream_type { * @ready_read: Pointer to a wait queue to signal when * timeline messages are ready for collection. */ -void kbase_tlstream_init(struct kbase_tlstream *stream, - enum tl_stream_type stream_type, - wait_queue_head_t *ready_read); +void kbase_tlstream_init(struct kbase_tlstream *stream, enum tl_stream_type stream_type, + wait_queue_head_t *ready_read); /** * kbase_tlstream_term - terminate timeline stream @@ -140,8 +139,8 @@ void kbase_tlstream_reset(struct kbase_tlstream *stream); * Only atomic operations are allowed while the stream is locked * (i.e. do not use any operation that may sleep). */ -char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, - size_t msg_size, unsigned long *flags) __acquires(&stream->lock); +char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, size_t msg_size, + unsigned long *flags) __acquires(&stream->lock); /** * kbase_tlstream_msgbuf_release - unlock selected stream @@ -151,8 +150,8 @@ char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, * Release the stream that has been previously * locked with a call to kbase_tlstream_msgbuf_acquire(). 
*/ -void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, - unsigned long flags) __releases(&stream->lock); +void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, unsigned long flags) + __releases(&stream->lock); /** * kbase_tlstream_flush_stream - flush stream diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index f62c75583566..742735846d49 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -358,8 +358,8 @@ enum tl_msg_id_obj { "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ "New KBase Device", \ - "@IIIIIII", \ - "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ + "@IIIIIIII", \ + "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep,kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db") \ TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \ "Kernel receives a request to process new GPU queue instructions", \ "@IL", \ @@ -2093,7 +2093,8 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_as_count, u32 kbase_device_sb_entry_count, u32 kbase_device_has_cross_stream_sync, - u32 kbase_device_supports_gpu_sleep + u32 kbase_device_supports_gpu_sleep, + u32 kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db ) { const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; @@ -2105,6 +2106,7 @@ void __kbase_tlstream_tl_kbase_new_device( + sizeof(kbase_device_sb_entry_count) + sizeof(kbase_device_has_cross_stream_sync) + sizeof(kbase_device_supports_gpu_sleep) + + sizeof(kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db) ; char *buffer; unsigned 
long acq_flags; @@ -2128,6 +2130,8 @@ void __kbase_tlstream_tl_kbase_new_device( pos, &kbase_device_has_cross_stream_sync, sizeof(kbase_device_has_cross_stream_sync)); pos = kbasep_serialize_bytes(buffer, pos, &kbase_device_supports_gpu_sleep, sizeof(kbase_device_supports_gpu_sleep)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db, sizeof(kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db)); kbase_tlstream_msgbuf_release(stream, acq_flags); } diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index 06e4ca4a6bc2..8e09c286a066 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -77,6 +77,11 @@ extern const size_t aux_desc_header_size; #define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP #define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED +#define TL_PM_STATE_SHADER 0x100 +#define TL_PM_STATE_TILER 0x110 +#define TL_PM_STATE_L2 0x120 +#define TL_PM_STATE_STACK 0xE00 + #define TLSTREAM_ENABLED (1u << 31) void __kbase_tlstream_tl_new_ctx( @@ -393,7 +398,8 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_as_count, u32 kbase_device_sb_entry_count, u32 kbase_device_has_cross_stream_sync, - u32 kbase_device_supports_gpu_sleep + u32 kbase_device_supports_gpu_sleep, + u32 kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db ); void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( @@ -1996,6 +2002,7 @@ struct kbase_tlstream; * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the physical hardware has available * @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported * @kbase_device_supports_gpu_sleep: Whether GPU sleep is supported + * @kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db: Whether v34932631451e2dea4ed0fab0025a0d2767d5e427 is supported */ #if MALI_USE_CSF #define 
KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ @@ -2006,7 +2013,8 @@ struct kbase_tlstream; kbase_device_as_count, \ kbase_device_sb_entry_count, \ kbase_device_has_cross_stream_sync, \ - kbase_device_supports_gpu_sleep \ + kbase_device_supports_gpu_sleep, \ + kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ @@ -2019,7 +2027,8 @@ struct kbase_tlstream; kbase_device_as_count, \ kbase_device_sb_entry_count, \ kbase_device_has_cross_stream_sync, \ - kbase_device_supports_gpu_sleep \ + kbase_device_supports_gpu_sleep, \ + kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db \ ); \ } while (0) #else @@ -2031,7 +2040,8 @@ struct kbase_tlstream; kbase_device_as_count, \ kbase_device_sb_entry_count, \ kbase_device_has_cross_stream_sync, \ - kbase_device_supports_gpu_sleep \ + kbase_device_supports_gpu_sleep, \ + kbase_device_has_vd54d34dbb40917c8cea48cca407a8789413be0db \ ) \ do { } while (0) #endif /* MALI_USE_CSF */ diff --git a/drivers/hwtracing/coresight/mali/Kconfig b/drivers/hwtracing/coresight/mali/Kconfig index 283e2b56b641..6ea8786e604d 100644 --- a/drivers/hwtracing/coresight/mali/Kconfig +++ b/drivers/hwtracing/coresight/mali/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile index a6b5622c92ae..923cb0c910d9 100644 --- a/drivers/hwtracing/coresight/mali/Makefile +++ b/drivers/hwtracing/coresight/mali/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2022-2023 ARM Limited. 
All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -20,29 +20,34 @@ KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build KDIR ?= $(KERNEL_SRC) +M ?= $(shell pwd) ifeq ($(KDIR),) $(error Must specify KDIR to point to the kernel to target)) endif -CONFIG_MALI_CORESIGHT ?= n -ifeq ($(CONFIG_MALI_CORESIGHT),y) +CONFIGS := - ifeq ($(CONFIG_ARM64), y) - CONFIG_CORESIGHT_MALI_SOURCES_ETM ?= y +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + CONFIG_MALI_CORESIGHT ?= n + ifeq ($(CONFIG_MALI_CORESIGHT),y) + + ifeq ($(CONFIG_ARM64), y) + CONFIG_CORESIGHT_MALI_SOURCES_ETM ?= y + endif + + CONFIG_CORESIGHT_MALI_SOURCES_ITM ?= y + CONFIG_CORESIGHT_MALI_SOURCES_ELA ?= y endif - CONFIG_CORESIGHT_MALI_SOURCES_ITM ?= y - CONFIG_CORESIGHT_MALI_SOURCES_ELA ?= y + CONFIGS += \ + CONFIG_MALI_CORESIGHT \ + CONFIG_CORESIGHT_MALI_SOURCES_ETM \ + CONFIG_CORESIGHT_MALI_SOURCES_ITM \ + CONFIG_CORESIGHT_MALI_SOURCES_ELA + endif -CONFIGS := \ - CONFIG_MALI_CORESIGHT \ - CONFIG_CORESIGHT_MALI_SOURCES_ETM \ - CONFIG_CORESIGHT_MALI_SOURCES_ITM \ - CONFIG_CORESIGHT_MALI_SOURCES_ELA - - # # MAKE_ARGS to pass the custom CONFIGs on out-of-tree build # @@ -68,34 +73,40 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ # # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # -EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/gpu/arm/midgard/Module.symvers -EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/hwtracing/coresight/mali/Module.symvers +GPU_SYMBOLS = $(M)/../../../gpu/arm/Module.symvers +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + GPU_SYMBOLS = $(M)/../../../gpu/arm/midgard/Module.symvers +endif + +EXTRA_SYMBOLS += \ + $(M)/../../../base/arm/Module.symvers \ + $(GPU_SYMBOLS) # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree -KBUILD_CFLAGS += -Wall -Werror -KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter 
-KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-prototypes -KBUILD_CFLAGS += -Wold-style-definition -KBUILD_CFLAGS += -Wmissing-include-dirs -KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +CFLAGS_MODULE += -Wall -Werror +CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter +CFLAGS_MODULE += -Wmissing-declarations +CFLAGS_MODULE += -Wmissing-format-attribute +CFLAGS_MODULE += -Wmissing-prototypes +CFLAGS_MODULE += -Wold-style-definition +CFLAGS_MODULE += -Wmissing-include-dirs +CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable) +CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable) +CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned) +CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-missing-field-initializers -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-type-limits +CFLAGS_MODULE += -Wno-missing-field-initializers +CFLAGS_MODULE += -Wno-sign-compare +CFLAGS_MODULE += -Wno-type-limits KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 all: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules modules_install: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) modules_install clean: - $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) clean diff --git a/drivers/hwtracing/coresight/mali/build.bp b/drivers/hwtracing/coresight/mali/build.bp index 824ae54c9e43..33dcd22fa364 100644 --- 
a/drivers/hwtracing/coresight/mali/build.bp +++ b/drivers/hwtracing/coresight/mali/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,7 +87,7 @@ bob_kernel_module { ], srcs: [ "sources/ela/coresight_mali_source_ela_core.c", - "sources/ela/coresight-ela600.h" + "sources/ela/coresight-ela600.h", ], extra_symbols: [ "mali_kbase", diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.h b/drivers/hwtracing/coresight/mali/coresight_mali_common.h index 43154c1f639d..9f928f038cdc 100644 --- a/drivers/hwtracing/coresight/mali/coresight_mali_common.h +++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,58 +26,58 @@ #include /* Macros for CoreSight OP types. 
*/ -#define WRITE_IMM_OP(_reg_addr, _val) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM, \ - .op.write_imm.reg_addr = _reg_addr, .op.write_imm.val = _val \ +#define WRITE_IMM_OP(_reg_addr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM, \ + .op.write_imm.reg_addr = _reg_addr, .op.write_imm.val = _val \ } -#define WRITE_RANGE_OP(_reg_start, _reg_end, _val) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE, \ - .op.write_imm_range.reg_start = _reg_start, \ - .op.write_imm_range.reg_end = _reg_end, .op.write_imm_range.val = _val \ +#define WRITE_RANGE_OP(_reg_start, _reg_end, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE, \ + .op.write_imm_range.reg_start = _reg_start, \ + .op.write_imm_range.reg_end = _reg_end, .op.write_imm_range.val = _val \ } -#define WRITE_PTR_OP(_reg_addr, _ptr) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE, .op.write.reg_addr = _reg_addr, \ - .op.write.ptr = _ptr \ +#define WRITE_PTR_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE, .op.write.reg_addr = _reg_addr, \ + .op.write.ptr = _ptr \ } -#define READ_OP(_reg_addr, _ptr) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ, .op.read.reg_addr = _reg_addr, \ - .op.read.ptr = _ptr \ +#define READ_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ, .op.read.reg_addr = _reg_addr, \ + .op.read.ptr = _ptr \ } -#define POLL_OP(_reg_addr, _mask, _val) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL, .op.poll.reg_addr = _reg_addr, \ - .op.poll.mask = _mask, .op.poll.val = _val \ +#define POLL_OP(_reg_addr, _mask, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL, .op.poll.reg_addr = _reg_addr, \ + .op.poll.mask = _mask, .op.poll.val = _val \ } -#define BIT_OR_OP(_ptr, _val) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR, .op.bitw.ptr = _ptr, \ - .op.bitw.val = _val \ +#define BIT_OR_OP(_ptr, _val) \ + { \ + .type = 
KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ } -#define BIT_XOR_OP(_ptr, _val) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR, .op.bitw.ptr = _ptr, \ - .op.bitw.val = _val \ +#define BIT_XOR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ } -#define BIT_AND_OP(_ptr, _val) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND, .op.bitw.ptr = _ptr, \ - .op.bitw.val = _val \ +#define BIT_AND_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ } -#define BIT_NOT_OP(_ptr) \ - { \ - .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT, .op.bitw.ptr = _ptr, \ +#define BIT_NOT_OP(_ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT, .op.bitw.ptr = _ptr, \ } #ifndef CS_MALI_UNLOCK_COMPONENT diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c index 0da37a75ecfb..286c93a95083 100644 --- a/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c +++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,103 +35,177 @@ #define CS_ELA_BASE_ADDR 0xE0043000 #define CS_GPU_COMMAND_ADDR 0x40003030 #define CS_GPU_COMMAND_TRACE_CONTROL_EN 0x000001DC -#define CS_ELA_MAX_SIGNAL_GROUPS 12U -#define CS_SG_NAME_MAX_LEN 10U -#define CS_ELA_NR_SIG_REGS 8U #define NELEMS(s) (sizeof(s) / sizeof((s)[0])) -#define CS_ELA_SIGREGS_ATTR_RW(_a, _b) \ - static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ - char *const buf) \ - { \ - return sprintf_regs(buf, CS_ELA_##_b##_0, CS_ELA_##_b##_7); \ - } \ - static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t count) \ - { \ - return verify_store_8_regs(dev, buf, count, CS_ELA_##_b##_0); \ - } \ - static DEVICE_ATTR_RW(_a) +#define CS_ELA_DYN_REGS_ATTR_RW(_regname) \ + static ssize_t _regname##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf_reg(buf, CS_ELA_##_regname); \ + } \ + static ssize_t _regname##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_ELA_##_regname); \ + } \ + static DEVICE_ATTR_RW(_regname) + +#define CS_ELA_DYN_REGS_ATTR_RW_TRIG_STATE(_signo) \ + CS_ELA_DYN_REGS_ATTR_RW(SIGSEL##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(TRIGCTRL##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(NEXTSTATE##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(ACTION##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(ALTNEXTSTATE##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(ALTACTION##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(COMPCTRL##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(ALTCOMPCTRL##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(COUNTCOMP##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(TWBSEL##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(EXTMASK##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(EXTCOMP##_signo); \ + 
CS_ELA_DYN_REGS_ATTR_RW(QUALMASK##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(QUALCOMP##_signo); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_0); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_1); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_2); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_3); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_4); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_5); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_6); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGMASK##_signo##_7); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_0); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_1); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_2); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_3); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_4); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_5); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_6); \ + CS_ELA_DYN_REGS_ATTR_RW(SIGCOMP##_signo##_7) + +#define CS_ELA_DYN_REGS_ATTR(_regname) &dev_attr_##_regname.attr + +#define CS_ELA_DYN_REGS_ATTR_TRIG_STATE(_signo) \ + CS_ELA_DYN_REGS_ATTR(SIGSEL##_signo), CS_ELA_DYN_REGS_ATTR(TRIGCTRL##_signo), \ + CS_ELA_DYN_REGS_ATTR(NEXTSTATE##_signo), CS_ELA_DYN_REGS_ATTR(ACTION##_signo), \ + CS_ELA_DYN_REGS_ATTR(ALTNEXTSTATE##_signo), \ + CS_ELA_DYN_REGS_ATTR(ALTACTION##_signo), CS_ELA_DYN_REGS_ATTR(COMPCTRL##_signo), \ + CS_ELA_DYN_REGS_ATTR(ALTCOMPCTRL##_signo), \ + CS_ELA_DYN_REGS_ATTR(COUNTCOMP##_signo), CS_ELA_DYN_REGS_ATTR(TWBSEL##_signo), \ + CS_ELA_DYN_REGS_ATTR(EXTMASK##_signo), CS_ELA_DYN_REGS_ATTR(EXTCOMP##_signo), \ + CS_ELA_DYN_REGS_ATTR(QUALMASK##_signo), CS_ELA_DYN_REGS_ATTR(QUALCOMP##_signo), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_0), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_1), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_2), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_3), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_4), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_5), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_6), \ + CS_ELA_DYN_REGS_ATTR(SIGMASK##_signo##_7), \ + 
CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_0), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_1), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_2), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_3), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_4), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_5), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_6), \ + CS_ELA_DYN_REGS_ATTR(SIGCOMP##_signo##_7) + +#define WRITE_PTR_OP_CS_ELA_DYN_REGS_TRIG_STATE(_signo) \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(_signo), \ + &ela_state.regs[CS_ELA_SIGSEL##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(_signo), \ + &ela_state.regs[CS_ELA_TRIGCTRL##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(_signo), \ + &ela_state.regs[CS_ELA_NEXTSTATE##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ACTION(_signo), \ + &ela_state.regs[CS_ELA_ACTION##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(_signo), \ + &ela_state.regs[CS_ELA_ALTNEXTSTATE##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(_signo), \ + &ela_state.regs[CS_ELA_ALTACTION##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(_signo), \ + &ela_state.regs[CS_ELA_COMPCTRL##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(_signo), \ + &ela_state.regs[CS_ELA_ALTCOMPCTRL##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(_signo), \ + &ela_state.regs[CS_ELA_COUNTCOMP##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(_signo), \ + &ela_state.regs[CS_ELA_TWBSEL##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(_signo), \ + &ela_state.regs[CS_ELA_EXTMASK##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(_signo), \ + &ela_state.regs[CS_ELA_EXTCOMP##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(_signo), \ + &ela_state.regs[CS_ELA_QUALMASK##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(_signo), \ + &ela_state.regs[CS_ELA_QUALCOMP##_signo]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 0), \ + 
&ela_state.regs[CS_ELA_SIGMASK##_signo##_0]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 1), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_1]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 2), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_2]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 3), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_3]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 4), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_4]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 5), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_5]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 6), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_6]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(_signo, 7), \ + &ela_state.regs[CS_ELA_SIGMASK##_signo##_7]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 0), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_0]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 1), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_1]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 2), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_2]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 3), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_3]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 4), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_4]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 5), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_5]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 6), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_6]), \ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(_signo, 7), \ + &ela_state.regs[CS_ELA_SIGCOMP##_signo##_7]) + +#define CS_ELA_DYN_REG_ENUM_TRIG_STATE(_signo) \ + CS_ELA_SIGSEL##_signo, CS_ELA_TRIGCTRL##_signo, CS_ELA_NEXTSTATE##_signo, \ + CS_ELA_ACTION##_signo, CS_ELA_ALTNEXTSTATE##_signo, CS_ELA_ALTACTION##_signo, \ + CS_ELA_COMPCTRL##_signo, CS_ELA_ALTCOMPCTRL##_signo, CS_ELA_COUNTCOMP##_signo, \ + 
CS_ELA_TWBSEL##_signo, CS_ELA_EXTMASK##_signo, CS_ELA_EXTCOMP##_signo, \ + CS_ELA_QUALMASK##_signo, CS_ELA_QUALCOMP##_signo, CS_ELA_SIGMASK##_signo##_0, \ + CS_ELA_SIGMASK##_signo##_1, CS_ELA_SIGMASK##_signo##_2, \ + CS_ELA_SIGMASK##_signo##_3, CS_ELA_SIGMASK##_signo##_4, \ + CS_ELA_SIGMASK##_signo##_5, CS_ELA_SIGMASK##_signo##_6, \ + CS_ELA_SIGMASK##_signo##_7, CS_ELA_SIGCOMP##_signo##_0, \ + CS_ELA_SIGCOMP##_signo##_1, CS_ELA_SIGCOMP##_signo##_2, \ + CS_ELA_SIGCOMP##_signo##_3, CS_ELA_SIGCOMP##_signo##_4, \ + CS_ELA_SIGCOMP##_signo##_5, CS_ELA_SIGCOMP##_signo##_6, CS_ELA_SIGCOMP##_signo##_7 enum cs_ela_dynamic_regs { CS_ELA_TIMECTRL, CS_ELA_TSSR, + CS_ELA_ATBCTRL, + CS_ELA_PTACTION, + CS_ELA_AUXCTRL, + CS_ELA_CNTSEL, - CS_ELA_SIGSEL0, - CS_ELA_COMPCTRL0, - CS_ELA_ALTCOMPCTRL0, - CS_ELA_TWBSEL0, - CS_ELA_QUALMASK0, - CS_ELA_QUALCOMP0, - CS_ELA_SIGMASK0_0, - CS_ELA_SIGMASK0_1, - CS_ELA_SIGMASK0_2, - CS_ELA_SIGMASK0_3, - CS_ELA_SIGMASK0_4, - CS_ELA_SIGMASK0_5, - CS_ELA_SIGMASK0_6, - CS_ELA_SIGMASK0_7, - CS_ELA_SIGCOMP0_0, - CS_ELA_SIGCOMP0_1, - CS_ELA_SIGCOMP0_2, - CS_ELA_SIGCOMP0_3, - CS_ELA_SIGCOMP0_4, - CS_ELA_SIGCOMP0_5, - CS_ELA_SIGCOMP0_6, - CS_ELA_SIGCOMP0_7, - - CS_ELA_SIGSEL4, - CS_ELA_NEXTSTATE4, - CS_ELA_ACTION4, - CS_ELA_ALTNEXTSTATE4, - CS_ELA_COMPCTRL4, - CS_ELA_TWBSEL4, - CS_ELA_SIGMASK4_0, - CS_ELA_SIGMASK4_1, - CS_ELA_SIGMASK4_2, - CS_ELA_SIGMASK4_3, - CS_ELA_SIGMASK4_4, - CS_ELA_SIGMASK4_5, - CS_ELA_SIGMASK4_6, - CS_ELA_SIGMASK4_7, - CS_ELA_SIGCOMP4_0, - CS_ELA_SIGCOMP4_1, - CS_ELA_SIGCOMP4_2, - CS_ELA_SIGCOMP4_3, - CS_ELA_SIGCOMP4_4, - CS_ELA_SIGCOMP4_5, - CS_ELA_SIGCOMP4_6, - CS_ELA_SIGCOMP4_7, + CS_ELA_DYN_REG_ENUM_TRIG_STATE(0), + CS_ELA_DYN_REG_ENUM_TRIG_STATE(1), + CS_ELA_DYN_REG_ENUM_TRIG_STATE(2), + CS_ELA_DYN_REG_ENUM_TRIG_STATE(3), + CS_ELA_DYN_REG_ENUM_TRIG_STATE(4), CS_ELA_NR_DYN_REGS }; -enum cs_ela_tracemodes { - CS_ELA_TRACEMODE_NONE, - CS_ELA_TRACEMODE_JCN, - CS_ELA_TRACEMODE_CEU_EXEC, - CS_ELA_TRACEMODE_CEU_CMDS, - 
CS_ELA_TRACEMODE_MCU_AHBP, - CS_ELA_TRACEMODE_HOST_AXI, - CS_ELA_NR_TRACEMODE -}; - -enum cs_ela_signal_types { - CS_ELA_SIGTYPE_JCN_REQ, - CS_ELA_SIGTYPE_JCN_RES, - CS_ELA_SIGTYPE_CEU_EXEC, - CS_ELA_SIGTYPE_CEU_CMDS, - CS_ELA_SIGTYPE_MCU_AHBP, - CS_ELA_SIGTYPE_HOST_AXI, - CS_ELA_NR_SIGTYPE, -}; - struct cs_ela_state { - enum cs_ela_tracemodes tracemode; - u32 supported_tracemodes; int enabled; - u32 signal_types[CS_ELA_NR_SIGTYPE]; u32 regs[CS_ELA_NR_DYN_REGS]; }; @@ -141,258 +215,91 @@ static char *type_name = "mali-source-ela"; static struct cs_ela_state ela_state = { 0 }; -/* Setup ELA sysfs attributes */ -static char *tracemode_names[] = { - [CS_ELA_TRACEMODE_NONE] = "NONE", [CS_ELA_TRACEMODE_JCN] = "JCN", - [CS_ELA_TRACEMODE_CEU_EXEC] = "CEU_EXEC", [CS_ELA_TRACEMODE_CEU_CMDS] = "CEU_CMDS", - [CS_ELA_TRACEMODE_MCU_AHBP] = "MCU_AHBP", [CS_ELA_TRACEMODE_HOST_AXI] = "HOST_AXI", -}; - -static char *signal_type_names[] = { - [CS_ELA_SIGTYPE_JCN_REQ] = "jcn-request", [CS_ELA_SIGTYPE_JCN_RES] = "jcn-response", - [CS_ELA_SIGTYPE_CEU_EXEC] = "ceu-execution", [CS_ELA_SIGTYPE_CEU_CMDS] = "ceu-commands", - [CS_ELA_SIGTYPE_MCU_AHBP] = "mcu-ahbp", [CS_ELA_SIGTYPE_HOST_AXI] = "host-axi", -}; - -static int signal_type_tracemode_map[] = { - [CS_ELA_SIGTYPE_JCN_REQ] = CS_ELA_TRACEMODE_JCN, - [CS_ELA_SIGTYPE_JCN_RES] = CS_ELA_TRACEMODE_JCN, - [CS_ELA_SIGTYPE_CEU_EXEC] = CS_ELA_TRACEMODE_CEU_EXEC, - [CS_ELA_SIGTYPE_CEU_CMDS] = CS_ELA_TRACEMODE_CEU_CMDS, - [CS_ELA_SIGTYPE_MCU_AHBP] = CS_ELA_TRACEMODE_MCU_AHBP, - [CS_ELA_SIGTYPE_HOST_AXI] = CS_ELA_TRACEMODE_HOST_AXI, -}; - -static void setup_tracemode_registers(int tracemode) +static void reset_dynamic_registers(void) { - switch (tracemode) { - case CS_ELA_TRACEMODE_NONE: - /* Perform full reset of all dynamic registers */ - memset(ela_state.regs, 0x00000000, sizeof(u32) * CS_ELA_NR_DYN_REGS); - - ela_state.tracemode = CS_ELA_TRACEMODE_NONE; - break; - case CS_ELA_TRACEMODE_JCN: - - if 
(ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ] == - ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]) { - ela_state.regs[CS_ELA_TSSR] = 0x00000000; - - ela_state.regs[CS_ELA_SIGSEL0] = - ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ]; - - ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000010; - ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000; - ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF; - ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000; - ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000; - - memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); - ela_state.regs[CS_ELA_SIGMASK0_1] = 0x80000000; - ela_state.regs[CS_ELA_SIGMASK0_3] = 0x80000000; - ela_state.regs[CS_ELA_SIGCOMP0_1] = 0x80000000; - ela_state.regs[CS_ELA_SIGCOMP0_3] = 0x80000000; - - memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); - - ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; - - } else { - ela_state.regs[CS_ELA_TSSR] = 0x00000010; - - ela_state.regs[CS_ELA_SIGSEL0] = - ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ]; - - ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000100; - ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; - ela_state.regs[CS_ELA_TWBSEL0] = 0x00000FFF; - ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000; - ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000; - - memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); - ela_state.regs[CS_ELA_SIGMASK0_2] |= 0x80000000; - ela_state.regs[CS_ELA_SIGCOMP0_2] |= 0x80000000; - - ela_state.regs[CS_ELA_SIGSEL4] = - ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]; - ela_state.regs[CS_ELA_NEXTSTATE4] = 0x00000010; - ela_state.regs[CS_ELA_ACTION4] = 0x00000008; - ela_state.regs[CS_ELA_ALTNEXTSTATE4] = 0x00000001; - ela_state.regs[CS_ELA_COMPCTRL4] = 0x00000100; - ela_state.regs[CS_ELA_TWBSEL4] = 0x00000FFF; - - memset(&ela_state.regs[CS_ELA_SIGMASK4_0], 0x00000000, - sizeof(u32) * 
(CS_ELA_SIGCOMP4_7 - CS_ELA_SIGMASK4_0 + 1)); - ela_state.regs[CS_ELA_SIGMASK4_2] |= 0x80000000; - ela_state.regs[CS_ELA_SIGCOMP4_2] |= 0x80000000; - } - - break; - case CS_ELA_TRACEMODE_CEU_EXEC: - case CS_ELA_TRACEMODE_CEU_CMDS: - ela_state.regs[CS_ELA_TSSR] = 0x00000000; - - if (tracemode == CS_ELA_TRACEMODE_CEU_EXEC) { - ela_state.regs[CS_ELA_SIGSEL0] = - ela_state.signal_types[CS_ELA_SIGTYPE_CEU_EXEC]; - ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000; - } else if (tracemode == CS_ELA_TRACEMODE_CEU_CMDS) { - ela_state.regs[CS_ELA_SIGSEL0] = - ela_state.signal_types[CS_ELA_SIGTYPE_CEU_CMDS]; - ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; - } - - ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001; - ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF; - ela_state.regs[CS_ELA_QUALMASK0] = 0x0000000F; - ela_state.regs[CS_ELA_QUALCOMP0] = 0x0000000F; - - memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); - - memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); - - ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; - - break; - case CS_ELA_TRACEMODE_MCU_AHBP: - case CS_ELA_TRACEMODE_HOST_AXI: - ela_state.regs[CS_ELA_TSSR] = 0x00000000; - - if (tracemode == CS_ELA_TRACEMODE_MCU_AHBP) - ela_state.regs[CS_ELA_SIGSEL0] = - ela_state.signal_types[CS_ELA_SIGTYPE_MCU_AHBP]; - else if (tracemode == CS_ELA_TRACEMODE_HOST_AXI) - ela_state.regs[CS_ELA_SIGSEL0] = - ela_state.signal_types[CS_ELA_SIGTYPE_HOST_AXI]; - - ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001; - ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; - ela_state.regs[CS_ELA_TWBSEL0] = 0x000000FF; - ela_state.regs[CS_ELA_QUALMASK0] = 0x00000003; - ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000003; - - memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); - - memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, - sizeof(u32) * (CS_ELA_SIGCOMP4_7 - 
CS_ELA_SIGSEL4 + 1)); - - ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; - - break; - } - ela_state.tracemode = tracemode; + memset(ela_state.regs, 0x00000000, sizeof(u32) * CS_ELA_NR_DYN_REGS); } -static ssize_t select_show(struct device *dev, struct device_attribute *attr, char *const buf) -{ - ssize_t ret = 0; - unsigned int mode; - - for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) { - if (ela_state.supported_tracemodes & (1U << mode)) { - if (ela_state.tracemode == mode) - ret += sprintf(buf + ret, "[%s]\n", tracemode_names[mode]); - else - ret += sprintf(buf + ret, "%s\n", tracemode_names[mode]); - } - } - return ret; -} - -static ssize_t select_store(struct device *dev, struct device_attribute *attr, const char *buf, - size_t count) -{ - struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); - unsigned int mode = 0; - - /* Check if enabled and return error */ - if (ela_state.enabled == 1) { - dev_err(drvdata->base.dev, - "Config needs to be disabled before modifying registers"); - return -EINVAL; - } - - for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) { - if (sysfs_streq(tracemode_names[mode], buf) && - (ela_state.supported_tracemodes & (1U << mode))) { - setup_tracemode_registers(mode); - return count; - } - } - - dev_err(drvdata->base.dev, "Invalid tracemode: %s", buf); - return -EINVAL; -} - -static DEVICE_ATTR_RW(select); - static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) { return sprintf(buf, "%d\n", ela_state.enabled); } - static DEVICE_ATTR_RO(is_enabled); -static ssize_t sprintf_regs(char *const buf, int from_reg, int to_reg) +static ssize_t reset_regs_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + if (ela_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying 
registers"); + return -EINVAL; + } + reset_dynamic_registers(); + return count; +} +static DEVICE_ATTR_WO(reset_regs); + +/* show and store functions for dynamic registers */ +static ssize_t sprintf_reg(char *const buf, int reg) { ssize_t ret = 0; - unsigned int i = 0; - for (i = from_reg; i <= to_reg; i++) - ret += sprintf(buf + ret, "0x%08X ", ela_state.regs[i]); - - ret += sprintf(buf + ret, "\n"); + ret += sprintf(buf + ret, "0x%08X\n", ela_state.regs[reg]); return ret; } -static ssize_t verify_store_8_regs(struct device *dev, const char *buf, size_t count, int from_reg) +static ssize_t verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) { struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); - u32 regs[CS_ELA_NR_SIG_REGS] = { 0 }; int items; - unsigned int i; - + u64 value; if (ela_state.enabled == 1) { dev_err(drvdata->base.dev, "Config needs to be disabled before modifying registers"); return -EINVAL; } - items = sscanf(buf, "%x %x %x %x %x %x %x %x", &regs[0], &regs[1], &regs[2], &regs[3], - &regs[4], &regs[5], &regs[6], &regs[7]); - if (items <= 0) { + items = sscanf(buf, "%llx", &value); + if (items <= 0 || value > U32_MAX) { dev_err(drvdata->base.dev, "Invalid register value"); return -EINVAL; } - if (items != CS_ELA_NR_SIG_REGS) { - dev_err(drvdata->base.dev, "Incorrect number of registers set (%d != %d)", items, - CS_ELA_NR_SIG_REGS); - return -EINVAL; - } - for (i = 0; i < CS_ELA_NR_SIG_REGS; i++) - ela_state.regs[from_reg + i] = regs[i]; + ela_state.regs[reg] = (u32)value; return count; } -CS_ELA_SIGREGS_ATTR_RW(sigmask0, SIGMASK0); -CS_ELA_SIGREGS_ATTR_RW(sigcomp0, SIGCOMP0); -CS_ELA_SIGREGS_ATTR_RW(sigmask4, SIGMASK4); -CS_ELA_SIGREGS_ATTR_RW(sigcomp4, SIGCOMP4); +CS_ELA_DYN_REGS_ATTR_RW(TIMECTRL); +CS_ELA_DYN_REGS_ATTR_RW(TSSR); +CS_ELA_DYN_REGS_ATTR_RW(ATBCTRL); +CS_ELA_DYN_REGS_ATTR_RW(PTACTION); +CS_ELA_DYN_REGS_ATTR_RW(AUXCTRL); +CS_ELA_DYN_REGS_ATTR_RW(CNTSEL); + +CS_ELA_DYN_REGS_ATTR_RW_TRIG_STATE(0);
+CS_ELA_DYN_REGS_ATTR_RW_TRIG_STATE(1); +CS_ELA_DYN_REGS_ATTR_RW_TRIG_STATE(2); +CS_ELA_DYN_REGS_ATTR_RW_TRIG_STATE(3); +CS_ELA_DYN_REGS_ATTR_RW_TRIG_STATE(4); + +static struct attribute *coresight_ela_reg_attrs[] = { + CS_ELA_DYN_REGS_ATTR(TIMECTRL), CS_ELA_DYN_REGS_ATTR(TSSR), + CS_ELA_DYN_REGS_ATTR(ATBCTRL), CS_ELA_DYN_REGS_ATTR(PTACTION), + CS_ELA_DYN_REGS_ATTR(AUXCTRL), CS_ELA_DYN_REGS_ATTR(CNTSEL), + CS_ELA_DYN_REGS_ATTR_TRIG_STATE(0), CS_ELA_DYN_REGS_ATTR_TRIG_STATE(1), + CS_ELA_DYN_REGS_ATTR_TRIG_STATE(2), CS_ELA_DYN_REGS_ATTR_TRIG_STATE(3), + CS_ELA_DYN_REGS_ATTR_TRIG_STATE(4), NULL, +}; + +static struct attribute_group coresight_ela_reg_group = { + .name = "regs", + .attrs = coresight_ela_reg_attrs, +}; static struct attribute *coresight_ela_attrs[] = { - &dev_attr_select.attr, &dev_attr_is_enabled.attr, - &dev_attr_sigmask0.attr, - &dev_attr_sigcomp0.attr, - &dev_attr_sigmask4.attr, - &dev_attr_sigcomp4.attr, + &dev_attr_reset_regs.attr, NULL, }; @@ -402,6 +309,7 @@ static struct attribute_group coresight_ela_group = { static const struct attribute_group *coresight_ela_groups[] = { &coresight_ela_group, + &coresight_ela_reg_group, NULL, }; @@ -428,101 +336,17 @@ static struct kbase_debug_coresight_csf_op ela_enable_ops[] = { /* ATID[6:0] = 4; valid range 0x1-0x6F, value must be unique and needs to be * known for trace extraction */ - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ATBCTRL, 0x00000400), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_PTACTION, ELA_ACTION_TRACE), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_AUXCTRL, 0x00000000), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CNTSEL, 0x00000000), - /* Trigger State 0 */ - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(0), &ela_state.regs[CS_ELA_SIGSEL0]), - /* May need to be configurable in future. 
*/ - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(0), 0x00000000), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ATBCTRL, &ela_state.regs[CS_ELA_ATBCTRL]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_PTACTION, &ela_state.regs[CS_ELA_PTACTION]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_AUXCTRL, &ela_state.regs[CS_ELA_AUXCTRL]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_CNTSEL, &ela_state.regs[CS_ELA_CNTSEL]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(0), 0x00000001), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ACTION(0), ELA_ACTION_TRACE), - - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(0), 0x00000001), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(0), ELA_ACTION_TRACE), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(0), &ela_state.regs[CS_ELA_COMPCTRL0]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(0), &ela_state.regs[CS_ELA_ALTCOMPCTRL0]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(0), 0x00000000), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(0), &ela_state.regs[CS_ELA_TWBSEL0]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(0), 0x00000000), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(0), 0x00000000), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(0), &ela_state.regs[CS_ELA_QUALMASK0]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(0), &ela_state.regs[CS_ELA_QUALCOMP0]), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 0), &ela_state.regs[CS_ELA_SIGMASK0_0]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 1), &ela_state.regs[CS_ELA_SIGMASK0_1]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 2), &ela_state.regs[CS_ELA_SIGMASK0_2]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 3), &ela_state.regs[CS_ELA_SIGMASK0_3]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 4), &ela_state.regs[CS_ELA_SIGMASK0_4]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 5), &ela_state.regs[CS_ELA_SIGMASK0_5]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 6), &ela_state.regs[CS_ELA_SIGMASK0_6]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 7), 
&ela_state.regs[CS_ELA_SIGMASK0_7]), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 0), &ela_state.regs[CS_ELA_SIGCOMP0_0]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 1), &ela_state.regs[CS_ELA_SIGCOMP0_1]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 2), &ela_state.regs[CS_ELA_SIGCOMP0_2]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 3), &ela_state.regs[CS_ELA_SIGCOMP0_3]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 4), &ela_state.regs[CS_ELA_SIGCOMP0_4]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 5), &ela_state.regs[CS_ELA_SIGCOMP0_5]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 6), &ela_state.regs[CS_ELA_SIGCOMP0_6]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 7), &ela_state.regs[CS_ELA_SIGCOMP0_7]), - - WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(1), CS_ELA_BASE_ADDR + ELA_SIGCOMP(1, 7), - 0x00000000), - WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(2), CS_ELA_BASE_ADDR + ELA_SIGCOMP(2, 7), - 0x00000000), - WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(3), CS_ELA_BASE_ADDR + ELA_SIGCOMP(3, 7), - 0x00000000), - - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(1), 0x11111111), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(2), 0x11111111), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(3), 0x11111111), - - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(1), 0x11111111), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(2), 0x11111111), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(3), 0x11111111), - - /* Trigger State 4 */ - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(4), &ela_state.regs[CS_ELA_SIGSEL4]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(4), 0x00000000), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(4), &ela_state.regs[CS_ELA_NEXTSTATE4]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ACTION(4), &ela_state.regs[CS_ELA_ACTION4]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(4), &ela_state.regs[CS_ELA_ALTNEXTSTATE4]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(4), ELA_ACTION_TRACE), - - 
WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(4), &ela_state.regs[CS_ELA_COMPCTRL4]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(4), 0x11111111), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(4), 0x00000000), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(4), &ela_state.regs[CS_ELA_TWBSEL4]), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(4), 0x00000000), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(4), 0x00000000), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(4), 0x00000000), - WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(4), 0x00000000), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 0), &ela_state.regs[CS_ELA_SIGMASK4_0]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 1), &ela_state.regs[CS_ELA_SIGMASK4_1]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 2), &ela_state.regs[CS_ELA_SIGMASK4_2]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 3), &ela_state.regs[CS_ELA_SIGMASK4_3]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 4), &ela_state.regs[CS_ELA_SIGMASK4_4]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 5), &ela_state.regs[CS_ELA_SIGMASK4_5]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 6), &ela_state.regs[CS_ELA_SIGMASK4_6]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 7), &ela_state.regs[CS_ELA_SIGMASK4_7]), - - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 0), &ela_state.regs[CS_ELA_SIGCOMP4_0]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 1), &ela_state.regs[CS_ELA_SIGCOMP4_1]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 2), &ela_state.regs[CS_ELA_SIGCOMP4_2]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 3), &ela_state.regs[CS_ELA_SIGCOMP4_3]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 4), &ela_state.regs[CS_ELA_SIGCOMP4_4]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 5), &ela_state.regs[CS_ELA_SIGCOMP4_5]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 6), &ela_state.regs[CS_ELA_SIGCOMP4_6]), - WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 7), &ela_state.regs[CS_ELA_SIGCOMP4_7]), 
+ WRITE_PTR_OP_CS_ELA_DYN_REGS_TRIG_STATE(0), + WRITE_PTR_OP_CS_ELA_DYN_REGS_TRIG_STATE(1), + WRITE_PTR_OP_CS_ELA_DYN_REGS_TRIG_STATE(2), + WRITE_PTR_OP_CS_ELA_DYN_REGS_TRIG_STATE(3), + WRITE_PTR_OP_CS_ELA_DYN_REGS_TRIG_STATE(4), WRITE_IMM_OP(CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_TRACE_CONTROL_EN), @@ -539,56 +363,8 @@ static struct kbase_debug_coresight_csf_op ela_disable_ops[] = { BIT_AND_OP(&ela_state.enabled, 0x0), }; -static int parse_signal_groups(struct coresight_mali_source_drvdata *drvdata) -{ - struct device_node *signal_groups = NULL; - unsigned int siggrp_idx; - - if (drvdata->base.dev->of_node) - signal_groups = of_get_child_by_name(drvdata->base.dev->of_node, "signal-groups"); - - if (!signal_groups) { - dev_err(drvdata->base.dev, "Failed to find signal groups OF node"); - return -EINVAL; - } - - for (siggrp_idx = 0; siggrp_idx < CS_ELA_MAX_SIGNAL_GROUPS; siggrp_idx++) { - char buf[CS_SG_NAME_MAX_LEN]; - ssize_t res; - const char *name; - struct property *prop; - - res = snprintf(buf, CS_SG_NAME_MAX_LEN, "sg%d", siggrp_idx); - if (res <= 0) { - dev_err(drvdata->base.dev, - "Signal group name %d snprintf failed unexpectedly", siggrp_idx); - return -EINVAL; - } - - of_property_for_each_string(signal_groups, buf, prop, name) { - int sig_type; - - for (sig_type = 0; sig_type < CS_ELA_NR_SIGTYPE; sig_type++) { - if (!strncmp(signal_type_names[sig_type], name, - strlen(signal_type_names[sig_type]))) { - ela_state.signal_types[sig_type] = (1U << siggrp_idx); - ela_state.supported_tracemodes |= - (1U << signal_type_tracemode_map[sig_type]); - } - } - } - } - - /* Add TRACEMODE_NONE as supported to allow printing */ - ela_state.supported_tracemodes |= (1U << CS_ELA_TRACEMODE_NONE); - - return 0; -} - int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) { - int res = 0; - #if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE drvdata->type_name = type_name; #endif @@ -615,11 +391,7 @@ int coresight_mali_sources_init_drvdata(struct 
coresight_mali_source_drvdata *dr return -EINVAL; } - res = parse_signal_groups(drvdata); - if (res) { - dev_err(drvdata->base.dev, "Failed to parse signal groups"); - return res; - } + reset_dynamic_registers(); return 0; } diff --git a/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c index ae9c2f7f2a8c..6906ca4b2d33 100644 --- a/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c +++ b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -206,17 +206,17 @@ static int verify_store_reg(struct device *dev, const char *buf, size_t count, i return count; } -#define CS_ETM_REG_ATTR_RW(_a, _b) \ - static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ - char *const buf) \ - { \ - return sprintf(buf, "%#x\n", etm_state.regs[CS_ETM_##_b]); \ - } \ - static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t count) \ - { \ - return verify_store_reg(dev, buf, count, CS_ETM_##_b); \ - } \ +#define CS_ETM_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", etm_state.regs[CS_ETM_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_ETM_##_b); \ + } \ static DEVICE_ATTR_RW(_a) CS_ETM_REG_ATTR_RW(trcconfigr, TRCCONFIGR); diff --git 
a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c index 9f60192a682b..59d5cd314c2f 100644 --- a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c +++ b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -148,17 +148,17 @@ static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr } static DEVICE_ATTR_RO(is_enabled); -#define CS_ITM_DWT_REG_ATTR_RW(_a, _b) \ - static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ - char *const buf) \ - { \ - return sprintf(buf, "%#x\n", itm_state.regs[CS_##_b]); \ - } \ - static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t count) \ - { \ - return verify_store_reg(dev, buf, count, CS_##_b); \ - } \ +#define CS_ITM_DWT_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", itm_state.regs[CS_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_##_b); \ + } \ static DEVICE_ATTR_RW(_a) CS_ITM_DWT_REG_ATTR_RW(dwt_ctrl, DWT_CTRL); diff --git a/drivers/xen/arm/Kconfig b/drivers/xen/arm/Kconfig new file mode 100644 index 000000000000..fb4fbc65bfd3 --- /dev/null +++ b/drivers/xen/arm/Kconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM 
Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +config MALI_XEN + tristate "Enable Xen Interface reference code" + depends on MALI_ARBITER_SUPPORT && XEN + help + Enables the build of xen interface modules used in the reference + virtualization setup for Mali + If unsure, say N. diff --git a/drivers/xen/arm/Makefile b/drivers/xen/arm/Makefile new file mode 100644 index 000000000000..b2ee53723428 --- /dev/null +++ b/drivers/xen/arm/Makefile @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2023 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) +M ?= $(shell pwd) + +ifeq ($(KDIR),) + $(error Must specify KDIR to point to the kernel to target)) +endif + +CONFIGS := + +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + ifeq ($(CONFIG_XEN), y) + ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) + CONFIG_MALI_XEN ?= y + endif + endif + + CONFIGS += \ + CONFIG_MALI_XEN +endif + +# +# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs and values. +# $(value config) is the name of the CONFIG option. +# $(value $(value config)) is its value (y, m). +# When the CONFIG is not set to y or m, it defaults to n. +MAKE_ARGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + $(value config)=$(value $(value config)), \ + $(value config)=n)) + +# +# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs defines with values from CONFIGS. +# $(value config) is the name of the CONFIG option. +# When set to y or m, the CONFIG gets defined to 1. 
+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + -D$(value config)=1)) + +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree +CFLAGS_MODULE += -Wall -Werror +CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter +CFLAGS_MODULE += -Wmissing-declarations +CFLAGS_MODULE += -Wmissing-format-attribute +CFLAGS_MODULE += -Wmissing-prototypes +CFLAGS_MODULE += -Wold-style-definition +CFLAGS_MODULE += -Wmissing-include-dirs +CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable) +CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable) +CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned) +CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +CFLAGS_MODULE += -Wno-missing-field-initializers +CFLAGS_MODULE += -Wno-sign-compare +CFLAGS_MODULE += -Wno-type-limits + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + +all: + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +modules_install: + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) modules_install + +clean: + $(MAKE) -C $(KDIR) M=$(M) $(MAKE_ARGS) clean diff --git a/drivers/xen/arm/Mconfig b/drivers/xen/arm/Mconfig new file mode 100644 index 000000000000..8986b7b3774d --- /dev/null +++ b/drivers/xen/arm/Mconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +config MALI_XEN + bool "Enable Xen Interface reference code" + depends on MALI_ARBITER_SUPPORT + help + Enables the build of xen interface modules used in the reference + virtualization setup for Mali + If unsure, say N. diff --git a/include/linux/mali_arbiter_interface.h b/include/linux/mali_arbiter_interface.h index 8e675ec2ad3b..b4162f86ebb4 100644 --- a/include/linux/mali_arbiter_interface.h +++ b/include/linux/mali_arbiter_interface.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -142,15 +142,14 @@ struct arbiter_if_arb_vm_ops { * (via arbiter_if_arb_vm_ops above) in the context of these callbacks. 
*/ struct arbiter_if_vm_arb_ops { - int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, - struct device *dev, struct arbiter_if_arb_vm_ops *ops); + int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, struct device *dev, + struct arbiter_if_arb_vm_ops *ops); void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev); void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev); void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev); void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev); - void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev, - u8 gpu_required); + void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev, u8 gpu_required); }; /** diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h index 786e3b995f29..557853d72718 100644 --- a/include/linux/memory_group_manager.h +++ b/include/linux/memory_group_manager.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,9 +63,8 @@ struct memory_group_manager_ops { * * Return: Pointer to allocated page, or NULL if allocation failed. */ - struct page *(*mgm_alloc_page)( - struct memory_group_manager_device *mgm_dev, int group_id, - gfp_t gfp_mask, unsigned int order); + struct page *(*mgm_alloc_page)(struct memory_group_manager_device *mgm_dev, int group_id, + gfp_t gfp_mask, unsigned int order); /* * mgm_free_page - Free a physical memory page in a group @@ -82,9 +81,8 @@ struct memory_group_manager_ops { * @order: Page order for physical page size (order=0 means 4 KiB, * order=9 means 2 MiB). 
*/ - void (*mgm_free_page)( - struct memory_group_manager_device *mgm_dev, int group_id, - struct page *page, unsigned int order); + void (*mgm_free_page)(struct memory_group_manager_device *mgm_dev, int group_id, + struct page *page, unsigned int order); /* * mgm_get_import_memory_id - Get the physical memory group ID for the @@ -101,9 +99,8 @@ struct memory_group_manager_ops { * Return: The memory group ID to use when mapping pages from this * imported memory. */ - int (*mgm_get_import_memory_id)( - struct memory_group_manager_device *mgm_dev, - struct memory_group_manager_import_data *import_data); + int (*mgm_get_import_memory_id)(struct memory_group_manager_device *mgm_dev, + struct memory_group_manager_import_data *import_data); /* * mgm_update_gpu_pte - Modify a GPU page table entry for a memory group @@ -127,8 +124,8 @@ struct memory_group_manager_ops { * * Return: A modified GPU page table entry to be stored in a page table. */ - u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev, - int group_id, int mmu_level, u64 pte); + u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev, int group_id, + int mmu_level, u64 pte); /* * mgm_pte_to_original_pte - Undo any modification done during mgm_update_gpu_pte() @@ -172,10 +169,10 @@ struct memory_group_manager_ops { * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page * table entry was successfully installed. 
*/ - vm_fault_t (*mgm_vmf_insert_pfn_prot)( - struct memory_group_manager_device *mgm_dev, int group_id, - struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, pgprot_t pgprot); + vm_fault_t (*mgm_vmf_insert_pfn_prot)(struct memory_group_manager_device *mgm_dev, + int group_id, struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + pgprot_t pgprot); }; /** @@ -199,10 +196,7 @@ struct memory_group_manager_device { struct module *owner; }; - -enum memory_group_manager_import_type { - MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF -}; +enum memory_group_manager_import_type { MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF }; /** * struct memory_group_manager_import_data - Structure describing the imported diff --git a/include/linux/priority_control_manager.h b/include/linux/priority_control_manager.h index a6b151916b37..9f28b1b8582a 100644 --- a/include/linux/priority_control_manager.h +++ b/include/linux/priority_control_manager.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,9 +52,8 @@ struct priority_control_manager_ops { * * Return: The priority that would actually be given, could be lower than requested_priority */ - int (*pcm_scheduler_priority_check)( - struct priority_control_manager_device *pcm_dev, - struct task_struct *task, int requested_priority); + int (*pcm_scheduler_priority_check)(struct priority_control_manager_device *pcm_dev, + struct task_struct *task, int requested_priority); }; /** diff --git a/include/linux/protected_memory_allocator.h b/include/linux/protected_memory_allocator.h index 825af48edb90..0c83845a35e0 100644 --- a/include/linux/protected_memory_allocator.h +++ b/include/linux/protected_memory_allocator.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,8 +56,7 @@ struct protected_memory_allocator_ops { * Return: Pointer to allocated memory, or NULL if allocation failed. */ struct protected_memory_allocation *(*pma_alloc_page)( - struct protected_memory_allocator_device *pma_dev, - unsigned int order); + struct protected_memory_allocator_device *pma_dev, unsigned int order); /* * pma_get_phys_addr - Get the physical address of the protected memory @@ -70,9 +69,8 @@ struct protected_memory_allocator_ops { * * Return: The physical address of the given allocation. 
*/ - phys_addr_t (*pma_get_phys_addr)( - struct protected_memory_allocator_device *pma_dev, - struct protected_memory_allocation *pma); + phys_addr_t (*pma_get_phys_addr)(struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma); /* * pma_free_page - Free a page of memory @@ -81,9 +79,8 @@ struct protected_memory_allocator_ops { * through. * @pma: The protected memory allocation to free. */ - void (*pma_free_page)( - struct protected_memory_allocator_device *pma_dev, - struct protected_memory_allocation *pma); + void (*pma_free_page)(struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma); }; /** diff --git a/include/linux/protected_mode_switcher.h b/include/linux/protected_mode_switcher.h index 0f1e6ab1ddc2..5559b184c8bb 100644 --- a/include/linux/protected_mode_switcher.h +++ b/include/linux/protected_mode_switcher.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,8 +37,7 @@ struct protected_mode_ops { * * Return: 0 on success, non-zero on error */ - int (*protected_mode_enable)( - struct protected_mode_device *protected_dev); + int (*protected_mode_enable)(struct protected_mode_device *protected_dev); /* * protected_mode_disable() - Disable protected mode on device, and @@ -47,8 +46,7 @@ struct protected_mode_ops { * * Return: 0 on success, non-zero on error */ - int (*protected_mode_disable)( - struct protected_mode_device *protected_dev); + int (*protected_mode_disable)(struct protected_mode_device *protected_dev); }; /** diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index c9b1f62684d1..3f46e852bdc9 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -23,10 +23,29 @@ #define _VERSION_COMPAT_DEFS_H_ #include +#include +#include + +#if (KERNEL_VERSION(4, 4, 267) < LINUX_VERSION_CODE) +#include +#endif + +#include +#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) +#include +#endif + +#ifndef BITS_PER_TYPE +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#endif #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; + +#ifndef HRTIMER_MODE_REL_SOFT +#define HRTIMER_MODE_REL_SOFT HRTIMER_MODE_REL #endif +#endif /* KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE */ #if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE @@ -62,18 +81,220 @@ typedef unsigned int __poll_t; /* Replace the default definition with CONFIG_LSM_MMAP_MIN_ADDR */ #undef kbase_mmap_min_addr #define kbase_mmap_min_addr CONFIG_LSM_MMAP_MIN_ADDR -#pragma message "kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update possible! 
*" #endif /* (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) */ #endif /* CONFIG_LSM_MMAP_MIN_ADDR */ #if (kbase_mmap_min_addr == CONFIG_DEFAULT_MMAP_MIN_ADDR) -#pragma message "kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update possible! *" #endif #else /* CONFIG_MMU */ #define kbase_mmap_min_addr (0UL) -#pragma message "kbase_mmap_min_addr compiled to (0UL), no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to (0UL), no runtime update possible! *" #endif /* CONFIG_MMU */ #endif /* KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE */ +static inline void kbase_timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *timer)) +{ +#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE + setup_timer(timer, (void (*)(unsigned long))callback, (unsigned long)timer); +#else + timer_setup(timer, callback, 0); +#endif +} + +#ifndef WRITE_ONCE +#ifdef ASSIGN_ONCE +#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) +#else +#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) +#endif +#endif + +#ifndef READ_ONCE +#define READ_ONCE(x) ACCESS_ONCE(x) +#endif + +#ifndef CSTD_UNUSED +#define CSTD_UNUSED(x) ((void)(x)) +#endif + +static inline void *kbase_kmap(struct page *p) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + return kmap_local_page(p); +#else + return kmap(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void *kbase_kmap_atomic(struct page *p) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + return kmap_local_page(p); +#else + return kmap_atomic(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void kbase_kunmap(struct page *p, void *address) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + CSTD_UNUSED(p); + kunmap_local(address); +#else + CSTD_UNUSED(address); + kunmap(p); +#endif /* 
KERNEL_VERSION(5, 11, 0) */ +} + +static inline void kbase_kunmap_atomic(void *address) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + kunmap_local(address); +#else + kunmap_atomic(address); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +/* Some of the older 4.4 kernel patch versions do + * not contain the overflow check functions. However, + * they are based on compiler intrinsics, so they + * are simple to reproduce. + */ +#if (KERNEL_VERSION(4, 4, 267) >= LINUX_VERSION_CODE) +/* Some of the older 4.4 kernel patch versions do + * not contain the overflow check functions. However, + * they are based on compiler intrinsics, so they + * are simple to reproduce. + */ +#define check_mul_overflow(a, b, d) __builtin_mul_overflow(a, b, d) +#define check_add_overflow(a, b, d) __builtin_add_overflow(a, b, d) +#endif + +/* + * There was a big rename in the 4.10 kernel (fence* -> dma_fence*), + * with most of the related functions keeping the same signatures. + */ + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + +#include + +#define dma_fence fence +#define dma_fence_ops fence_ops +#define dma_fence_context_alloc(a) fence_context_alloc(a) +#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) +#define dma_fence_get(a) fence_get(a) +#define dma_fence_put(a) fence_put(a) +#define dma_fence_signal(a) fence_signal(a) +#define dma_fence_is_signaled(a) fence_is_signaled(a) +#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) +#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) +#define dma_fence_default_wait fence_default_wait + +#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#else +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#else + +#include + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? 
(a)->status ?: 1 : 0) +#endif + +#endif /* < 4.10.0 */ + +static inline void dma_fence_set_error_helper( +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence, +#else + struct dma_fence *fence, +#endif + int error) +{ +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + dma_fence_set_error(fence, error); +#elif (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) + fence_set_error(fence, error); +#else + fence->status = error; +#endif +} + +#include +#if !((KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(6, 1, 25) <= LINUX_VERSION_CODE) && defined(__ANDROID_COMMON_KERNEL__))) +static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) +{ + vma->vm_flags |= flags; +} +static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) +{ + vma->vm_flags &= ~flags; +} +#endif + +static inline void kbase_unpin_user_buf_page(struct page *page) +{ +#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE + put_page(page); +#else + unpin_user_page(page); +#endif +} + +static inline long kbase_get_user_pages(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas) +{ +#if ((KERNEL_VERSION(6, 5, 0) > LINUX_VERSION_CODE) && !defined(__ANDROID_COMMON_KERNEL__)) || \ + ((KERNEL_VERSION(6, 4, 0) > LINUX_VERSION_CODE) && defined(__ANDROID_COMMON_KERNEL__)) + return get_user_pages(start, nr_pages, gup_flags, pages, vmas); +#else + return get_user_pages(start, nr_pages, gup_flags, pages); +#endif +} + +static inline long kbase_pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas, int *locked) +{ +#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE + return get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, pages, vmas); +#elif KERNEL_VERSION(5, 6, 0) > 
LINUX_VERSION_CODE + return get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, pages, vmas, locked); +#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE + return pin_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, pages, vmas, locked); +#elif ((KERNEL_VERSION(6, 5, 0) > LINUX_VERSION_CODE) && !defined(__ANDROID_COMMON_KERNEL__)) || \ + ((KERNEL_VERSION(6, 4, 0) > LINUX_VERSION_CODE) && defined(__ANDROID_COMMON_KERNEL__)) + return pin_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); +#else + return pin_user_pages_remote(mm, start, nr_pages, gup_flags, pages, locked); +#endif +} + +#if (KERNEL_VERSION(6, 4, 0) <= LINUX_VERSION_CODE) +#define KBASE_CLASS_CREATE(owner, name) class_create(name) +#else +#define KBASE_CLASS_CREATE(owner, name) class_create(owner, name) +#endif /* (KERNEL_VERSION(6, 4, 0) <= LINUX_VERSION_CODE) */ + +#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE +#define kbase_totalram_pages() totalram_pages +#else +#define kbase_totalram_pages() totalram_pages() +#endif /* KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE */ + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h b/include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h new file mode 100644 index 000000000000..46627c416baa --- /dev/null +++ b/include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_DMA_BUF_TEST_EXPORTER_H_ +#define _UAPI_DMA_BUF_TEST_EXPORTER_H_ + +#include +#include + +#define DMA_BUF_TE_ENQ 0x642d7465 +#define DMA_BUF_TE_ACK 0x68692100 + +struct dma_buf_te_ioctl_version { + /** Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */ + int op; + /** Major version */ + int major; + /** Minor version */ + int minor; +}; + +struct dma_buf_te_ioctl_alloc { + __u64 size; /* size of buffer to allocate, in pages */ +}; + +struct dma_buf_te_ioctl_status { + /* in */ + int fd; /* the dma_buf to query, only dma_buf objects exported by this driver are supported */ + /* out */ + int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */ + int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */ + int cpu_mappings; /* number of cpu mappings (active 'mmap's) */ +}; + +struct dma_buf_te_ioctl_set_failing { + /* in */ + int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver are supported */ + + /* zero = no fail injection, non-zero = inject failure */ + int fail_attach; + int fail_map; + int fail_mmap; +}; + +struct dma_buf_te_ioctl_fill { + int fd; + unsigned int value; +}; + +#define DMA_BUF_TE_IOCTL_BASE 'E' +/* All of the ioctls below return 0 on success or -errcode, except DMA_BUF_TE_ALLOC, which returns an fd or -errcode */ +#define DMA_BUF_TE_VERSION _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version) +#define DMA_BUF_TE_ALLOC _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct 
dma_buf_te_ioctl_alloc) +#define DMA_BUF_TE_QUERY _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status) +#define DMA_BUF_TE_SET_FAILING \ + _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing) +#define DMA_BUF_TE_ALLOC_CONT _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc) +#define DMA_BUF_TE_FILL _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill) + +#endif /* _UAPI_DMA_BUF_TEST_EXPORTER_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index a44da7beb041..b45e32fc3d33 100644 --- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,34 +30,31 @@ #define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) #if MALI_USE_CSF -#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) #else /* MALI_USE_CSF */ -#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) -#endif /* !MALI_USE_CSF */ -#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#endif /* MALI_USE_CSF */ +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) #define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) #define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4 -#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60 -#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \ - (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \ - KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK) -#define 
KBASE_DUMMY_MODEL_BLOCK_SIZE \ - (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) -#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 -#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK KBASE_DUMMY_MODEL_COUNTER_PER_CORE +#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \ + (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK) +#define KBASE_DUMMY_MODEL_BLOCK_SIZE (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) +#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 +#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 #define KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS 0 -#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \ +#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \ (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES) -#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ +#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ (KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS) -#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ - (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \ - KBASE_DUMMY_MODEL_COUNTER_PER_CORE) -#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \ +#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE) +#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK) -#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ +#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) /* @@ -70,8 +67,10 @@ #define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull) #define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull) #define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull) -#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TKRX (0x1FFFull) #define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) +#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) 
#define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) + #endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index c18c6fc8a7ae..013924887142 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,6 @@ #define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) - /* Must be FIXABLE memory: its GPU VA will be determined at a later point, * at which time it will be at a fixed GPU VA. */ @@ -61,8 +60,7 @@ /* A mask of all the flags which are only valid for allocations within kbase, * and may not be passed from user space. 
*/ -#define BASEP_MEM_FLAGS_KERNEL_ONLY \ - (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) +#define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) /* A mask of all currently reserved flags */ @@ -74,8 +72,7 @@ #define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT) #define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ - ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ - LOCAL_PAGE_SHIFT) + ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> LOCAL_PAGE_SHIFT) /* Valid set of just-in-time memory allocation flags */ #define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0) @@ -92,9 +89,8 @@ /* Bitpattern describing the ::base_context_create_flags that can be * passed to base_context_init() */ -#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ - (BASE_CONTEXT_CCTX_EMBEDDED | \ - BASE_CONTEXT_CSF_EVENT_THREAD | \ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | BASE_CONTEXT_CSF_EVENT_THREAD | \ BASEP_CONTEXT_CREATE_KERNEL_FLAGS) /* Flags for base tracepoint specific to CSF */ @@ -105,10 +101,9 @@ /* Enable additional CSF Firmware side tracepoints */ #define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3) -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED | \ - BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \ - BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) +#define BASE_TLSTREAM_FLAGS_MASK \ + (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | BASE_TLSTREAM_JOB_DUMPING_ENABLED | \ + BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) /* Number of pages mapped into the process address space for a bound GPU * command queue. 
A pair of input/output pages and a Hw doorbell page @@ -177,7 +172,7 @@ enum base_kcpu_command_type { BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, BASE_KCPU_COMMAND_TYPE_JIT_FREE, BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, - BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, }; /** diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index 7c37cfc077f8..9db2146e2fd5 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -82,10 +82,23 @@ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE * before allocating GPU memory for the context. * - CPU mappings of USER_BUFFER imported memory handles must be cached. + * 1.19: + * - Add NE support in queue_group_create IOCTL fields + * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 for + * backward compatibility. + * 1.20: + * - Restrict child process from doing supported file operations (like mmap, ioctl, + * read, poll) on the file descriptor of mali device file that was inherited + * from the parent process. + * 1.21: + * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls. + * 1.22: + * - Add comp_pri_threshold and comp_pri_ratio attributes to + * kbase_ioctl_cs_queue_group_create. 
*/ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 18 +#define BASE_UK_VERSION_MINOR 22 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -134,8 +147,7 @@ struct kbase_ioctl_cs_queue_kick { __u64 buffer_gpu_addr; }; -#define KBASE_IOCTL_CS_QUEUE_KICK \ - _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick) +#define KBASE_IOCTL_CS_QUEUE_KICK _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick) /** * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group @@ -161,8 +173,7 @@ union kbase_ioctl_cs_queue_bind { } out; }; -#define KBASE_IOCTL_CS_QUEUE_BIND \ - _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind) +#define KBASE_IOCTL_CS_QUEUE_BIND _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind) /** * struct kbase_ioctl_cs_queue_register_ex - Register a GPU command queue with the @@ -254,9 +265,59 @@ union kbase_ioctl_cs_queue_group_create_1_6 { } out; }; -#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \ +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \ _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6) +/** + * union kbase_ioctl_cs_queue_group_create_1_18 - Create a GPU command queue group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. 
+ * @in.csi_handlers: Flags to signal that the application intends to use CSI + * exception handlers in some linear buffers to deal with + * the given exception types. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. + */ +union kbase_ioctl_cs_queue_group_create_1_18 { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 csi_handlers; + __u8 padding[2]; + /** + * @in.dvs_buf: buffer for deferred vertex shader + */ + __u64 dvs_buf; + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 \ + _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_18) + /** * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group * @in: Input parameters @@ -291,11 +352,15 @@ union kbase_ioctl_cs_queue_group_create { __u8 fragment_max; __u8 compute_max; __u8 csi_handlers; - __u8 padding[2]; + /** + * @in.reserved: Reserved, currently unused, must be zero. 
+ */ + __u16 reserved; /** * @in.dvs_buf: buffer for deferred vertex shader */ __u64 dvs_buf; + __u64 padding[9]; } in; struct { __u8 group_handle; @@ -304,7 +369,7 @@ union kbase_ioctl_cs_queue_group_create { } out; }; -#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create) /** @@ -321,8 +386,7 @@ struct kbase_ioctl_cs_queue_group_term { #define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \ _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term) -#define KBASE_IOCTL_CS_EVENT_SIGNAL \ - _IO(KBASE_IOCTL_TYPE, 44) +#define KBASE_IOCTL_CS_EVENT_SIGNAL _IO(KBASE_IOCTL_TYPE, 44) typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */ @@ -337,8 +401,7 @@ struct kbase_ioctl_kcpu_queue_new { __u8 padding[7]; }; -#define KBASE_IOCTL_KCPU_QUEUE_CREATE \ - _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new) +#define KBASE_IOCTL_KCPU_QUEUE_CREATE _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new) /** * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue @@ -444,7 +507,7 @@ union kbase_ioctl_cs_tiler_heap_init_1_13 { } out; }; -#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \ +#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \ _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13) /** @@ -503,16 +566,14 @@ union kbase_ioctl_cs_get_glb_iface { } out; }; -#define KBASE_IOCTL_CS_GET_GLB_IFACE \ - _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface) +#define KBASE_IOCTL_CS_GET_GLB_IFACE _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface) struct kbase_ioctl_cs_cpu_queue_info { __u64 buffer; __u64 size; }; -#define KBASE_IOCTL_VERSION_CHECK \ - _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) +#define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) #define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \ _IOW(KBASE_IOCTL_TYPE, 53, struct 
kbase_ioctl_cs_cpu_queue_info) diff --git a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h index 0ca5d902f174..eaa4b2d12477 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,11 +22,6 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ #define _UAPI_KBASE_GPU_REGMAP_CSF_H_ -/* IPA control registers */ -#define IPA_CONTROL_BASE 0x40000 -#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) -#define STATUS 0x004 /* (RO) Status register */ - /* USER base address */ #define USER_BASE 0x0010000 #define USER_REG(r) (USER_BASE + (r)) diff --git a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h index 9bfd6d2fc529..d24afccf49ee 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,29 +22,4 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ #define _UAPI_KBASE_GPU_REGMAP_JM_H_ -/* GPU control registers */ - -#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ - -/* Job control registers */ - -#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ - -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ - -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ - -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - #endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h index 83d84137a034..de392a5c506f 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h +++ 
b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,8 @@ #define _UAPI_KBASE_GPU_COHERENCY_H_ #define COHERENCY_ACE_LITE 0 -#define COHERENCY_ACE 1 -#define COHERENCY_NONE 31 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 #define COHERENCY_FEATURE_BIT(x) (1 << (x)) #endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index 784e09a7edc2..d3478546e244 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,124 +22,156 @@ #ifndef _UAPI_KBASE_GPU_ID_H_ #define _UAPI_KBASE_GPU_ID_H_ +#if defined(__linux) #include <linux/types.h> +#endif -/* GPU_ID register */ -#define KBASE_GPU_ID_VERSION_STATUS_SHIFT 0 -#define KBASE_GPU_ID_VERSION_MINOR_SHIFT 4 -#define KBASE_GPU_ID_VERSION_MAJOR_SHIFT 12 -#define KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 - -#define GPU_ID_VERSION_STATUS (0xFu << KBASE_GPU_ID_VERSION_STATUS_SHIFT) -#define GPU_ID_VERSION_MINOR (0xFFu << KBASE_GPU_ID_VERSION_MINOR_SHIFT) -#define GPU_ID_VERSION_MAJOR (0xFu << KBASE_GPU_ID_VERSION_MAJOR_SHIFT) -#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) - -#define GPU_ID2_VERSION_STATUS_SHIFT 0 -#define GPU_ID2_VERSION_MINOR_SHIFT 4 -#define GPU_ID2_VERSION_MAJOR_SHIFT 12 -#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 -#define GPU_ID2_ARCH_REV_SHIFT 20 -#define GPU_ID2_ARCH_MINOR_SHIFT 24 -#define GPU_ID2_ARCH_MAJOR_SHIFT 28 -#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) -#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) -#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) -#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) -#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) -#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) -#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ - GPU_ID2_VERSION_MINOR | \ - GPU_ID2_VERSION_STATUS) +#define GPU_ID2_VERSION_STATUS_SHIFT 0 +#define GPU_ID2_VERSION_MINOR_SHIFT 4 +#define GPU_ID2_VERSION_MAJOR_SHIFT 12 +#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 +#define GPU_ID2_ARCH_REV_SHIFT 20 +#define GPU_ID2_ARCH_MINOR_SHIFT 24 +#define GPU_ID2_ARCH_MAJOR_SHIFT 28 +#define
GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) +#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | GPU_ID2_VERSION_MINOR | GPU_ID2_VERSION_STATUS) +#define GPU_ID2_ARCH_REV_GET(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_ARCH_REV) >> GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR_GET(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_ARCH_MINOR) >> GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR_GET(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) >> GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_VERSION_MINOR_GET(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR_GET(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR_GET(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_PRODUCT_MAJOR) >> GPU_ID2_PRODUCT_MAJOR_SHIFT) /* Helper macro to construct a value consisting of arch major and revision * using the value of gpu_id. */ -#define ARCH_MAJOR_REV_REG(gpu_id) \ - ((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) | \ - (((__u32)gpu_id) & GPU_ID2_ARCH_REV)) +#define GPU_ID2_ARCH_MAJOR_REV_REG(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) | (((__u32)gpu_id) & GPU_ID2_ARCH_REV)) /* Helper macro to create a partial GPU_ID (new format) that defines * a arch major and revision. 
*/ -#define GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev) \ - ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ +#define GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev) \ + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)) /* Helper macro to create a partial GPU_ID (new format) that defines * a product ignoring its version. */ #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ - ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ - (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ - (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) /* Helper macro to create a partial GPU_ID (new format) that specifies the * revision (major, minor, status) of a product */ #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ - ((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ - (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ - (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + ((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) /* Helper macro to create a complete GPU_ID (new format) */ -#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ - version_major, version_minor, version_status) \ - (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ - product_major) | \ - GPU_ID2_VERSION_MAKE(version_major, version_minor, \ - version_status)) +#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, version_major, \ + version_minor, version_status) \ + (GPU_ID2_PRODUCT_MAKE(arch_major, 
arch_minor, arch_rev, product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status)) /* Helper macro to create a partial GPU_ID (new format) that identifies * a particular GPU model by its arch_major and product_major. */ -#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ - ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) +#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) /* Strip off the non-relevant bits from a product_id value and make it suitable * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU * model. */ #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ - ((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ - GPU_ID2_PRODUCT_MODEL) + ((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & GPU_ID2_PRODUCT_MODEL) -#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) -#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) -#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) -#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) -#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) -#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) -#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) -#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) -#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) -#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) -#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) -#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) -#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) -#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) -#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) -#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) -#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) -#define GPU_ID2_PRODUCT_TTIX GPU_ID2_MODEL_MAKE(12, 0) -#define 
GPU_ID2_PRODUCT_LTIX GPU_ID2_MODEL_MAKE(12, 1) +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) +#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) +#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) +#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) +#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) +#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) +#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) +#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) +#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) +#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) +#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) +#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) +#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) +#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) +#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) +#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) +#define GPU_ID2_PRODUCT_TTIX GPU_ID2_MODEL_MAKE(12, 0) +#define GPU_ID2_PRODUCT_LTIX GPU_ID2_MODEL_MAKE(12, 1) +#define GPU_ID2_PRODUCT_TKRX GPU_ID2_MODEL_MAKE(13, 0) +#define GPU_ID2_PRODUCT_LKRX GPU_ID2_MODEL_MAKE(13, 1) -/** - * GPU_ID_MAKE - Helper macro to generate GPU_ID using id, major, minor, status - * - * @id: Product Major of GPU ID - * @major: Version major of GPU ID - * @minor: Version minor of GPU ID - * @status: Version status of GPU ID + + +#define GPU_ID_U8_COMP(val3, val2, val1, val0) \ + ((((__u32)val3) << 24U) | (((__u32)val2) << 16U) | (((__u32)val1) << 8U) | ((__u32)val0)) +#define GPU_ID_U8_COMP_SHIFT(comp, idx) (((__u32)comp) >> (idx * 8U)) +#define GPU_ID_U8_COMP_GET(comp, idx) (GPU_ID_U8_COMP_SHIFT(comp, idx) & 0xFF) + +#define GPU_ID_PRODUCT_ID_MAKE(arch_major, arch_minor, arch_rev, product_major) \ + GPU_ID_U8_COMP(arch_major, arch_minor, arch_rev, product_major) +#define GPU_ID_MODEL_MAKE(arch_major, product_major) GPU_ID_U8_COMP(arch_major, 0, 0, product_major) 
+#define GPU_ID_VERSION_MAKE(version_major, version_minor, version_status) \ + GPU_ID_U8_COMP(0, version_major, version_minor, version_status) +#define GPU_ID_ARCH_MAKE(arch_major, arch_minor, arch_rev) \ + GPU_ID_U8_COMP(0, arch_major, arch_minor, arch_rev) + +/* Convert ID created from GPU_ID_PRODUCT_ID_MAKE() to match the format of + * GPU_ID_MODEL_MAKE() */ -#define GPU_ID_MAKE(id, major, minor, status) \ - ((((__u32)id) << KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - (((__u32)major) << KBASE_GPU_ID_VERSION_MAJOR_SHIFT) | \ - (((__u32)minor) << KBASE_GPU_ID_VERSION_MINOR_SHIFT) | \ - (((__u32)status) << KBASE_GPU_ID_VERSION_STATUS_SHIFT)) +#define GPU_ID_MODEL_MATCH_VALUE(product_id) (((__u32)product_id) & GPU_ID_MODEL_MAKE(0xFF, 0xFF)) + +#define GPU_ID_VERSION_ID_MAJOR_MINOR_GET(version_id) GPU_ID_U8_COMP_SHIFT(version_id, 1) +#define GPU_ID_VERSION_ID_STATUS_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 0) +#define GPU_ID_VERSION_ID_MINOR_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 1) +#define GPU_ID_VERSION_ID_MAJOR_GET(version_id) GPU_ID_U8_COMP_GET(version_id, 2) + +#define GPU_ID_PRODUCT_TMIX GPU_ID_MODEL_MAKE(6, 0) +#define GPU_ID_PRODUCT_THEX GPU_ID_MODEL_MAKE(6, 1) +#define GPU_ID_PRODUCT_TSIX GPU_ID_MODEL_MAKE(7, 0) +#define GPU_ID_PRODUCT_TDVX GPU_ID_MODEL_MAKE(7, 3) +#define GPU_ID_PRODUCT_TNOX GPU_ID_MODEL_MAKE(7, 1) +#define GPU_ID_PRODUCT_TGOX GPU_ID_MODEL_MAKE(7, 2) +#define GPU_ID_PRODUCT_TTRX GPU_ID_MODEL_MAKE(9, 0) +#define GPU_ID_PRODUCT_TNAX GPU_ID_MODEL_MAKE(9, 1) +#define GPU_ID_PRODUCT_TBEX GPU_ID_MODEL_MAKE(9, 2) +#define GPU_ID_PRODUCT_LBEX GPU_ID_MODEL_MAKE(9, 4) +#define GPU_ID_PRODUCT_TBAX GPU_ID_MODEL_MAKE(9, 5) +#define GPU_ID_PRODUCT_TODX GPU_ID_MODEL_MAKE(10, 2) +#define GPU_ID_PRODUCT_TGRX GPU_ID_MODEL_MAKE(10, 3) +#define GPU_ID_PRODUCT_TVAX GPU_ID_MODEL_MAKE(10, 4) +#define GPU_ID_PRODUCT_LODX GPU_ID_MODEL_MAKE(10, 7) +#define GPU_ID_PRODUCT_TTUX GPU_ID_MODEL_MAKE(11, 2) +#define GPU_ID_PRODUCT_LTUX GPU_ID_MODEL_MAKE(11, 
3) +#define GPU_ID_PRODUCT_TTIX GPU_ID_MODEL_MAKE(12, 0) +#define GPU_ID_PRODUCT_LTIX GPU_ID_MODEL_MAKE(12, 1) +#define GPU_ID_PRODUCT_TKRX GPU_ID_MODEL_MAKE(13, 0) +#define GPU_ID_PRODUCT_LKRX GPU_ID_MODEL_MAKE(13, 1) #endif /* _UAPI_KBASE_GPU_ID_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h index 1f331671b6b4..8256191f331b 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,71 +28,4 @@ #include "backend/mali_kbase_gpu_regmap_jm.h" #endif /* !MALI_USE_CSF */ -/* Begin Register Offsets */ -/* GPU control registers */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) - -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ - -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word 
*/ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -/* Job control registers */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ - -/* MMU control registers */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 - -#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ - -/* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 - #endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h index 1a3098d6cad8..9478334ce667 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -60,28 +60,26 @@ /* A mask of all the flags which are only valid for allocations within kbase, * and may not be passed from user space. 
*/ -#define BASEP_MEM_FLAGS_KERNEL_ONLY \ - (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ - BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | BASE_MEM_FLAG_MAP_FIXED | \ + BASEP_MEM_PERFORM_JIT_TRIM) /* A mask of all currently reserved flags */ -#define BASE_MEM_FLAGS_RESERVED \ - (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) - +#define BASE_MEM_FLAGS_RESERVED (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES */ -#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) +#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) /** * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE - If set, the heap info address points * to a __u32 holding the used size in bytes; * otherwise it points to a __u64 holding the lowest address of unused memory. */ -#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) /** * BASE_JIT_ALLOC_VALID_FLAGS - Valid set of just-in-time memory allocation flags @@ -109,26 +107,25 @@ */ /* Private flag tracking whether job descriptor dumping is disabled */ -#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ - ((base_context_create_flags)(1 << 31)) +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((base_context_create_flags)(1 << 31)) /* Flags for base tracepoint specific to JM */ -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED) +#define BASE_TLSTREAM_FLAGS_MASK \ + (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | BASE_TLSTREAM_JOB_DUMPING_ENABLED) /* * Dependency stuff, keep it private for now. May want to expose it if * we decide to make the number of semaphores a configurable * option. 
*/ -#define BASE_JD_ATOM_COUNT 256 +#define BASE_JD_ATOM_COUNT 256 /* Maximum number of concurrent render passes. */ #define BASE_JD_RP_COUNT (256) /* Set/reset values for a software event */ -#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) /** * struct base_jd_udata - Per-job data @@ -156,9 +153,9 @@ struct base_jd_udata { */ typedef __u8 base_jd_dep_type; -#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ -#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ -#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ /** * typedef base_jd_core_req - Job chain hardware requirements. @@ -180,7 +177,7 @@ typedef __u32 base_jd_core_req; /* Requires fragment shaders */ -#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) /* Requires compute shaders * @@ -196,20 +193,20 @@ typedef __u32 base_jd_core_req; #define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) /* Requires tiling */ -#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /* Requires cache flushes */ #define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /* Requires value writeback */ -#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /* SW-only requirements - the HW does not expose these as part of the job slot * capabilities */ /* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) /* SW-only requirement: coalesce completion events. 
* If this bit is set then completion of this atom will not cause an event to @@ -223,29 +220,29 @@ typedef __u32 base_jd_core_req; /* SW Only requirement: the job chain requires a coherent core group. We don't * mind which coherent core group is used. */ -#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) /* SW Only requirement: The performance counters should be enabled only when * they are needed, to reduce power consumption. */ -#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) /* SW Only requirement: External resources are referenced by this atom. * * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and * BASE_JD_REQ_SOFT_EVENT_WAIT. */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) /* SW Only requirement: Software defined job. Jobs with this bit set will not be * submitted to the hardware but will cause some action to happen within the * driver */ -#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) -#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) -#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) -#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) /* 0x4 RESERVED for now */ @@ -257,11 +254,11 @@ typedef __u32 base_jd_core_req; * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it * possible for other jobs to wait upon. It completes immediately. 
*/ -#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) -#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) /* SW only requirement: Just In Time allocation * @@ -278,7 +275,7 @@ typedef __u32 base_jd_core_req; * * The job will complete immediately. */ -#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) /* SW only requirement: Just In Time free * @@ -288,7 +285,7 @@ typedef __u32 base_jd_core_req; * * The job will complete immediately. */ -#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) /* SW only requirement: Map external resource * @@ -297,7 +294,7 @@ typedef __u32 base_jd_core_req; * passed via the jc element of the atom which is a pointer to a * base_external_resource_list. */ -#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) /* SW only requirement: Unmap external resource * @@ -306,7 +303,7 @@ typedef __u32 base_jd_core_req; * passed via the jc element of the atom which is a pointer to a * base_external_resource_list. 
*/ -#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) /* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) * @@ -316,7 +313,7 @@ typedef __u32 base_jd_core_req; * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. */ -#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) /* HW Requirement: Use the base_jd_atom::device_nr field to specify a * particular core group @@ -331,7 +328,7 @@ typedef __u32 base_jd_core_req; /* SW Flag: If this bit is set then the successful completion of this atom * will not cause an event to be sent to userspace */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) /* SW Flag: If this bit is set then completion of this atom will not cause an * event to be sent to userspace, whether successful or not. 
@@ -408,23 +405,22 @@ typedef __u32 base_jd_core_req; /* These requirement bits are currently unused in base_jd_core_req */ -#define BASEP_JD_REQ_RESERVED \ - (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ - BASE_JD_REQ_EVENT_COALESCE | \ - BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ - BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ - BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ - BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ - BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK)) +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | BASE_JD_REQ_COHERENT_GROUP | \ + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | BASE_JD_REQ_JOB_SLOT | \ + BASE_JD_REQ_START_RENDERPASS | BASE_JD_REQ_END_RENDERPASS | \ + BASE_JD_REQ_LIMITED_CORE_MASK)) /* Mask of all bits in base_jd_core_req that control the type of the atom. * * This allows dependency only atoms to have flags set */ -#define BASE_JD_REQ_ATOM_TYPE \ - (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ - BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_V | \ + BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) /** * BASE_JD_REQ_SOFT_JOB_TYPE - Mask of all bits in base_jd_core_req that @@ -436,8 +432,7 @@ typedef __u32 base_jd_core_req; * a dependency only job. 
*/ #define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ - (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ - ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + (((core_req)&BASE_JD_REQ_SOFT_JOB) || ((core_req)&BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) /** * enum kbase_jd_atom_state - Atom states @@ -571,17 +566,17 @@ struct base_jd_fragment { typedef __u8 base_jd_prio; /* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) /* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and * BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) /* Low atom priority. */ -#define BASE_JD_PRIO_LOW ((base_jd_prio)2) +#define BASE_JD_PRIO_LOW ((base_jd_prio)2) /* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH, * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) +#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) /* Invalid atom priority (max uint8_t value) */ #define BASE_JD_PRIO_INVALID ((base_jd_prio)255) @@ -709,7 +704,7 @@ enum { BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ - BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ /* Mask to extract the type from an event code */ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) }; @@ -924,34 +919,29 @@ enum base_jd_event_code { BASE_JD_EVENT_ACCESS_FLAG = 0xD8, /* SW defined exceptions */ - BASE_JD_EVENT_MEM_GROWTH_FAILED = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_JOB_CANCELLED = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, - BASE_JD_EVENT_JOB_INVALID = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_MEM_GROWTH_FAILED = 
BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_RESERVED | 0x3FF, + BASE_JD_SW_EVENT_RESERVED | 0x3FF, - BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | 0x000, + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, - BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_INFO | 0x000, - BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_RESERVED | 0x3FF, - BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | 0x000, - BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | + BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | + BASE_JD_SW_EVENT_JOB | 0x001, - BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | + BASE_JD_SW_EVENT_RESERVED | 0x3FF }; /** diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h 
b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h index ac6affe8c76c..1827d6ec4e1b 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -143,9 +143,16 @@ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE * before allocating GPU memory for the context. * - CPU mappings of USER_BUFFER imported memory handles must be cached. + * 11.39: + * - Restrict child process from doing supported file operations (like mmap, ioctl, + * read, poll) on the file descriptor of mali device file that was inherited + * from the parent process. + * 11.40: + * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls. */ + #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 38 +#define BASE_UK_VERSION_MINOR 40 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -159,9 +166,7 @@ struct kbase_ioctl_version_check { __u16 minor; }; -#define KBASE_IOCTL_VERSION_CHECK \ - _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) - +#define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) /** * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel @@ -176,11 +181,9 @@ struct kbase_ioctl_job_submit { __u32 stride; }; -#define KBASE_IOCTL_JOB_SUBMIT \ - _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) +#define KBASE_IOCTL_JOB_SUBMIT _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) -#define KBASE_IOCTL_POST_TERM \ - _IO(KBASE_IOCTL_TYPE, 4) +#define KBASE_IOCTL_POST_TERM _IO(KBASE_IOCTL_TYPE, 4) /** * struct kbase_ioctl_soft_event_update - Update the status of a soft-event @@ -237,9 +240,7 @@ union kbase_kinstr_jm_fd { struct kbase_kinstr_jm_fd_out out; }; -#define KBASE_IOCTL_KINSTR_JM_FD \ - _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd) - +#define KBASE_IOCTL_KINSTR_JM_FD _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd) #define 
KBASE_IOCTL_VERSION_CHECK_RESERVED \ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) diff --git a/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h index f8378146aceb..82e651f67b71 100644 --- a/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h +++ b/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -162,7 +162,7 @@ struct base_mem_handle { /* A mask for all input bits, including IN/OUT bits. */ -#define BASE_MEM_FLAGS_INPUT_MASK \ +#define BASE_MEM_FLAGS_INPUT_MASK \ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) /* Special base mem handles. @@ -206,13 +206,13 @@ typedef __u32 base_context_create_flags; /* Bitmask used to encode a memory group ID in base_context_create_flags */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) /* Bitpattern describing the base_context_create_flags that can be * passed to the kernel */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | BASEP_CONTEXT_MMU_GROUP_ID_MASK) /* Flags for base tracepoint diff --git a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h index e6cac0eb2a1a..8e507f0f14aa 100644 --- a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h +++ b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,12 +27,11 @@ #define _UAPI_BASE_KERNEL_H_ #include +#include "mali_gpu_props.h" #include "mali_base_mem_priv.h" #include "gpu/mali_kbase_gpu_id.h" #include "gpu/mali_kbase_gpu_coherency.h" -#define BASE_MAX_COHERENT_GROUPS 16 - #if defined(PAGE_MASK) && defined(PAGE_SHIFT) #define LOCAL_PAGE_SHIFT PAGE_SHIFT #define LOCAL_PAGE_LSB ~PAGE_MASK @@ -71,21 +70,23 @@ */ typedef __u32 base_mem_alloc_flags; +#define BASE_MEM_FLAGS_MODIFIABLE_NATIVE (BASE_MEM_DONT_NEED) + +#define BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL) + /* A mask for all the flags which are modifiable via the base_mem_set_flags * interface. */ #define BASE_MEM_FLAGS_MODIFIABLE \ - (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ - BASE_MEM_COHERENT_LOCAL) + (BASE_MEM_FLAGS_MODIFIABLE_NATIVE | BASE_MEM_FLAGS_MODIFIABLE_IMPORTED_UMM) /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. 
*/ -#define BASE_MEM_FLAGS_QUERYABLE \ - (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ - BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ - BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ - BASEP_MEM_FLAGS_KERNEL_ONLY)) +#define BASE_MEM_FLAGS_QUERYABLE \ + (BASE_MEM_FLAGS_INPUT_MASK & \ + ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \ + BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY)) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -127,22 +128,21 @@ struct base_mem_import_user_buffer { }; /* Mask to detect 4GB boundary alignment */ -#define BASE_MEM_MASK_4GB 0xfffff000UL +#define BASE_MEM_MASK_4GB 0xfffff000UL /* Mask to detect 4GB boundary (in page units) alignment */ -#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) +#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) /* Limit on the 'extension' parameter for an allocation with the * BASE_MEM_TILER_ALIGN_TOP flag set * * This is the same as the maximum limit for a Buffer Descriptor's chunk size */ -#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \ - (21u - (LOCAL_PAGE_SHIFT)) -#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ +#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 (21u - (LOCAL_PAGE_SHIFT)) +#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2)) /* Bit mask of cookies used for memory allocation setup */ -#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ +#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ @@ -243,10 +243,7 @@ struct base_jit_alloc_info { __u64 heap_info_gpu_addr; }; -enum base_external_resource_access { - BASE_EXT_RES_ACCESS_SHARED, - BASE_EXT_RES_ACCESS_EXCLUSIVE -}; +enum base_external_resource_access { 
BASE_EXT_RES_ACCESS_SHARED, BASE_EXT_RES_ACCESS_EXCLUSIVE }; struct base_external_resource { __u64 ext_resource; @@ -276,8 +273,6 @@ struct base_jd_debug_copy_buffer { struct base_external_resource extres; }; -#define GPU_MAX_JOB_SLOTS 16 - /** * DOC: User-side Base GPU Property Queries * @@ -402,8 +397,8 @@ struct mali_base_gpu_l2_cache_props { }; struct mali_base_gpu_tiler_props { - __u32 bin_size_bytes; /* Max is 4*2^15 */ - __u32 max_active_levels; /* Max is 2^15 */ + __u32 bin_size_bytes; /* Max is 4*2^15 */ + __u32 max_active_levels; /* Max is 2^15 */ }; /** @@ -428,11 +423,11 @@ struct mali_base_gpu_thread_props { __u32 max_threads; __u32 max_workgroup_size; __u32 max_barrier_size; - __u16 max_registers; + __u32 max_registers; __u8 max_task_queue; __u8 max_thread_group_split; __u8 impl_tech; - __u8 padding[3]; + __u8 padding; __u32 tls_alloc; }; @@ -591,24 +586,20 @@ struct base_gpu_props { struct mali_base_gpu_coherent_group_info coherency_info; }; -#define BASE_MEM_GROUP_ID_GET(flags) \ - ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT) +#define BASE_MEM_GROUP_ID_GET(flags) ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT) -#define BASE_MEM_GROUP_ID_SET(id) \ - (((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? \ - BASE_MEM_GROUP_DEFAULT : \ - id) \ - << BASEP_MEM_GROUP_ID_SHIFT) & \ +#define BASE_MEM_GROUP_ID_SET(id) \ + (((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? 
BASE_MEM_GROUP_DEFAULT : \ + id) \ + << BASEP_MEM_GROUP_ID_SHIFT) & \ BASE_MEM_GROUP_ID_MASK) -#define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \ - (BASEP_CONTEXT_MMU_GROUP_ID_MASK & \ - ((base_context_create_flags)(group_id) \ - << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)) +#define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \ + (BASEP_CONTEXT_MMU_GROUP_ID_MASK & \ + ((base_context_create_flags)(group_id) << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)) -#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \ - ((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> \ - BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) +#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \ + ((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) /* * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These @@ -617,22 +608,20 @@ struct base_gpu_props { */ /* For monotonic (counter) timefield */ -#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0) +#define BASE_TIMEINFO_MONOTONIC_FLAG (1U << 0) /* For system wide timestamp */ -#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1) +#define BASE_TIMEINFO_TIMESTAMP_FLAG (1U << 1) /* For GPU cycle counter */ -#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2) +#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1U << 2) /* Specify kernel GPU register timestamp */ -#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30) +#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1U << 30) /* Specify userspace cntvct_el0 timestamp source */ -#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31) +#define BASE_TIMEINFO_USER_SOURCE_FLAG (1U << 31) -#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\ - BASE_TIMEINFO_MONOTONIC_FLAG | \ - BASE_TIMEINFO_TIMESTAMP_FLAG | \ - BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \ - BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ - BASE_TIMEINFO_USER_SOURCE_FLAG) +#define BASE_TIMEREQUEST_ALLOWED_FLAGS \ + (BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \ + BASE_TIMEINFO_CYCLE_COUNTER_FLAG | BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ + BASE_TIMEINFO_USER_SOURCE_FLAG) /* Maximum number of 
source allocations allowed to create an alias allocation. * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array diff --git a/include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h b/include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h index 70f5b0977520..994da42739c5 100644 --- a/include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h +++ b/include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2015, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,8 @@ #include #include "mali_base_common_kernel.h" -#define BASE_SYNCSET_OP_MSYNC (1U << 0) -#define BASE_SYNCSET_OP_CSYNC (1U << 1) +#define BASE_SYNCSET_OP_MSYNC (1U << 0) +#define BASE_SYNCSET_OP_CSYNC (1U << 1) /* * This structure describe a basic memory coherency operation. diff --git a/include/uapi/gpu/arm/bifrost/mali_gpu_props.h b/include/uapi/gpu/arm/bifrost/mali_gpu_props.h new file mode 100644 index 000000000000..3640ad02b0e2 --- /dev/null +++ b/include/uapi/gpu/arm/bifrost/mali_gpu_props.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_MALI_GPUPROPS_H_ +#define _UAPI_MALI_GPUPROPS_H_ + +#include +#include "mali_base_common_kernel.h" + +#define BASE_MAX_COHERENT_GROUPS 16 +#define GPU_MAX_JOB_SLOTS 16 + +/** + * struct gpu_props_user_data - structure for gpu props user buffer. + * @core_props: Core props. + * @l2_props: L2 props. + * @tiler_props: Tiler props. + * @thread_props: Thread props. + * @raw_props: Raw register values kept for backwards compatibility. Kbase + * and base should never reference values within this struct. + * @coherency_info: Coherency information. + * + * This structure is used solely for the encoding and decoding of the prop_buffer + * returned by kbase. + */ +struct gpu_props_user_data { + struct { + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; + __u8 num_exec_engines; + } core_props; + struct { + __u8 log2_line_size; + __u8 log2_cache_size; + __u8 num_l2_slices; + } l2_props; + struct { + __u32 bin_size_bytes; + __u32 max_active_levels; + } tiler_props; + struct { + __u32 max_threads; + __u32 max_workgroup_size; + __u32 max_barrier_size; + __u32 max_registers; + __u8 max_task_queue; + __u8 max_thread_group_split; + __u8 impl_tech; + __u32 tls_alloc; + } thread_props; + + /* kept for backward compatibility, should not be used in the future. 
*/ + struct { + __u64 shader_present; + __u64 tiler_present; + __u64 l2_present; + __u64 stack_present; + __u64 l2_features; + __u64 core_features; + __u64 mem_features; + __u64 mmu_features; + __u32 as_present; + __u32 js_present; + __u32 js_features[GPU_MAX_JOB_SLOTS]; + __u64 tiler_features; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_id; + __u32 thread_max_threads; + __u32 thread_max_workgroup_size; + __u32 thread_max_barrier_size; + __u32 thread_features; + __u32 coherency_mode; + __u32 thread_tls_alloc; + __u64 gpu_features; + } raw_props; + struct { + __u32 num_groups; + __u32 num_core_groups; + __u32 coherency; + struct { + __u64 core_mask; + __u32 num_cores; + } group[BASE_MAX_COHERENT_GROUPS]; + } coherency_info; +}; + +#endif /* _UAPI_MALI_GPUPROPS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h index 5089bf249528..81e3980c3973 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,25 +27,26 @@ /* The ids of ioctl commands. 
*/ #define KBASE_HWCNT_READER 0xBE -#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32) +#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32) #define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32) -#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32) -#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32) -#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ - offsetof(struct kbase_hwcnt_reader_metadata, cycles)) -#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ - struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ - offsetof(struct kbase_hwcnt_reader_metadata, cycles)) -#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ - struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32) -#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32) -#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32) +#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32) +#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32) +#define KBASE_HWCNT_READER_GET_BUFFER \ + _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20, \ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES \ + _IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_PUT_BUFFER \ + _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21, \ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES \ + _IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32) +#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32) 
+#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32) #define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32) #define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ - _IOW(KBASE_HWCNT_READER, 0xFF, \ - struct kbase_hwcnt_reader_api_version) + _IOW(KBASE_HWCNT_READER, 0xFF, struct kbase_hwcnt_reader_api_version) /** * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles @@ -117,8 +118,7 @@ enum prfcnt_list_type { PRFCNT_LIST_TYPE_SAMPLE_META, }; -#define FLEX_LIST_TYPE(type, subtype) \ - ((__u16)(((type & 0xf) << 12) | (subtype & 0xfff))) +#define FLEX_LIST_TYPE(type, subtype) ((__u16)(((type & 0xf) << 12) | (subtype & 0xfff))) #define FLEX_LIST_TYPE_NONE FLEX_LIST_TYPE(0, 0) #define PRFCNT_ENUM_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 0) @@ -129,12 +129,9 @@ enum prfcnt_list_type { #define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1) #define PRFCNT_REQUEST_TYPE_SCOPE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 2) -#define PRFCNT_SAMPLE_META_TYPE_SAMPLE \ - FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0) -#define PRFCNT_SAMPLE_META_TYPE_CLOCK \ - FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 1) -#define PRFCNT_SAMPLE_META_TYPE_BLOCK \ - FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 2) +#define PRFCNT_SAMPLE_META_TYPE_SAMPLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0) +#define PRFCNT_SAMPLE_META_TYPE_CLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 1) +#define PRFCNT_SAMPLE_META_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 2) /** * struct prfcnt_item_header - Header for an item of the list. @@ -152,6 +149,8 @@ struct prfcnt_item_header { * @PRFCNT_BLOCK_TYPE_TILER: Tiler. * @PRFCNT_BLOCK_TYPE_MEMORY: Memory System. * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core. + * @PRFCNT_BLOCK_TYPE_FW: Firmware. + * @PRFCNT_BLOCK_TYPE_CSG: CSG. * @PRFCNT_BLOCK_TYPE_RESERVED: Reserved. 
*/ enum prfcnt_block_type { @@ -159,6 +158,8 @@ enum prfcnt_block_type { PRFCNT_BLOCK_TYPE_TILER, PRFCNT_BLOCK_TYPE_MEMORY, PRFCNT_BLOCK_TYPE_SHADER_CORE, + PRFCNT_BLOCK_TYPE_FW, + PRFCNT_BLOCK_TYPE_CSG, PRFCNT_BLOCK_TYPE_RESERVED = 255, }; @@ -491,13 +492,13 @@ struct prfcnt_sample_access { /* The ids of ioctl commands, on a reader file descriptor, magic number */ #define KBASE_KINSTR_PRFCNT_READER 0xBF /* Ioctl ID for issuing a session operational command */ -#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \ +#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \ _IOW(KBASE_KINSTR_PRFCNT_READER, 0x00, struct prfcnt_control_cmd) /* Ioctl ID for fetching a dumpped sample */ -#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \ +#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \ _IOR(KBASE_KINSTR_PRFCNT_READER, 0x01, struct prfcnt_sample_access) /* Ioctl ID for release internal buffer of the previously fetched sample */ -#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \ +#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \ _IOW(KBASE_KINSTR_PRFCNT_READER, 0x10, struct prfcnt_sample_access) #endif /* _UAPI_KBASE_HWCNT_READER_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h index c8a54f91165e..d60745f564b0 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,7 @@ #ifndef _UAPI_KBASE_IOCTL_H_ #define _UAPI_KBASE_IOCTL_H_ -#ifdef __cpluscplus +#ifdef __cplusplus extern "C" { #endif @@ -162,7 +162,7 @@ struct kbase_ioctl_hwcnt_reader_setup { __u32 mmu_l2_bm; }; -#define KBASE_IOCTL_HWCNT_READER_SETUP \ +#define KBASE_IOCTL_HWCNT_READER_SETUP \ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) /** @@ -276,7 +276,7 @@ union kbase_ioctl_mem_find_cpu_offset { } out; }; -#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) /** @@ -445,7 +445,7 @@ struct kbase_ioctl_sticky_resource_map { __u64 address; }; -#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ +#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map) /** @@ -459,7 +459,7 @@ struct kbase_ioctl_sticky_resource_unmap { __u64 address; }; -#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ +#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap) /** @@ -487,7 +487,7 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { } out; }; -#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ +#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) #define KBASE_IOCTL_CINSTR_GWT_START _IO(KBASE_IOCTL_TYPE, 33) @@ -565,7 +565,7 @@ union kbase_ioctl_get_cpu_gpu_timeinfo { } out; }; -#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ +#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) /** @@ -577,7 +577,7 @@ struct kbase_ioctl_context_priority_check { __u8 priority; }; -#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ +#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ _IOWR(KBASE_IOCTL_TYPE, 54, struct 
kbase_ioctl_context_priority_check) /** @@ -589,7 +589,7 @@ struct kbase_ioctl_set_limited_core_count { __u8 max_core_count; }; -#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ +#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count) /** @@ -610,7 +610,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info { __u64 info_list_ptr; }; -#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \ +#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \ _IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info) /** @@ -639,7 +639,7 @@ union kbase_ioctl_kinstr_prfcnt_setup { } out; }; -#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \ +#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \ _IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup) /*************** @@ -782,7 +782,7 @@ struct kbase_ioctl_tlstream_stats { #define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 #define KBASE_GPUPROP_TLS_ALLOC 84 #define KBASE_GPUPROP_RAW_GPU_FEATURES 85 -#ifdef __cpluscplus +#ifdef __cplusplus } #endif From 6b494eb5a1af72f9495fc0bffa46c76a1691dcc1 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Thu, 30 Nov 2023 11:15:32 +0800 Subject: [PATCH 10/12] Mali: bifrost: Fix: When calling devfreq_add_device(), restore passing of '&ondemand_data' Change-Id: Ia2987763b90223f7333e2a24dacc05840309709c Signed-off-by: Zhen Chen --- drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c index 6b29228765cd..3667ce6d14b5 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c @@ -714,7 +714,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) &ondemand_data.upthreshold); of_property_read_u32(np, "downdifferential", &ondemand_data.downdifferential); - kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", 
NULL); + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", &ondemand_data); if (IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); kbdev->devfreq = NULL; From 44ee8c59b5db9d4421709a581d30ceea24ad5b87 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Wed, 27 Dec 2023 17:47:27 +0800 Subject: [PATCH 11/12] MALI: bifrost: log: Disable WARNINGs in validate_tracepoint_data() After integrating DDK g21(r46), the driver outputs the following WARNING after waking from sleep: [ 293.271414][ T209] prev_wp_active_end_time 18446744073605310680 > start_time 1119644647 for aid 1000 active_cnt 0 [ 293.224787][ T375] WARNING: CPU: 3 PID: 375 at drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c:84 emit_tracepoint_for_active_gpu_metrics_ctx+0x138/0x1c4 [ 293.224825][ T375] Modules linked in: bcmdhd dhd_static_buf r8168 [ 293.224873][ T375] CPU: 3 PID: 375 Comm: kworker/u17:2 Not tainted 5.10.160 #1050 [ 293.224888][ T375] Hardware name: Rockchip RK3588 EVB1 LP4 V10 Board (DT) [ 293.224911][ T375] Workqueue: csf_scheduler_gpu_idle_ gpu_idle_worker [ 293.224936][ T375] pstate: 60c00009 (nZCv daif +PAN +UAO -TCO BTYPE=--) [ 293.224955][ T375] pc : emit_tracepoint_for_active_gpu_metrics_ctx+0x138/0x1c4 [ 293.224973][ T375] lr : emit_tracepoint_for_active_gpu_metrics_ctx+0x138/0x1c4 ... 
[ 293.225343][ T375] Call trace: [ 293.225364][ T375] emit_tracepoint_for_active_gpu_metrics_ctx+0x138/0x1c4 [ 293.225382][ T375] kbase_gpu_metrics_emit_tracepoint+0x54/0xf4 [ 293.225403][ T375] cleanup_csg_slot+0x20c/0x4f8 [ 293.225423][ T375] wait_csg_slots_suspend+0x320/0x3bc [ 293.225440][ T375] suspend_active_groups_on_powerdown+0xa8/0x1cc [ 293.225458][ T375] scheduler_suspend_on_idle+0x24/0x828 [ 293.225474][ T375] gpu_idle_worker+0x384/0x5c4 [ 293.225495][ T375] process_one_work+0x1f0/0x478 [ 293.225511][ T375] worker_thread+0x270/0x4cc [ 293.225531][ T375] kthread+0x138/0x340 [ 293.225552][ T375] ret_from_fork+0x10/0x18 The WARNING is due to the driver expecting 'gpu_ts' (gpu_timestamp) from the FW to be monotonically increasing, but here "after waking up, 'gpu_ts' is smaller than before sleep." 'gpu_ts' comes from the GPU's input signal CNTVALUEB. CNTVALUEB comes from the 'stimer' timer. derrick.huang@rock-chips.com confirmed that this timer loses power during sleep, and its count is reset when the system wakes up. This causes "'gpu_ts' to be smaller after waking up than before sleep." derrick.huang also mentioned that "for RK3588, it is not possible to make 'stimer' not lose power in the product." Here, we are temporarily disabling this WARNING. It is speculated that sleep-wake cycles might lead to certain profile features of the GPU not functioning correctly. Further testing will be conducted to check for other potential issues. 
Signed-off-by: Zhen Chen Change-Id: I9979bff7c7fb946437889561e90440de1037bb35 --- drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c index 7b33f86644ea..8e00c0dc830e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c @@ -67,7 +67,7 @@ static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_ static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 start_time, u64 end_time, u64 total_active) { -#ifdef CONFIG_MALI_BIFROST_DEBUG +#if 0 WARN(total_active > NSEC_PER_SEC, "total_active %llu > 1 second for aid %u active_cnt %u", total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); From 5f0de7c5d524f0934c83848c6d20526364fc5bb9 Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Wed, 28 Jun 2023 16:58:09 +0800 Subject: [PATCH 12/12] drm/rockchip: vop2: Add devfreq support Signed-off-by: Finley Xiao Signed-off-by: Sandy Huang Change-Id: I0bde28f52dd3d734aa3f26adfe9ca8ece8febd65 --- drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 9 + drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 41 +++ drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 264 ++++++++++++++++++- 3 files changed, 311 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index 2e8739b4fcab..0a23f29c147a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -70,6 +70,7 @@ enum rockchip_drm_debug_category { VOP_DEBUG_OVERLAY = BIT(1), VOP_DEBUG_WB = BIT(2), VOP_DEBUG_CFG_DONE = BIT(3), + VOP_DEBUG_CLK = BIT(4), VOP_DEBUG_VSYNC = BIT(7), }; @@ -113,6 +114,12 @@ enum rockchip_drm_split_area { ROCKCHIP_DRM_SPLIT_RIGHT_SIDE = 2, }; +enum rockchip_drm_vop_aclk_mode { + ROCKCHIP_VOP_ACLK_NORMAL_MODE = 
0, + ROCKCHIP_VOP_ACLK_ADVANCED_MODE = 1, + ROCKCHIP_VOP_ACLK_MAX_MODE = 2, +}; + struct rockchip_drm_sub_dev { struct list_head list; struct drm_connector *connector; @@ -459,6 +466,7 @@ struct rockchip_crtc_funcs { int (*wait_vact_end)(struct drm_crtc *crtc, unsigned int mstimeout); void (*crtc_standby)(struct drm_crtc *crtc, bool standby); int (*crtc_set_color_bar)(struct drm_crtc *crtc, enum rockchip_color_bar_mode mode); + int (*set_aclk)(struct drm_crtc *crtc, enum rockchip_drm_vop_aclk_mode aclk_mode); }; struct rockchip_dclk_pll { @@ -524,6 +532,7 @@ struct rockchip_drm_private { dma_addr_t cubic_lut_dma_addr; void *cubic_lut_kvaddr; + int aclk_adjust_frame_num; struct drm_mm_node *clut_reserved_node; struct loader_cubic_lut cubic_lut[ROCKCHIP_MAX_CRTC]; }; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index 91cb11985db8..69c9369bb68a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -175,6 +175,59 @@ static int rockchip_drm_bandwidth_atomic_check(struct drm_device *dev, return 0; }
+/**
+ * rockchip_drm_aclk_adjust - choose the VOP aclk mode for every active CRTC
+ * @dev: DRM device whose CRTCs are walked
+ * @state: atomic state of the commit (not used)
+ * @vop_bw_info: bandwidth summary; only its plane_num_4k field is read
+ *
+ * ROCKCHIP_VOP_ACLK_ADVANCED_MODE is requested when a 4K plane is reported,
+ * when more than one CRTC is active, or when crtc_hdisplay exceeds 4096. Each
+ * such request re-arms priv->aclk_adjust_frame_num to 2; while that counter
+ * is still positive the advanced mode keeps being requested and the counter
+ * is decremented, and only afterwards ROCKCHIP_VOP_ACLK_NORMAL_MODE is
+ * requested. CRTCs without a set_aclk() hook are skipped.
+ *
+ * Return: always 0.
+ */
+static int rockchip_drm_aclk_adjust(struct drm_device *dev,
+				    struct drm_atomic_state *state,
+				    struct dmcfreq_vop_info *vop_bw_info)
+{
+	struct rockchip_drm_private *priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	int active_crtcs = 0;
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->state->active)
+			active_crtcs++;
+	}
+
+	drm_for_each_crtc(crtc, dev) {
+		const struct rockchip_crtc_funcs *funcs;
+
+		if (!crtc->state->active)
+			continue;
+
+		funcs = priv->crtc_funcs[drm_crtc_index(crtc)];
+		if (!funcs || !funcs->set_aclk)
+			continue;
+
+		if (vop_bw_info->plane_num_4k || active_crtcs > 1 ||
+		    crtc->state->adjusted_mode.crtc_hdisplay > 4096) {
+			funcs->set_aclk(crtc, ROCKCHIP_VOP_ACLK_ADVANCED_MODE);
+			priv->aclk_adjust_frame_num = 2;
+		} else if (priv->aclk_adjust_frame_num >= 1) {
+			funcs->set_aclk(crtc, ROCKCHIP_VOP_ACLK_ADVANCED_MODE);
+			priv->aclk_adjust_frame_num--;
+		} else {
+			funcs->set_aclk(crtc, ROCKCHIP_VOP_ACLK_NORMAL_MODE);
+		}
+	}
+
+	return 0;
+}
+
 static void drm_atomic_helper_connector_commit(struct drm_device *dev, struct drm_atomic_state *old_state) { @@ -215,6 +268,8 @@ static void rockchip_drm_atomic_helper_commit_tail_rpm(struct drm_atomic_state * rockchip_drm_bandwidth_atomic_check(dev, old_state, &vop_bw_info); + rockchip_drm_aclk_adjust(dev, old_state, &vop_bw_info); + rockchip_dmcfreq_vop_bandwidth_update(&vop_bw_info); mutex_lock(&prv->ovl_lock); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 2c7a66adfb75..7d86537d8403 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -47,9 +47,12 @@ #include #include #include +#include +#include #include #include +#include <../drivers/devfreq/governor.h> #include "../drm_crtc_internal.h" #include "../drm_internal.h" @@ -836,7 +839,7 @@ struct vop2 { bool loader_protect; bool aclk_rate_reset; - unsigned long aclk_rate; + unsigned long aclk_current_freq; const struct vop2_data *data; /* Number of win that registered as plane, @@ -896,6 +899,16 @@ struct vop2 { struct workqueue_struct *workqueue; struct vop2_layer layers[ROCKCHIP_MAX_LAYER]; + +#ifdef CONFIG_PM_DEVFREQ + struct rockchip_opp_info opp_info; + struct devfreq *devfreq; + struct monitor_dev_info *mdev_info; + struct opp_table *opp_table; + unsigned long aclk_target_freq; + u32 aclk_mode_rate[ROCKCHIP_VOP_ACLK_MAX_MODE]; +#endif + /* must put at the end of the struct */ struct vop2_win win[]; }; @@ -944,6 +957,7 @@ static const struct drm_bus_format_enum_list drm_bus_format_enum_list[] = { }; static DRM_ENUM_NAME_FN(drm_get_bus_format_name, drm_bus_format_enum_list) +static int vop2_devfreq_set_aclk(struct drm_crtc *crtc, enum rockchip_drm_vop_aclk_mode aclk_mode); static inline struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) { @@ -4335,7 +4349,7 @@ static void
vop2_crtc_atomic_enter_psr(struct drm_crtc *crtc, struct drm_crtc_st adjust_aclk_rate = (pre_scan_hblank + pre_scan_hactive) * dclk_rate * 12 / 10 / htotal; - vop2->aclk_rate = clk_get_rate(vop2->aclk); + vop2->aclk_current_freq = clk_get_rate(vop2->aclk); clk_set_rate(vop2->aclk, adjust_aclk_rate * 1000000L); vop2->aclk_rate_reset = true; } @@ -4351,7 +4365,7 @@ static void vop2_crtc_atomic_exit_psr(struct drm_crtc *crtc, struct drm_crtc_sta drm_crtc_vblank_on(crtc); if (vop2->aclk_rate_reset) - clk_set_rate(vop2->aclk, vop2->aclk_rate); + clk_set_rate(vop2->aclk, vop2->aclk_current_freq); vop2->aclk_rate_reset = false; for_each_set_bit(phys_id, &enabled_win_mask, ROCKCHIP_MAX_LAYER) { @@ -6831,6 +6845,7 @@ static const struct rockchip_crtc_funcs private_crtc_funcs = { .wait_vact_end = vop2_crtc_wait_vact_end, .crtc_standby = vop2_crtc_standby, .crtc_set_color_bar = vop2_crtc_set_color_bar, + .set_aclk = vop2_devfreq_set_aclk, }; static bool vop2_crtc_mode_fixup(struct drm_crtc *crtc, @@ -11885,6 +11900,293 @@ static void vop2_plane_mask_assign(struct vop2 *vop2, struct device_node *vop_ou } }
+#ifdef CONFIG_PM_DEVFREQ
+static struct monitor_dev_profile vop2_mdevp = {
+	.type = MONITOR_TYPE_DEV,
+	.low_temp_adjust = rockchip_monitor_dev_low_temp_adjust,
+	.high_temp_adjust = rockchip_monitor_dev_high_temp_adjust,
+	.update_volt = rockchip_monitor_check_rate_volt,
+};
+
+/*
+ * Governor callback: report the rate last requested via
+ * vop2_devfreq_set_aclk(), or the previous frequency when the devfreq
+ * instance carries no driver data.
+ */
+static int devfreq_vop2_ondemand_func(struct devfreq *df, unsigned long *freq)
+{
+	struct vop2 *vop2 = df->data;
+
+	if (vop2)
+		*freq = vop2->aclk_target_freq;
+	else
+		*freq = df->previous_freq;
+
+	return 0;
+}
+
+/* Governor events are accepted but need no handling here. */
+static int devfreq_vop2_ondemand_handler(struct devfreq *devfreq,
+					 unsigned int event, void *data)
+{
+	return 0;
+}
+
+static struct devfreq_governor devfreq_vop2_ondemand = {
+	.name = "vop2_ondemand",
+	.get_target_freq = devfreq_vop2_ondemand_func,
+	.event_handler = devfreq_vop2_ondemand_handler,
+};
+
+/**
+ * vop2_devfreq_set_aclk - request the aclk rate configured for @aclk_mode
+ * @crtc: CRTC issuing the request
+ * @aclk_mode: index into vop2->aclk_mode_rate[]
+ *
+ * All video ports share one aclk, so only a request coming from the first
+ * active CRTC is acted upon; requests from other CRTCs, and any request made
+ * while devfreq is not set up, are silently ignored.
+ *
+ * Return: 0 on success or when the request is ignored, -EINVAL for an
+ * out-of-range @aclk_mode, otherwise the error from update_devfreq().
+ */
+static int vop2_devfreq_set_aclk(struct drm_crtc *crtc, enum rockchip_drm_vop_aclk_mode aclk_mode)
+{
+	struct vop2_video_port *vp = to_vop2_video_port(crtc);
+	struct vop2 *vop2 = vp->vop2;
+	struct drm_crtc *first_active_crtc = NULL;
+	int i, ret;
+
+	if (!vop2->devfreq)
+		return 0;
+
+	/* aclk_mode indexes aclk_mode_rate[]; never read past its end */
+	if (aclk_mode >= ROCKCHIP_VOP_ACLK_MAX_MODE)
+		return -EINVAL;
+
+	/* all vp/crtc share one vop aclk, so only need to set once */
+	for (i = 0; i < vop2->data->nr_vps; i++) {
+		if (vop2->vps[i].rockchip_crtc.crtc.state->active) {
+			first_active_crtc = &vop2->vps[i].rockchip_crtc.crtc;
+			break;
+		}
+	}
+	if (first_active_crtc != crtc)
+		return 0;
+
+	vop2->aclk_target_freq = vop2->aclk_mode_rate[aclk_mode];
+
+	mutex_lock(&vop2->devfreq->lock);
+	ret = update_devfreq(vop2->devfreq);
+	mutex_unlock(&vop2->devfreq->lock);
+	if (ret)
+		dev_err(vop2->dev, "failed to set rate %lu\n", vop2->aclk_target_freq);
+
+	return ret;
+}
+
+/**
+ * vop2_devfreq_target - devfreq hook applying the OPP recommended for @freq
+ * @dev: VOP2 device
+ * @freq: requested rate in, recommended OPP rate out
+ * @flags: devfreq flags handed to devfreq_recommended_opp()
+ *
+ * Return: 0 on success or when aclk already runs at that rate, -EINVAL while
+ * vop2_mdevp.is_checked is unset, otherwise the OPP lookup or set-rate error.
+ */
+static int vop2_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+	struct vop2 *vop2 = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	int ret;
+
+	if (!vop2_mdevp.is_checked)
+		return -EINVAL;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(opp)) {
+		dev_err(dev, "Failed to find opp for %lu Hz\n", *freq);
+		return PTR_ERR(opp);
+	}
+	dev_pm_opp_put(opp);
+
+	if (*freq == vop2->aclk_current_freq)
+		return 0;
+
+	rockchip_monitor_volt_adjust_lock(vop2->mdev_info);
+	ret = dev_pm_opp_set_rate(dev, *freq);
+	if (!ret) {
+		rockchip_drm_dbg(vop2->dev, VOP_DEBUG_CLK,
+				 "Set VOP aclk from %lu to %lu\n", vop2->aclk_current_freq, *freq);
+		vop2->aclk_current_freq = *freq;
+		vop2->devfreq->last_status.current_frequency = *freq;
+	}
+	rockchip_monitor_volt_adjust_unlock(vop2->mdev_info);
+
+	return ret;
+}
+
+/* No load statistics are collected; the governor does not use them. */
+static int vop2_devfreq_get_dev_status(struct device *dev,
+				       struct devfreq_dev_status *stat)
+{
+	return 0;
+}
+
+/* Report the aclk rate cached in vop2->aclk_current_freq. */
+static int vop2_devfreq_get_cur_freq(struct device *dev,
+				     unsigned long *freq)
+{
+	struct vop2 *vop2 = dev_get_drvdata(dev);
+
+	*freq = vop2->aclk_current_freq;
+
+	return 0;
+}
+
+static struct devfreq_dev_profile vop2_devfreq_profile = {
+	.target = vop2_devfreq_target,
+	.get_dev_status = vop2_devfreq_get_dev_status,
+	.get_cur_freq = vop2_devfreq_get_cur_freq,
+};
+
+/**
+ * rockchip_vop2_devfreq_init - set up OPP/devfreq scaling of the VOP aclk
+ * @vop2: VOP2 instance
+ *
+ * Scaling is optional: without a "vop-supply" property nothing is set up.
+ * On failure everything acquired so far is released and vop2->devfreq and
+ * vop2->opp_table are left NULL, so vop2_devfreq_set_aclk() and
+ * rockchip_vop2_devfreq_uninit() remain safe to call afterwards.
+ *
+ * Return: 0 on success or when scaling is not used, negative errno otherwise.
+ */
+static int rockchip_vop2_devfreq_init(struct vop2 *vop2)
+{
+	struct devfreq_dev_profile *dev_profile = &vop2_devfreq_profile;
+	const char *const reg_names[] = { "vop" };
+	struct dev_pm_opp *opp;
+	int ret;
+
+	if (!of_find_property(vop2->dev->of_node, "vop-supply", NULL)) {
+		dev_dbg(vop2->dev, "failed to get vop supply\n");
+		return 0;
+	}
+
+	vop2->opp_table = dev_pm_opp_set_regulators(vop2->dev, reg_names, 1);
+	if (IS_ERR(vop2->opp_table)) {
+		dev_err(vop2->dev, "failed to set regulators\n");
+		vop2->opp_table = NULL;
+		return -EINVAL;
+	}
+
+	ret = rockchip_init_opp_table(vop2->dev, &vop2->opp_info, "leakage", "vop");
+	if (ret) {
+		dev_err(vop2->dev, "failed to init_opp_table\n");
+		goto err_put_regulators;
+	}
+
+	vop2->aclk_current_freq = clk_get_rate(vop2->aclk);
+	opp = devfreq_recommended_opp(vop2->dev, &vop2->aclk_current_freq, 0);
+	if (IS_ERR(opp)) {
+		ret = PTR_ERR(opp);
+		goto err_remove_table;
+	}
+	dev_pm_opp_put(opp);
+	dev_profile->initial_freq = vop2->aclk_current_freq;
+
+	ret = devfreq_add_governor(&devfreq_vop2_ondemand);
+	if (ret) {
+		dev_err(vop2->dev, "failed to add vop2_ondemand governor\n");
+		goto err_remove_table;
+	}
+
+	vop2->devfreq = devm_devfreq_add_device(vop2->dev, dev_profile, "vop2_ondemand", vop2);
+	if (IS_ERR(vop2->devfreq)) {
+		dev_err(vop2->dev, "failed to add devfreq\n");
+		ret = PTR_ERR(vop2->devfreq);
+		/* later users only test for NULL, so never keep the ERR_PTR */
+		vop2->devfreq = NULL;
+		goto err_remove_governor;
+	}
+	devm_devfreq_register_opp_notifier(vop2->dev, vop2->devfreq);
+
+	vop2->devfreq->last_status.current_frequency = dev_profile->initial_freq;
+	vop2->devfreq->last_status.total_time = 1;
+	vop2->devfreq->last_status.busy_time = 1;
+
+	vop2_mdevp.data = vop2->devfreq;
+	vop2_mdevp.opp_info = &vop2->opp_info;
+	vop2->mdev_info = rockchip_system_monitor_register(vop2->dev, &vop2_mdevp);
+	if (IS_ERR(vop2->mdev_info)) {
+		dev_dbg(vop2->dev, "without system monitor\n");
+		vop2->mdev_info = NULL;
+	}
+	vop2->aclk_current_freq = clk_get_rate(vop2->aclk);
+
+	of_property_read_u32(vop2->dev->of_node, "rockchip,aclk-normal-mode-rates",
+			     &vop2->aclk_mode_rate[ROCKCHIP_VOP_ACLK_NORMAL_MODE]);
+
+	of_property_read_u32(vop2->dev->of_node, "rockchip,aclk-advanced-mode-rates",
+			     &vop2->aclk_mode_rate[ROCKCHIP_VOP_ACLK_ADVANCED_MODE]);
+
+	dev_info(vop2->dev, "Supported VOP aclk dvfs, normal mode:%u, advanced mode:%u\n",
+		 vop2->aclk_mode_rate[ROCKCHIP_VOP_ACLK_NORMAL_MODE],
+		 vop2->aclk_mode_rate[ROCKCHIP_VOP_ACLK_ADVANCED_MODE]);
+
+	return 0;
+
+err_remove_governor:
+	devfreq_remove_governor(&devfreq_vop2_ondemand);
+err_remove_table:
+	rockchip_uninit_opp_table(vop2->dev, &vop2->opp_info);
+err_put_regulators:
+	dev_pm_opp_put_regulators(vop2->opp_table);
+	vop2->opp_table = NULL;
+
+	return ret;
+}
+
+/* Undo rockchip_vop2_devfreq_init(); parts that were never set up are skipped. */
+static void rockchip_vop2_devfreq_uninit(struct vop2 *vop2)
+{
+	if (vop2->mdev_info) {
+		rockchip_system_monitor_unregister(vop2->mdev_info);
+		vop2->mdev_info = NULL;
+	}
+	if (vop2->devfreq) {
+		devm_devfreq_unregister_opp_notifier(vop2->dev, vop2->devfreq);
+		devm_devfreq_remove_device(vop2->dev, vop2->devfreq);
+		vop2->devfreq = NULL;
+		devfreq_remove_governor(&devfreq_vop2_ondemand);
+		if (vop2_devfreq_profile.freq_table) {
+			devm_kfree(vop2->dev, vop2_devfreq_profile.freq_table);
+			vop2_devfreq_profile.freq_table = NULL;
+			vop2_devfreq_profile.max_state = 0;
+		}
+	}
+	if (vop2->opp_table) {
+		rockchip_uninit_opp_table(vop2->dev, &vop2->opp_info);
+		dev_pm_opp_put_regulators(vop2->opp_table);
+		vop2->opp_table = NULL;
+	}
+}
+#else
+static inline int vop2_devfreq_set_aclk(struct drm_crtc *crtc, enum rockchip_drm_vop_aclk_mode aclk_mode)
+{
+	return 0;
+}
+
+static inline int rockchip_vop2_devfreq_init(struct vop2 *vop2)
+{
+	return 0;
+}
+
+static inline void rockchip_vop2_devfreq_uninit(struct vop2 *vop2)
+{
+}
+#endif
+
 static int vop2_bind(struct device *dev, struct device *master,
void *data) { struct platform_device *pdev = to_platform_device(dev); @@ -12109,6 +12411,7 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) vop2_cubic_lut_init(vop2); vop2_wb_connector_init(vop2, registered_num_crtcs); pm_runtime_enable(&pdev->dev); + rockchip_vop2_devfreq_init(vop2); return 0; } @@ -12122,6 +12425,7 @@ static void vop2_unbind(struct device *dev, struct device *master, void *data) struct drm_crtc *crtc, *tmpc; struct drm_plane *plane, *tmpp; + rockchip_vop2_devfreq_uninit(vop2); pm_runtime_disable(dev); list_for_each_entry_safe(plane, tmpp, plane_list, head)