From d182dc6de93225cd853de4db68a1a77501bedb6e Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 24 Oct 2022 13:39:23 +0900 Subject: [PATCH 01/18] cpufreq: Generalize of_perf_domain_get_sharing_cpumask phandle format of_perf_domain_get_sharing_cpumask currently assumes a 1-argument phandle format, and directly returns the argument. Generalize this to return the full of_phandle_args, so it can be used by drivers which use other phandle styles (e.g. separate nodes). This also requires changing the CPU sharing match to compare the full args structure. Also, make sure to of_node_put(args.np) (the original code was leaking a reference). Signed-off-by: Hector Martin Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq-hw.c | 14 +++++++++----- include/linux/cpufreq.h | 28 +++++++++++++++------------ 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f0e0a35c7f21..f80339779084 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -160,6 +160,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, struct mtk_cpufreq_data *data; struct device *dev = &pdev->dev; struct resource *res; + struct of_phandle_args args; void __iomem *base; int ret, i; int index; @@ -168,11 +169,14 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, if (!data) return -ENOMEM; - index = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", - "#performance-domain-cells", - policy->cpus); - if (index < 0) - return index; + ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus, &args); + if (ret < 0) + return ret; + + index = args.args[0]; + of_node_put(args.np); res = platform_get_resource(pdev, IORESOURCE_MEM, index); if (!res) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e5..6a94a6eaad27 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1110,10 +1110,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int parse_perf_domain(int cpu, const char *list_name, - const char *cell_name) + const char *cell_name, + struct of_phandle_args *args) { struct device_node *cpu_np; - struct of_phandle_args args; int ret; cpu_np = of_cpu_device_node_get(cpu); @@ -1121,41 +1121,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name, return -ENODEV; ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0, - &args); + args); if (ret < 0) return ret; of_node_put(cpu_np); - return args.args[0]; + return 0; } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { - int target_idx; int cpu, ret; + struct of_phandle_args args; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(pcpu, list_name, cell_name, pargs); if (ret < 0) return ret; - target_idx = ret; cpumask_set_cpu(pcpu, cpumask); for_each_possible_cpu(cpu) { if (cpu == pcpu) continue; - ret = parse_perf_domain(cpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name, &args); if (ret < 0) continue; - if (target_idx == ret) + if (pargs->np == args.np && pargs->args_count == args.args_count && + !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) cpumask_set_cpu(cpu, cpumask); + + of_node_put(args.np); } - return target_idx; + return 0; } #else static inline int cpufreq_boost_trigger_state(int state) @@ -1185,7 +1188,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { return -EOPNOTSUPP; } From 68069b0d458bb06541641a294c15e06c5704ec2b Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 25 Oct 2022 13:02:50 +0530 Subject: [PATCH 02/18] cpufreq: qcom-hw: Remove un-necessary cpumask_empty() check CPUFreq core will always set the "policy->cpus" bitmask with the bitfield of the CPU that goes first per domain/policy. So there is no way the "policy->cpus" bitmask will be empty during qcom_cpufreq_hw_cpu_init(). Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 833589bc95e4..61850d75e82f 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -566,11 +566,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) data->per_core_dcvs = true; qcom_get_related_cpus(index, policy->cpus); - if (cpumask_empty(policy->cpus)) { - dev_err(dev, "Domain-%d failed to get related CPUs\n", index); - ret = -ENOENT; - goto error; - } policy->driver_data = data; policy->dvfs_possible_from_any_cpu = true; From 054a3ef683a176a509cc9b37f762029aae942495 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 2 Nov 2022 14:30:36 +0530 Subject: [PATCH 03/18] cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe qcom_cpufreq_data is allocated based on the number of frequency domains defined in DT which is static and won't change during runtime. There is no real reason to allocate it during the CPU init() callback and deallocate it during exit(). Hence, move the allocation to probe() and use the allocated memory during init(). This also allows us to use devm_platform_get_and_ioremap_resource() helper for acquiring the freq-domain resources from DT. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 90 ++++++++++++++----------------- 1 file changed, 39 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 61850d75e82f..25951a32b9d5 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -61,6 +61,10 @@ struct qcom_cpufreq_data { struct freq_qos_request throttle_freq_req; }; +static struct { + struct qcom_cpufreq_data *data; +} qcom_cpufreq; + static unsigned long cpu_hw_rate, xo_rate; static bool icc_scaling_enabled; @@ -503,8 +507,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) struct of_phandle_args args; struct device_node *cpu_np; struct device *cpu_dev; - struct resource *res; - void __iomem *base; struct qcom_cpufreq_data *data; int ret, index; @@ -526,43 +528,16 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) return ret; index = args.args[0]; + data->soc_data = of_device_get_match_data(&pdev->dev); + data = &qcom_cpufreq.data[index]; - res = platform_get_resource(pdev, IORESOURCE_MEM, index); - if (!res) { - dev_err(dev, "failed to get mem resource %d\n", index); + /* HW should be in enabled state to proceed */ + if (!(readl_relaxed(data->base + data->soc_data->reg_enable) & 0x1)) { + dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index); return -ENODEV; } - if (!request_mem_region(res->start, resource_size(res), res->name)) { - dev_err(dev, "failed to request resource %pR\n", res); - return -EBUSY; - } - - base = ioremap(res->start, resource_size(res)); - if (!base) { - dev_err(dev, "failed to map resource %pR\n", res); - ret = -ENOMEM; - goto release_region; - } - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto unmap_base; - } - - data->soc_data = of_device_get_match_data(&pdev->dev); - data->base = base; - data->res = res; - - /* HW should be in enabled state to proceed */ - if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { - dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index); - ret = -ENODEV; - goto error; - } - - if (readl_relaxed(base + data->soc_data->reg_dcvs_ctrl) & 0x1) + if (readl_relaxed(data->base + data->soc_data->reg_dcvs_ctrl) & 0x1) data->per_core_dcvs = true; qcom_get_related_cpus(index, policy->cpus); @@ -573,14 +548,13 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy); if (ret) { dev_err(dev, "Domain-%d failed to read LUT\n", index); - goto error; + return ret; } ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { dev_err(cpu_dev, "Failed to add OPPs\n"); - ret = -ENODEV; - goto error; + return -ENODEV; } if (policy_has_boost_freq(policy)) { @@ -589,18 +563,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); } - ret = qcom_cpufreq_hw_lmh_init(policy, index); - if (ret) - goto error; - - return 0; -error: - kfree(data); -unmap_base: - iounmap(base); -release_region: - release_mem_region(res->start, resource_size(res)); - return ret; + return qcom_cpufreq_hw_lmh_init(policy, index); } static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) @@ -657,7 +620,7 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { struct device *cpu_dev; struct clk *clk; - int ret; + int ret, i, num_domains; clk = clk_get(&pdev->dev, "xo"); if (IS_ERR(clk)) @@ -684,6 +647,31 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) if (ret) return ret; + /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ + num_domains = of_property_count_elems_of_size(pdev->dev.of_node, "reg", sizeof(u32) * 4); + if (num_domains <= 0) + return num_domains; + + qcom_cpufreq.data = devm_kzalloc(&pdev->dev, sizeof(struct qcom_cpufreq_data) * num_domains, + GFP_KERNEL); + if (!qcom_cpufreq.data) + return -ENOMEM; + + for (i = 0; i < num_domains; i++) { + struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; + struct resource *res; + void __iomem *base; + + base = devm_platform_get_and_ioremap_resource(pdev, i, &res); + if (IS_ERR(base)) { + dev_err(&pdev->dev, "Failed to map resource %pR\n", res); + return PTR_ERR(base); + } + + data->base = base; + data->res = res; + } + ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver); if (ret) dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n"); From 7cfa8553fe8c10c7e28eb651a6e58a7ba2d5e429 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 2 Nov 2022 14:30:37 +0530 Subject: [PATCH 04/18] cpufreq: qcom-hw: Use cached dev pointer in probe() There are multiple instances of dev pointer used in the probe() function. Instead of referencing pdev->dev all the time, let's use a cached dev pointer to simplify the code. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 25951a32b9d5..6d807956aaf6 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -618,18 +618,19 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct device *cpu_dev; struct clk *clk; int ret, i, num_domains; - clk = clk_get(&pdev->dev, "xo"); + clk = clk_get(dev, "xo"); if (IS_ERR(clk)) return PTR_ERR(clk); xo_rate = clk_get_rate(clk); clk_put(clk); - clk = clk_get(&pdev->dev, "alternate"); + clk = clk_get(dev, "alternate"); if (IS_ERR(clk)) return PTR_ERR(clk); @@ -648,11 +649,11 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) return ret; /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ - num_domains = of_property_count_elems_of_size(pdev->dev.of_node, "reg", sizeof(u32) * 4); + num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * 4); if (num_domains <= 0) return num_domains; - qcom_cpufreq.data = devm_kzalloc(&pdev->dev, sizeof(struct qcom_cpufreq_data) * num_domains, + qcom_cpufreq.data = devm_kzalloc(dev, sizeof(struct qcom_cpufreq_data) * num_domains, GFP_KERNEL); if (!qcom_cpufreq.data) return -ENOMEM; @@ -664,7 +665,7 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) base = devm_platform_get_and_ioremap_resource(pdev, i, &res); if (IS_ERR(base)) { - dev_err(&pdev->dev, "Failed to map resource %pR\n", res); + dev_err(dev, "Failed to map resource %pR\n", res); return PTR_ERR(base); } @@ -674,9 +675,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver); if (ret) - dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n"); + dev_err(dev, "CPUFreq HW driver failed to register\n"); else - dev_dbg(&pdev->dev, "QCOM CPUFreq HW driver initialized\n"); + dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); return ret; } From 4f7961706c63be4e55b720edb7748233761cfbf9 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 2 Nov 2022 14:30:38 +0530 Subject: [PATCH 05/18] cpufreq: qcom-hw: Move soc_data to struct qcom_cpufreq soc_data is a static info of the driver and thus no need to cache it inside the qcom_cpufreq_data struct which is allocated per frequency domain. So, move it inside qcom_cpufreq struct. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 6d807956aaf6..5e0598730a04 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -43,7 +43,6 @@ struct qcom_cpufreq_soc_data { struct qcom_cpufreq_data { void __iomem *base; struct resource *res; - const struct qcom_cpufreq_soc_data *soc_data; /* * Mutex to synchronize between de-init sequence and re-starting LMh @@ -63,6 +62,7 @@ struct qcom_cpufreq_data { static struct { struct qcom_cpufreq_data *data; + const struct qcom_cpufreq_soc_data *soc_data; } qcom_cpufreq; static unsigned long cpu_hw_rate, xo_rate; @@ -113,7 +113,7 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { struct qcom_cpufreq_data *data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; unsigned long freq = policy->freq_table[index].frequency; unsigned int i; @@ -141,7 +141,7 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) return 0; data = policy->driver_data; - soc_data = data->soc_data; + soc_data = qcom_cpufreq.soc_data; index = readl_relaxed(data->base + soc_data->reg_perf_state); index = min(index, LUT_MAX_ENTRIES - 1); @@ -153,7 +153,7 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { struct qcom_cpufreq_data *data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; unsigned int index; unsigned int i; @@ -177,7 +177,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, unsigned long rate; int ret; struct qcom_cpufreq_data *drv_data = policy->driver_data; - const struct qcom_cpufreq_soc_data *soc_data = drv_data->soc_data; + const struct qcom_cpufreq_soc_data *soc_data = qcom_cpufreq.soc_data; table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL); if (!table) @@ -294,10 +294,10 @@ static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) { unsigned int lval; - if (data->soc_data->reg_current_vote) - lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff; + if (qcom_cpufreq.soc_data->reg_current_vote) + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_current_vote) & 0x3ff; else - lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff; + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_domain_state) & 0xff; return lval * xo_rate; } @@ -371,9 +371,9 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) disable_irq_nosync(c_data->throttle_irq); schedule_delayed_work(&c_data->throttle_work, 0); - if (c_data->soc_data->reg_intr_clr) + if (qcom_cpufreq.soc_data->reg_intr_clr) writel_relaxed(GT_IRQ_STATUS, - c_data->base + c_data->soc_data->reg_intr_clr); + c_data->base + qcom_cpufreq.soc_data->reg_intr_clr); return IRQ_HANDLED; } @@ -528,16 +528,15 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) return ret; index = args.args[0]; - data->soc_data = of_device_get_match_data(&pdev->dev); data = &qcom_cpufreq.data[index]; /* HW should be in enabled state to proceed */ - if (!(readl_relaxed(data->base + data->soc_data->reg_enable) & 0x1)) { + if (!(readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_enable) & 0x1)) { dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index); return -ENODEV; } - if (readl_relaxed(data->base + data->soc_data->reg_dcvs_ctrl) & 0x1) + if (readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_dcvs_ctrl) & 0x1) data->per_core_dcvs = true; qcom_get_related_cpus(index, policy->cpus); @@ -658,6 +657,8 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) if (!qcom_cpufreq.data) return -ENOMEM; + qcom_cpufreq.soc_data = of_device_get_match_data(dev); + for (i = 0; i < num_domains; i++) { struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; struct resource *res; From cb140497fe4718f7393451b2f9309866722852a0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 4 Nov 2022 11:24:29 -0500 Subject: [PATCH 06/18] dt-bindings: cpufreq: qcom: Add missing cache related properties The examples' cache nodes are incomplete as 'cache-unified' and 'cache-level' are required cache properties. Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-qcom-hw.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 24fa3d87a40b..e58c55f78aaa 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -85,9 +85,13 @@ examples: qcom,freq-domain = <&cpufreq_hw 0>; L2_0: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-unified; + cache-level = <3>; }; }; }; @@ -101,6 +105,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 0>; L2_100: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -114,6 +120,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 0>; L2_200: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -127,6 +135,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 0>; L2_300: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -140,6 +150,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 1>; L2_400: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -153,6 +165,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 1>; L2_500: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -166,6 +180,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 1>; L2_600: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; @@ -179,6 +195,8 @@ examples: qcom,freq-domain = <&cpufreq_hw 1>; L2_700: l2-cache { compatible = "cache"; + cache-unified; + cache-level = <2>; next-level-cache = <&L3_0>; }; }; From aac0293a8f1cdf3ae4b2e97a66abc4f754d1c1d2 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 1 Nov 2022 13:09:31 -0500 Subject: [PATCH 07/18] cpufreq: ti-cpufreq: Add support for AM625 Add support for TI K3 AM625 SoC to read speed and revision values from hardware and pass to OPP layer. Signed-off-by: Dave Gerlach Signed-off-by: Vibhore Vardhan Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index f64180dd2005..be4209d97cb3 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -39,6 +39,14 @@ #define OMAP34xx_ProdID_SKUID 0x4830A20C #define OMAP3_SYSCON_BASE (0x48000000 + 0x2000 + 0x270) +#define AM625_EFUSE_K_MPU_OPP 11 +#define AM625_EFUSE_S_MPU_OPP 19 +#define AM625_EFUSE_T_MPU_OPP 20 + +#define AM625_SUPPORT_K_MPU_OPP BIT(0) +#define AM625_SUPPORT_S_MPU_OPP BIT(1) +#define AM625_SUPPORT_T_MPU_OPP BIT(2) + #define VERSION_COUNT 2 struct ti_cpufreq_data; @@ -104,6 +112,25 @@ static unsigned long omap3_efuse_xlate(struct ti_cpufreq_data *opp_data, return BIT(efuse); } +static unsigned long am625_efuse_xlate(struct ti_cpufreq_data *opp_data, + unsigned long efuse) +{ + unsigned long calculated_efuse = AM625_SUPPORT_K_MPU_OPP; + + switch (efuse) { + case AM625_EFUSE_T_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_T_MPU_OPP; + fallthrough; + case AM625_EFUSE_S_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_S_MPU_OPP; + fallthrough; + case AM625_EFUSE_K_MPU_OPP: + calculated_efuse |= AM625_SUPPORT_K_MPU_OPP; + } + + return calculated_efuse; +} + static struct ti_cpufreq_soc_data am3x_soc_data = { .efuse_xlate = amx3_efuse_xlate, .efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ, @@ -198,6 +225,14 @@ static struct ti_cpufreq_soc_data am3517_soc_data = { .multi_regulator = false, }; +static struct ti_cpufreq_soc_data am625_soc_data = { + .efuse_xlate = am625_efuse_xlate, + .efuse_offset = 0x0018, + .efuse_mask = 0x07c0, + .efuse_shift = 0x6, + .rev_offset = 0x0014, + .multi_regulator = false, +}; /** * ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC @@ -301,6 +336,7 @@ static const struct of_device_id ti_cpufreq_of_match[] = { { .compatible = "ti,dra7", .data = &dra7_soc_data }, { .compatible = "ti,omap34xx", .data = &omap34xx_soc_data, }, { .compatible = "ti,omap36xx", .data = &omap36xx_soc_data, }, + { .compatible = "ti,am625", .data = &am625_soc_data, }, /* legacy */ { .compatible = "ti,omap3430", .data = &omap34xx_soc_data, }, { .compatible = "ti,omap3630", .data = &omap36xx_soc_data, }, From e66e20d71d79e66b2bd03b51964a5a7eddc5e55c Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 1 Nov 2022 13:09:32 -0500 Subject: [PATCH 08/18] cpufreq: dt-platdev: Blacklist ti,am625 SoC Add ti,am625 SoC to the blacklist as the ti-cpufreq driver will handle creating the cpufreq-dt platform device after it completes so it is not created twice. Signed-off-by: Dave Gerlach Signed-off-by: Vibhore Vardhan Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 6ac3800db450..85ee11f79840 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -160,6 +160,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,am43", }, { .compatible = "ti,dra7", }, { .compatible = "ti,omap3", }, + { .compatible = "ti,am625", }, { .compatible = "qcom,ipq8064", }, { .compatible = "qcom,apq8064", }, From 6d4ee83b423fe663a8574d2f16b48b98cbfc9b26 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 1 Nov 2022 13:09:33 -0500 Subject: [PATCH 09/18] arm64: dts: ti: k3-am625: Introduce operating-points table Introduce an operating-points table for the A53 cores, containing only frequency values as this platform operates on a fixed voltage for the CPUs. Also provide opp-supported-hw values to ensure appropriate OPPs are enabled based on which type of silicon is in use. The latency between pre and post frequency transition was measured in CPUFreq driver for all combinations of OPP changes. The average value was selected as overall clock-latency-ns. Signed-off-by: Dave Gerlach Signed-off-by: Vibhore Vardhan Signed-off-by: Viresh Kumar --- arch/arm64/boot/dts/ti/k3-am625.dtsi | 51 ++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-am625.dtsi b/arch/arm64/boot/dts/ti/k3-am625.dtsi index 887f31c23fef..cea2cc7de5dd 100644 --- a/arch/arm64/boot/dts/ti/k3-am625.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am625.dtsi @@ -48,6 +48,8 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 135 0>; }; cpu1: cpu@1 { @@ -62,6 +64,8 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 136 0>; }; cpu2: cpu@2 { @@ -76,6 +80,8 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 137 0>; }; cpu3: cpu@3 { @@ -90,6 +96,51 @@ d-cache-line-size = <64>; d-cache-sets = <128>; next-level-cache = <&L2_0>; + operating-points-v2 = <&a53_opp_table>; + clocks = <&k3_clks 138 0>; + }; + }; + + a53_opp_table: opp-table { + compatible = "operating-points-v2-ti-cpu"; + opp-shared; + syscon = <&wkup_conf>; + + opp-200000000 { + opp-hz = /bits/ 64 <200000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-400000000 { + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + opp-supported-hw = <0x01 0x0007>; + clock-latency-ns = <6000000>; + }; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-supported-hw = <0x01 0x0006>; + clock-latency-ns = <6000000>; + }; + + opp-1250000000 { + opp-hz = /bits/ 64 <1250000000>; + opp-supported-hw = <0x01 0x0004>; + clock-latency-ns = <6000000>; + opp-suspend; }; }; From ddb72884c8bf88c25edb2a722fabbb7d642922d7 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 1 Nov 2022 13:09:34 -0500 Subject: [PATCH 10/18] cpufreq: ti: Enable ti-cpufreq for ARCH_K3 Make ti-cpufreq driver depend on ARCH_K3 and set it to `default y` so it is always enabled for platforms that it depends on. Signed-off-by: Dave Gerlach Signed-off-by: Vibhore Vardhan Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 82e5de1f6f8c..be590f498e6a 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -340,8 +340,8 @@ config ARM_TEGRA194_CPUFREQ config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" - depends on ARCH_OMAP2PLUS - default ARCH_OMAP2PLUS + depends on ARCH_OMAP2PLUS || ARCH_K3 + default y help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. Enable this in order to From 1313edfdcd79ac8f7aa92162db646eb0eb7a32c2 Mon Sep 17 00:00:00 2001 From: Vibhore Vardhan Date: Tue, 1 Nov 2022 13:09:35 -0500 Subject: [PATCH 11/18] arm64: dts: ti: k3-am625-sk: Add 1.4GHz OPP The 1.4 GHz OPP requires supported silicon variant (T speed grade) and also VDD_CORE to be at 0.85V. All production revisions of the AM625-SK have both so we can enable the 1.4 GHz OPP for it. Any other boards based on this design should verify that they have the right silicon variant and the right power tree before adding 1.4 GHz support in their board dts file. Signed-off-by: Vibhore Vardhan Signed-off-by: Viresh Kumar --- arch/arm64/boot/dts/ti/k3-am625-sk.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-am625-sk.dts b/arch/arm64/boot/dts/ti/k3-am625-sk.dts index 93a5f0817efc..4620ef5e19bb 100644 --- a/arch/arm64/boot/dts/ti/k3-am625-sk.dts +++ b/arch/arm64/boot/dts/ti/k3-am625-sk.dts @@ -31,6 +31,15 @@ bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,0x02800000"; }; + opp-table { + /* Add 1.4GHz OPP for am625-sk board. Requires VDD_CORE to be at 0.85V */ + opp-1400000000 { + opp-hz = /bits/ 64 <1400000000>; + opp-supported-hw = <0x01 0x0004>; + clock-latency-ns = <6000000>; + }; + }; + memory@80000000 { device_type = "memory"; /* 2G RAM */ From 9901c21bcaf2f01fe5078f750d624f4ddfa8f81b Mon Sep 17 00:00:00 2001 From: Chen Hui Date: Tue, 8 Nov 2022 15:23:02 +0800 Subject: [PATCH 12/18] cpufreq: qcom-hw: Fix memory leak in qcom_cpufreq_hw_read_lut() If "cpu_dev" fails to get opp table in qcom_cpufreq_hw_read_lut(), the program will return, resulting in "table" resource is not released. Fixes: 51c843cf77bb ("cpufreq: qcom: Update the bandwidth levels on frequency change") Signed-off-by: Chen Hui Reviewed-by: Sibi Sankar Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 5e0598730a04..45dbb5d7fa77 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -197,6 +197,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, } } else if (ret != -ENODEV) { dev_err(cpu_dev, "Invalid opp table in device tree\n"); + kfree(table); return ret; } else { policy->fast_switch_possible = true; From c72cf0cb1d77f6b1b58c334dcc3d09fa13111c4c Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 17 Nov 2022 11:01:45 +0530 Subject: [PATCH 13/18] cpufreq: qcom-hw: Fix the frequency returned by cpufreq_driver->get() The cpufreq_driver->get() callback is supposed to return the current frequency of the CPU and not the one requested by the CPUFreq core. Fix it by returning the frequency that gets supplied to the CPU after the DCVS operation of EPSS/OSM. Fixes: 2849dd8bc72b ("cpufreq: qcom-hw: Add support for QCOM cpufreq HW driver") Reported-by: Sudeep Holla Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 42 +++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 45dbb5d7fa77..a563f177e23b 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -129,7 +129,35 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, return 0; } +static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +{ + unsigned int lval; + + if (qcom_cpufreq.soc_data->reg_current_vote) + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_current_vote) & 0x3ff; + else + lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_domain_state) & 0xff; + + return lval * xo_rate; +} + +/* Get the current frequency of the CPU (after throttling) */ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) +{ + struct qcom_cpufreq_data *data; + struct cpufreq_policy *policy; + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) + return 0; + + data = policy->driver_data; + + return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; +} + +/* Get the frequency requested by the cpufreq core for the CPU */ +static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) { struct qcom_cpufreq_data *data; const struct qcom_cpufreq_soc_data *soc_data; @@ -291,18 +319,6 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } -static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) -{ - unsigned int lval; - - if (qcom_cpufreq.soc_data->reg_current_vote) - lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_current_vote) & 0x3ff; - else - lval = readl_relaxed(data->base + qcom_cpufreq.soc_data->reg_domain_state) & 0xff; - - return lval * xo_rate; -} - static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { struct cpufreq_policy *policy = data->policy; @@ -346,7 +362,7 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) * If h/w throttled frequency is higher than what cpufreq has requested * for, then stop polling and switch back to interrupt mechanism. */ - if (throttled_freq >= qcom_cpufreq_hw_get(cpu)) + if (throttled_freq >= qcom_cpufreq_get_freq(cpu)) enable_irq(data->throttle_irq); else mod_delayed_work(system_highpri_wq, &data->throttle_work, From f9281ab6b8366a6697df27c11946f84a626f93b5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 17 Nov 2022 11:01:42 +0530 Subject: [PATCH 14/18] dt-bindings: cpufreq: cpufreq-qcom-hw: Add cpufreq clock provider Qualcomm platforms making use of CPUFreq HW Engine (EPSS/OSM) supply clocks to the CPU cores. Document the same in the binding to reflect the actual implementation. CPUFreq HW will become the clock provider and CPU cores will become the clock consumers. The clock index for each CPU core is based on the frequency domain index. Signed-off-by: Manivannan Sadhasivam Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index e58c55f78aaa..676d369a6fdd 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -56,6 +56,9 @@ properties: '#freq-domain-cells': const: 1 + '#clock-cells': + const: 1 + required: - compatible - reg @@ -83,6 +86,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_0>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_0: l2-cache { compatible = "cache"; cache-unified; @@ -103,6 +107,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_100>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_100: l2-cache { compatible = "cache"; cache-unified; @@ -118,6 +123,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_200>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_200: l2-cache { compatible = "cache"; cache-unified; @@ -133,6 +139,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_300>; qcom,freq-domain = <&cpufreq_hw 0>; + clocks = <&cpufreq_hw 0>; L2_300: l2-cache { compatible = "cache"; cache-unified; @@ -148,6 +155,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_400>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_400: l2-cache { compatible = "cache"; cache-unified; @@ -163,6 +171,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_500>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_500: l2-cache { compatible = "cache"; cache-unified; @@ -178,6 +187,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_600>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_600: l2-cache { compatible = "cache"; cache-unified; @@ -193,6 +203,7 @@ examples: enable-method = "psci"; next-level-cache = <&L2_700>; qcom,freq-domain = <&cpufreq_hw 1>; + clocks = <&cpufreq_hw 1>; L2_700: l2-cache { compatible = "cache"; cache-unified; @@ -215,6 +226,7 @@ examples: clock-names = "xo", "alternate"; #freq-domain-cells = <1>; + #clock-cells = <1>; }; }; ... From 4370232c727bf45940345dd1b88dbd8c2e42ec56 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 17 Nov 2022 11:01:44 +0530 Subject: [PATCH 15/18] cpufreq: qcom-hw: Add CPU clock provider support Qcom CPUFreq hardware (EPSS/OSM) controls clock and voltage to the CPU cores. But this relationship is not represented with the clk framework so far. So, let's make the qcom-cpufreq-hw driver a clock provider. This makes the clock producer/consumer relationship cleaner and is also useful for CPU related frameworks like OPP to know the frequency at which the CPUs are running. The clock frequency provided by the driver is for each frequency domain. We cannot get the frequency of each CPU core because, not all platforms support per-core DCVS feature. Also the frequency supplied by the driver is the actual frequency that comes out of the EPSS/OSM block after the DCVS operation. This frequency is not same as what the CPUFreq framework has set but it is the one that gets supplied to the CPUs after throttling by LMh. Signed-off-by: Manivannan Sadhasivam [ Xiu: Fixed memleak. ] Signed-off-by: Xiu Jianfeng Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index a563f177e23b..340fed35e45d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -54,6 +55,7 @@ struct qcom_cpufreq_data { bool cancel_throttle; struct delayed_work throttle_work; struct cpufreq_policy *policy; + struct clk_hw cpu_clk; bool per_core_dcvs; @@ -632,8 +634,20 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .ready = qcom_cpufreq_ready, }; +static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +{ + struct qcom_cpufreq_data *data = container_of(hw, struct qcom_cpufreq_data, cpu_clk); + + return qcom_lmh_get_throttle_freq(data); +} + +static const struct clk_ops qcom_cpufreq_hw_clk_ops = { + .recalc_rate = qcom_cpufreq_hw_recalc_rate, +}; + static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { + struct clk_hw_onecell_data *clk_data; struct device *dev = &pdev->dev; struct device *cpu_dev; struct clk *clk; @@ -676,8 +690,15 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) qcom_cpufreq.soc_data = of_device_get_match_data(dev); + clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL); + if (!clk_data) + return -ENOMEM; + + clk_data->num = num_domains; + for (i = 0; i < num_domains; i++) { struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; + struct clk_init_data clk_init = {}; struct resource *res; void __iomem *base; @@ -689,6 +710,31 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) data->base = base; data->res = res; + + /* Register CPU clock for each frequency domain */ + clk_init.name = kasprintf(GFP_KERNEL, "qcom_cpufreq%d", i); + if (!clk_init.name) + return -ENOMEM; + + clk_init.flags = CLK_GET_RATE_NOCACHE; + clk_init.ops = &qcom_cpufreq_hw_clk_ops; + data->cpu_clk.init = &clk_init; + + ret = devm_clk_hw_register(dev, &data->cpu_clk); + if (ret < 0) { + dev_err(dev, "Failed to register clock %d: %d\n", i, ret); + kfree(clk_init.name); + return ret; + } + + clk_data->hws[i] = &data->cpu_clk; + kfree(clk_init.name); + } + + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, clk_data); + if (ret < 0) { + dev_err(dev, "Failed to add clock provider\n"); + return ret; } ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver); From 6286bbb40576ffadfde206c332b61345c19af57f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 28 Nov 2022 23:29:11 +0900 Subject: [PATCH 16/18] cpufreq: apple-soc: Add new driver to control Apple SoC CPU P-states This driver implements CPU frequency scaling for Apple Silicon SoCs, including M1 (t8103), M1 Max/Pro/Ultra (t600x), and M2 (t8112). Each CPU cluster has its own register set, and frequency management is fully automated by the hardware; the driver only has to write one register. There is boost frequency support, but the hardware will only allow their use if only a subset of cores in a cluster are in non-deep-idle. Since we don't support deep idle yet, these frequencies are not achievable, but the driver supports them. They will remain disabled in the device tree until deep idle is implemented, to avoid confusing users. This driver does not yet implement the memory controller performance state tuning that usually accompanies higher CPU p-states. This will be done in a future patch. Acked-by: Marc Zyngier Signed-off-by: Hector Martin Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 9 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/apple-soc-cpufreq.c | 352 +++++++++++++++++++++++++++ drivers/cpufreq/cpufreq-dt-platdev.c | 2 + 4 files changed, 364 insertions(+) create mode 100644 drivers/cpufreq/apple-soc-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index be590f498e6a..0a0352d8fa45 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -41,6 +41,15 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM To compile this driver as a module, choose M here: the module will be called sun50i-cpufreq-nvmem. +config ARM_APPLE_SOC_CPUFREQ + tristate "Apple Silicon SoC CPUFreq support" + depends on ARCH_APPLE || (COMPILE_TEST && 64BIT) + select PM_OPP + default ARCH_APPLE + help + This adds the CPUFreq driver for Apple Silicon machines + (e.g. Apple M1). + config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" depends on ARCH_MVEBU && CPUFREQ_DT diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 49b98c62c5af..32a7029e25ed 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o ################################################################################## # ARM SoC drivers +obj-$(CONFIG_ARM_APPLE_SOC_CPUFREQ) += apple-soc-cpufreq.o obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ) += armada-8k-cpufreq.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c new file mode 100644 index 000000000000..d1801281cdd9 --- /dev/null +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Apple SoC CPU cluster performance state driver + * + * Copyright The Asahi Linux Contributors + * + * Based on scpi-cpufreq.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APPLE_DVFS_CMD 0x20 +#define APPLE_DVFS_CMD_BUSY BIT(31) +#define APPLE_DVFS_CMD_SET BIT(25) +#define APPLE_DVFS_CMD_PS2 GENMASK(16, 12) +#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) + +/* Same timebase as CPU counter (24MHz) */ +#define APPLE_DVFS_LAST_CHG_TIME 0x38 + +/* + * Apple ran out of bits and had to shift this in T8112... + */ +#define APPLE_DVFS_STATUS 0x50 +#define APPLE_DVFS_STATUS_CUR_PS_T8103 GENMASK(7, 4) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103 4 +#define APPLE_DVFS_STATUS_TGT_PS_T8103 GENMASK(3, 0) +#define APPLE_DVFS_STATUS_CUR_PS_T8112 GENMASK(9, 5) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112 5 +#define APPLE_DVFS_STATUS_TGT_PS_T8112 GENMASK(4, 0) + +/* + * Div is +1, base clock is 12MHz on existing SoCs. + * For documentation purposes. We use the OPP table to + * get the frequency. + */ +#define APPLE_DVFS_PLL_STATUS 0xc0 +#define APPLE_DVFS_PLL_FACTOR 0xc8 +#define APPLE_DVFS_PLL_FACTOR_MULT GENMASK(31, 16) +#define APPLE_DVFS_PLL_FACTOR_DIV GENMASK(15, 0) + +#define APPLE_DVFS_TRANSITION_TIMEOUT 100 + +struct apple_soc_cpufreq_info { + u64 max_pstate; + u64 cur_pstate_mask; + u64 cur_pstate_shift; +}; + +struct apple_cpu_priv { + struct device *cpu_dev; + void __iomem *reg_base; + const struct apple_soc_cpufreq_info *info; +}; + +static struct cpufreq_driver apple_soc_cpufreq_driver; + +static const struct apple_soc_cpufreq_info soc_t8103_info = { + .max_pstate = 15, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8103, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103, +}; + +static const struct apple_soc_cpufreq_info soc_t8112_info = { + .max_pstate = 31, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8112, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112, +}; + +static const struct apple_soc_cpufreq_info soc_default_info = { + .max_pstate = 15, + .cur_pstate_mask = 0, /* fallback */ +}; + +static const struct of_device_id apple_soc_cpufreq_of_match[] = { + { + .compatible = "apple,t8103-cluster-cpufreq", + .data = &soc_t8103_info, + }, + { + .compatible = "apple,t8112-cluster-cpufreq", + .data = &soc_t8112_info, + }, + { + .compatible = "apple,cluster-cpufreq", + .data = &soc_default_info, + }, + {} +}; + +static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_frequency_table *p; + unsigned int pstate; + + if (priv->info->cur_pstate_mask) { + u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_STATUS); + + pstate = (reg & priv->info->cur_pstate_mask) >> priv->info->cur_pstate_shift; + } else { + /* + * For the fallback case we might not know the layout of DVFS_STATUS, + * so just use the command register value (which ignores boost limitations). + */ + u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_CMD); + + pstate = FIELD_GET(APPLE_DVFS_CMD_PS1, reg); + } + + cpufreq_for_each_valid_entry(p, policy->freq_table) + if (p->driver_data == pstate) + return p->frequency; + + dev_err(priv->cpu_dev, "could not find frequency for pstate %d\n", + pstate); + return 0; +} + +static int apple_soc_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct apple_cpu_priv *priv = policy->driver_data; + unsigned int pstate = policy->freq_table[index].driver_data; + u64 reg; + + /* Fallback for newer SoCs */ + if (index > priv->info->max_pstate) + index = priv->info->max_pstate; + + if (readq_poll_timeout_atomic(priv->reg_base + APPLE_DVFS_CMD, reg, + !(reg & APPLE_DVFS_CMD_BUSY), 2, + APPLE_DVFS_TRANSITION_TIMEOUT)) { + return -EIO; + } + + reg &= ~(APPLE_DVFS_CMD_PS1 | APPLE_DVFS_CMD_PS2); + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS1, pstate); + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate); + reg |= APPLE_DVFS_CMD_SET; + + writeq_relaxed(reg, priv->reg_base + APPLE_DVFS_CMD); + + return 0; +} + +static unsigned int apple_soc_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + if (apple_soc_cpufreq_set_target(policy, policy->cached_resolved_idx) < 0) + return 0; + + return policy->freq_table[policy->cached_resolved_idx].frequency; +} + +static int apple_soc_cpufreq_find_cluster(struct cpufreq_policy *policy, + void __iomem **reg_base, + const struct apple_soc_cpufreq_info **info) +{ + struct of_phandle_args args; + const struct of_device_id *match; + int ret = 0; + + ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus, &args); + if (ret < 0) + return ret; + + match = of_match_node(apple_soc_cpufreq_of_match, args.np); + of_node_put(args.np); + if (!match) + return -ENODEV; + + *info = match->data; + + *reg_base = of_iomap(args.np, 0); + if (IS_ERR(*reg_base)) + return PTR_ERR(*reg_base); + + return 0; +} + +static struct freq_attr *apple_soc_cpufreq_hw_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* Filled in below if boost is enabled */ + NULL, +}; + +static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) +{ + int ret, i; + unsigned int transition_latency; + void __iomem *reg_base; + struct device *cpu_dev; + struct apple_cpu_priv *priv; + const struct apple_soc_cpufreq_info *info; + struct cpufreq_frequency_table *freq_table; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; + } + + ret = dev_pm_opp_of_add_table(cpu_dev); + if (ret < 0) { + dev_err(cpu_dev, "%s: failed to add OPP table: %d\n", __func__, ret); + return ret; + } + + ret = apple_soc_cpufreq_find_cluster(policy, ®_base, &info); + if (ret) { + dev_err(cpu_dev, "%s: failed to get cluster info: %d\n", __func__, ret); + return ret; + } + + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); + if (ret) { + dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); + goto out_iounmap; + } + + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret <= 0) { + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); + ret = -EPROBE_DEFER; + goto out_free_opp; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_free_opp; + } + + ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto out_free_priv; + } + + /* Get OPP levels (p-state indexes) and stash them in driver_data */ + for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + unsigned long rate = freq_table[i].frequency * 1000 + 999; + struct dev_pm_opp *opp = dev_pm_opp_find_freq_floor(cpu_dev, &rate); + + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto out_free_cpufreq_table; + } + freq_table[i].driver_data = dev_pm_opp_get_level(opp); + dev_pm_opp_put(opp); + } + + priv->cpu_dev = cpu_dev; + priv->reg_base = reg_base; + priv->info = info; + policy->driver_data = priv; + policy->freq_table = freq_table; + + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; + + policy->cpuinfo.transition_latency = transition_latency; + policy->dvfs_possible_from_any_cpu = true; + policy->fast_switch_possible = true; + + if (policy_has_boost_freq(policy)) { + ret = cpufreq_enable_boost_support(); + if (ret) { + dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); + } else { + apple_soc_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + apple_soc_cpufreq_driver.boost_enabled = true; + } + } + + return 0; + +out_free_cpufreq_table: + dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); +out_free_priv: + kfree(priv); +out_free_opp: + dev_pm_opp_remove_all_dynamic(cpu_dev); +out_iounmap: + iounmap(reg_base); + return ret; +} + +static int apple_soc_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct apple_cpu_priv *priv = policy->driver_data; + + dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); + dev_pm_opp_remove_all_dynamic(priv->cpu_dev); + iounmap(priv->reg_base); + kfree(priv); + + return 0; +} + +static struct cpufreq_driver apple_soc_cpufreq_driver = { + .name = "apple-cpufreq", + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, + .verify = cpufreq_generic_frequency_table_verify, + .attr = cpufreq_generic_attr, + .get = apple_soc_cpufreq_get_rate, + .init = apple_soc_cpufreq_init, + .exit = apple_soc_cpufreq_exit, + .target_index = apple_soc_cpufreq_set_target, + .fast_switch = apple_soc_cpufreq_fast_switch, + .register_em = cpufreq_register_em_with_opp, + .attr = apple_soc_cpufreq_hw_attr, +}; + +static int __init apple_soc_cpufreq_module_init(void) +{ + if (!of_machine_is_compatible("apple,arm-platform")) + return -ENODEV; + + return cpufreq_register_driver(&apple_soc_cpufreq_driver); +} +module_init(apple_soc_cpufreq_module_init); + +static void __exit apple_soc_cpufreq_module_exit(void) +{ + cpufreq_unregister_driver(&apple_soc_cpufreq_driver); +} +module_exit(apple_soc_cpufreq_module_exit); + +MODULE_DEVICE_TABLE(of, apple_soc_cpufreq_of_match); +MODULE_AUTHOR("Hector Martin "); +MODULE_DESCRIPTION("Apple SoC CPU cluster DVFS driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 85ee11f79840..8ab672883043 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,8 @@ static const struct of_device_id allowlist[] __initconst = { static const struct of_device_id blocklist[] __initconst = { { .compatible = "allwinner,sun50i-h6", }, + { .compatible = "apple,arm-platform", }, + { .compatible = "arm,vexpress", }, { .compatible = "calxeda,highbank", }, From 32eb6453328c372a11559a21a53e5280ad44a421 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 20 Nov 2022 18:19:02 +0100 Subject: [PATCH 17/18] cpufreq: tegra186: Use flexible array to simplify memory allocation Use flexible array to simplify memory allocation. It saves some memory, avoids an indirection when reading the 'clusters' array and removes some LoC. Detailed explanation: ==================== Knowing that: - each devm_ allocation over-allocates 40 bytes for internal needs - Some rounding is done by the memory allocator on 8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096, 8192 boundaries and that: - sizeof(struct tegra186_cpufreq_data) = 24 - sizeof(struct tegra186_cpufreq_cluster) = 16 Memory allocations in tegra186_cpufreq_probe() are: data: (24 + 40) = 64 => 64 bytes data->clusters: (2 * 16 + 40) = 72 => 96 bytes So a total of 160 bytes are allocated. 56 for the real need, 80 for internal uses and 24 are wasted. If 'struct tegra186_cpufreq_data' is reordered so that 'clusters' is a flexible array: - it saves one pointer in the structure - only one allocation is needed So, only 96 bytes are allocated: 16 + 2 * 16 + 40 = 88 => 96 bytes Signed-off-by: Christophe JAILLET Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 6c88827f4e62..f98f53bf1011 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -65,8 +65,8 @@ struct tegra186_cpufreq_cluster { struct tegra186_cpufreq_data { void __iomem *regs; - struct tegra186_cpufreq_cluster *clusters; const struct tegra186_cpufreq_cpu *cpus; + struct tegra186_cpufreq_cluster clusters[]; }; static int tegra186_cpufreq_init(struct cpufreq_policy *policy) @@ -221,15 +221,12 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) struct tegra_bpmp *bpmp; unsigned int i = 0, err; - data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, + struct_size(data, clusters, TEGRA186_NUM_CLUSTERS), + GFP_KERNEL); if (!data) return -ENOMEM; - data->clusters = devm_kcalloc(&pdev->dev, TEGRA186_NUM_CLUSTERS, - sizeof(*data->clusters), GFP_KERNEL); - if (!data->clusters) - return -ENOMEM; - data->cpus = tegra186_cpus; bpmp = tegra_bpmp_get(&pdev->dev); From 8ff150aa6fe252e9b7713cf737c4dc5cbaa263ab Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: Fri, 18 Nov 2022 10:24:16 -0800 Subject: [PATCH 18/18] dt-bindings: cpufreq: cpufreq-qcom-hw: Add QDU1000/QRU1000 cpufreq Add cpufreq epss for QDU1000 and QRU1000 SoCs. Signed-off-by: Melody Olvera Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 676d369a6fdd..903b31129f01 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -25,6 +25,7 @@ properties: - description: v2 of CPUFREQ HW (EPSS) items: - enum: + - qcom,qdu1000-cpufreq-epss - qcom,sm6375-cpufreq-epss - qcom,sm8250-cpufreq-epss - const: qcom,cpufreq-epss