From 4ca68a16f48ef388742a25c6d6643bf996e8d1d5 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 5 Jul 2024 10:31:29 +0200 Subject: [PATCH 02/10] amd-pstate Signed-off-by: Peter Jung --- Documentation/admin-guide/pm/amd-pstate.rst | 18 +- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 2 + arch/x86/kernel/cpu/scattered.c | 1 + drivers/cpufreq/Kconfig.x86 | 1 + drivers/cpufreq/acpi-cpufreq.c | 3 +- drivers/cpufreq/amd-pstate.c | 356 ++++++++++++++------ drivers/cpufreq/amd-pstate.h | 16 +- drivers/cpufreq/cpufreq.c | 11 +- 9 files changed, 290 insertions(+), 119 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 1e0d101b020a..d0324d44f548 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -281,6 +281,22 @@ integer values defined between 0 to 255 when EPP feature is enabled by platform firmware, if EPP feature is disabled, driver will ignore the written value This attribute is read-write. +``boost`` +The `boost` sysfs attribute provides control over the CPU core +performance boost, allowing users to manage the maximum frequency limitation +of the CPU. This attribute can be used to enable or disable the boost feature +on individual CPUs. + +When the boost feature is enabled, the CPU can dynamically increase its frequency +beyond the base frequency, providing enhanced performance for demanding workloads. +On the other hand, disabling the boost feature restricts the CPU to operate at the +base frequency, which may be desirable in certain scenarios to prioritize power +efficiency or manage temperature. + +To manipulate the `boost` attribute, users can write a value of `0` to disable the +boost or `1` to enable it, for the respective CPU using the sysfs path +`/sys/devices/system/cpu/cpuX/cpufreq/boost`, where `X` represents the CPU number. + Other performance and frequency values can be read back from ``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`. @@ -406,7 +422,7 @@ control its functionality at the system level. They are located in the ``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs. ``status`` - Operation mode of the driver: "active", "passive" or "disable". + Operation mode of the driver: "active", "passive", "guided" or "disable". "active" The driver is functional and in the ``active mode`` diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3c7434329661..6c128d463a14 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -470,6 +470,7 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* "" AMD Fast CPPC */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e72c2b872957..8738a7b3917d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -782,6 +782,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index af5aa2c754c2..c84c30188fdf 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, + { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 438c9e75a04d..97c2d4f15d76 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -71,6 +71,7 @@ config X86_AMD_PSTATE_DEFAULT_MODE config X86_AMD_PSTATE_UT tristate "selftest for AMD Processor P-State driver" depends on X86 && ACPI_PROCESSOR + depends on X86_AMD_PSTATE default n help This kernel module is used for testing. It's safe to say M here. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 4ac3a35dcd98..f4f8587c4ea0 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -50,8 +50,6 @@ enum { #define AMD_MSR_RANGE (0x7) #define HYGON_MSR_RANGE (0x7) -#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) - struct acpi_cpufreq_data { unsigned int resume; unsigned int cpu_feature; @@ -139,6 +137,7 @@ static int set_boost(struct cpufreq_policy *policy, int val) (void *)(long)val, 1); pr_debug("CPU %*pbl: Core Boosting %s.\n", cpumask_pr_args(policy->cpus), str_enabled_disabled(val)); + policy->boost_enabled = val; return 0; } diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 6af175e6c08a..80eaa58f1405 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -51,6 +51,7 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 #define CPPC_HIGHEST_PERF_PERFORMANCE 196 #define CPPC_HIGHEST_PERF_DEFAULT 166 @@ -85,15 +86,6 @@ struct quirk_entry { u32 lowest_freq; }; -/* - * TODO: We need more time to fine tune processors with shared memory solution - * with community together. - * - * There are some performance drops on the CPU benchmarks which reports from - * Suse. We are co-working with them to fine tune the shared memory solution. So - * we disable it by default to go acpi-cpufreq on these processors and add a - * module parameter to be able to enable it manually for debugging. - */ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; @@ -157,7 +149,7 @@ static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) * broken BIOS lack of nominal_freq and lowest_freq capabilities * definition in ACPI tables */ - if (boot_cpu_has(X86_FEATURE_ZEN2)) { + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { quirks = dmi->driver_data; pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); return 1; @@ -199,7 +191,7 @@ static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) u64 epp; int ret; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { if (!cppc_req_cached) { epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); @@ -252,7 +244,7 @@ static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { u64 value = READ_ONCE(cpudata->cppc_req_cached); value &= ~GENMASK_ULL(31, 24); @@ -281,10 +273,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, int epp = -EINVAL; int ret; - if (!pref_index) { - pr_debug("EPP pref_index is invalid\n"); - return -EINVAL; - } + if (!pref_index) + epp = cpudata->epp_default; if (epp == -EINVAL) epp = epp_values[pref_index]; @@ -521,7 +511,10 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { + unsigned long max_freq; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); u64 prev = READ_ONCE(cpudata->cppc_req_cached); + u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); u64 value = prev; min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, @@ -530,6 +523,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + max_freq = READ_ONCE(cpudata->max_limit_freq); + policy->cur = div_u64(des_perf * max_freq, max_perf); + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; des_perf = 0; @@ -541,6 +537,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(des_perf); + /* limit the max perf when core performance boost feature is disabled */ + if (!cpudata->boost_supported) + max_perf = min_t(unsigned long, nominal_perf, max_perf); + value &= ~AMD_CPPC_MAX_PERF(~0L); value |= AMD_CPPC_MAX_PERF(max_perf); @@ -651,10 +651,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long capacity) { unsigned long max_perf, min_perf, des_perf, - cap_perf, lowest_nonlinear_perf, max_freq; + cap_perf, lowest_nonlinear_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata = policy->driver_data; - unsigned int target_freq; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); @@ -662,7 +661,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, cap_perf = READ_ONCE(cpudata->highest_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - max_freq = READ_ONCE(cpudata->max_freq); des_perf = cap_perf; if (target_perf < capacity) @@ -680,32 +678,57 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - target_freq = div_u64(des_perf * max_freq, max_perf); - policy->cur = target_freq; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); cpufreq_cpu_put(policy); } -static int amd_get_min_freq(struct amd_cpudata *cpudata) +static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) { - return READ_ONCE(cpudata->min_freq); -} + struct amd_cpudata *cpudata = policy->driver_data; + struct cppc_perf_ctrls perf_ctrls; + u32 highest_perf, nominal_perf, nominal_freq, max_freq; + int ret; -static int amd_get_max_freq(struct amd_cpudata *cpudata) -{ - return READ_ONCE(cpudata->max_freq); -} + highest_perf = READ_ONCE(cpudata->highest_perf); + nominal_perf = READ_ONCE(cpudata->nominal_perf); + nominal_freq = READ_ONCE(cpudata->nominal_freq); + max_freq = READ_ONCE(cpudata->max_freq); -static int amd_get_nominal_freq(struct amd_cpudata *cpudata) -{ - return READ_ONCE(cpudata->nominal_freq); -} + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); -static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) -{ - return READ_ONCE(cpudata->lowest_nonlinear_freq); + value &= ~GENMASK_ULL(7, 0); + value |= on ? highest_perf : nominal_perf; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + } else { + perf_ctrls.max_perf = on ? highest_perf : nominal_perf; + ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); + if (ret) { + cpufreq_cpu_release(policy); + pr_debug("Failed to set max perf on CPU:%d. ret:%d\n", + cpudata->cpu, ret); + return ret; + } + } + + if (on) + policy->cpuinfo.max_freq = max_freq; + else if (policy->cpuinfo.max_freq > nominal_freq * 1000) + policy->cpuinfo.max_freq = nominal_freq * 1000; + + policy->max = policy->cpuinfo.max_freq; + + if (cppc_state == AMD_PSTATE_PASSIVE) { + ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); + if (ret < 0) + pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu); + } + + return ret < 0 ? ret : 0; } static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) @@ -715,36 +738,51 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) if (!cpudata->boost_supported) { pr_err("Boost mode is not supported by this processor or SBIOS\n"); - return -EINVAL; + return -EOPNOTSUPP; } + mutex_lock(&amd_pstate_driver_lock); + ret = amd_pstate_cpu_boost_update(policy, state); + WRITE_ONCE(cpudata->boost_state, !ret ? state : false); + policy->boost_enabled = !ret ? state : false; + refresh_frequency_limits(policy); + mutex_unlock(&amd_pstate_driver_lock); - if (state) - policy->cpuinfo.max_freq = cpudata->max_freq; - else - policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000; + return ret; +} - policy->max = policy->cpuinfo.max_freq; +static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) +{ + u64 boost_val; + int ret = -1; - ret = freq_qos_update_request(&cpudata->req[1], - policy->cpuinfo.max_freq); - if (ret < 0) - return ret; + /* + * If platform has no CPB support or disable it, initialize current driver + * boost_enabled state to be false, it is not an error for cpufreq core to handle. + */ + if (!cpu_feature_enabled(X86_FEATURE_CPB)) { + pr_debug_once("Boost CPB capabilities not present in the processor\n"); + ret = 0; + goto exit_err; + } - return 0; -} + /* at least one CPU supports CPB, even if others fail later on to set up */ + current_pstate_driver->boost_enabled = true; -static void amd_pstate_boost_init(struct amd_cpudata *cpudata) -{ - u32 highest_perf, nominal_perf; + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); + if (ret) { + pr_err_once("failed to read initial CPU boost state!\n"); + ret = -EIO; + goto exit_err; + } - highest_perf = READ_ONCE(cpudata->highest_perf); - nominal_perf = READ_ONCE(cpudata->nominal_perf); + if (!(boost_val & MSR_K7_HWCR_CPB_DIS)) + cpudata->boost_supported = true; - if (highest_perf <= nominal_perf) - return; + return 0; - cpudata->boost_supported = true; - current_pstate_driver->boost_enabled = true; +exit_err: + cpudata->boost_supported = false; + return ret; } static void amd_perf_ctl_reset(unsigned int cpu) @@ -773,7 +811,7 @@ static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) { int ret; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { u64 cap1; ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); @@ -860,7 +898,41 @@ static void amd_pstate_update_limits(unsigned int cpu) mutex_unlock(&amd_pstate_driver_lock); } -/** +/* + * Get pstate transition delay time from ACPI tables that firmware set + * instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) +{ + u32 transition_delay_ns; + + transition_delay_ns = cppc_get_transition_latency(cpu); + if (transition_delay_ns == CPUFREQ_ETERNAL) { + if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC)) + return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; + else + return AMD_PSTATE_TRANSITION_DELAY; + } + + return transition_delay_ns / NSEC_PER_USEC; +} + +/* + * Get pstate transition latency value from ACPI tables that firmware + * set instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_latency(unsigned int cpu) +{ + u32 transition_latency; + + transition_latency = cppc_get_transition_latency(cpu); + if (transition_latency == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_LATENCY; + + return transition_latency; +} + +/* * amd_pstate_init_freq: Initialize the max_freq, min_freq, * nominal_freq and lowest_nonlinear_freq for * the @cpudata object. @@ -881,7 +953,6 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) u32 boost_ratio, lowest_nonlinear_ratio; struct cppc_perf_caps cppc_perf; - ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; @@ -912,12 +983,30 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_freq, nominal_freq); WRITE_ONCE(cpudata->max_freq, max_freq); + /** + * Below values need to be initialized correctly, otherwise driver will fail to load + * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf + * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] + * Check _CPC in ACPI table objects if any values are incorrect + */ + if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { + pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", + min_freq, max_freq, nominal_freq * 1000); + return -EINVAL; + } + + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { + pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", + lowest_nonlinear_freq, min_freq, nominal_freq * 1000); + return -EINVAL; + } + return 0; } static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + int min_freq, max_freq, ret; struct device *dev; struct amd_cpudata *cpudata; @@ -946,20 +1035,15 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); - ret = -EINVAL; + ret = amd_pstate_init_boost_support(cpudata); + if (ret) goto free_cpudata1; - } - policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; - policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + min_freq = READ_ONCE(cpudata->min_freq); + max_freq = READ_ONCE(cpudata->max_freq); + + policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); + policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); policy->min = min_freq; policy->max = max_freq; @@ -967,10 +1051,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = min_freq; policy->cpuinfo.max_freq = max_freq; + policy->boost_enabled = READ_ONCE(cpudata->boost_supported); + /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - if (boot_cpu_has(X86_FEATURE_CPPC)) + if (cpu_feature_enabled(X86_FEATURE_CPPC)) policy->fast_switch_possible = true; ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], @@ -992,7 +1078,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; - amd_pstate_boost_init(cpudata); if (!current_pstate_driver->adjust_perf) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; @@ -1052,7 +1137,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, int max_freq; struct amd_cpudata *cpudata = policy->driver_data; - max_freq = amd_get_max_freq(cpudata); + max_freq = READ_ONCE(cpudata->max_freq); if (max_freq < 0) return max_freq; @@ -1065,7 +1150,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli int freq; struct amd_cpudata *cpudata = policy->driver_data; - freq = amd_get_lowest_nonlinear_freq(cpudata); + freq = READ_ONCE(cpudata->lowest_nonlinear_freq); if (freq < 0) return freq; @@ -1203,7 +1288,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode) cppc_state = mode; - if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) + if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) return 0; for_each_present_cpu(cpu) { @@ -1376,7 +1461,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + int min_freq, max_freq, ret; struct amd_cpudata *cpudata; struct device *dev; u64 value; @@ -1407,16 +1492,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); - nominal_freq = amd_get_nominal_freq(cpudata); - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); - ret = -EINVAL; + ret = amd_pstate_init_boost_support(cpudata); + if (ret) goto free_cpudata1; - } + + min_freq = READ_ONCE(cpudata->min_freq); + max_freq = READ_ONCE(cpudata->max_freq); policy->cpuinfo.min_freq = min_freq; policy->cpuinfo.max_freq = max_freq; @@ -1425,11 +1506,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; - cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); + cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0); policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; + policy->boost_enabled = READ_ONCE(cpudata->boost_supported); + /* * Set the policy to provide a valid fallback value in case * the default cpufreq governor is neither powersave nor performance. @@ -1440,7 +1523,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) else policy->policy = CPUFREQ_POLICY_POWERSAVE; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); if (ret) return ret; @@ -1451,7 +1534,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_cap1_cached, value); } - amd_pstate_boost_init(cpudata); return 0; @@ -1530,7 +1612,7 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = 0; /* Set initial EPP value */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { value &= ~GENMASK_ULL(31, 24); value |= (u64)epp << 24; } @@ -1553,6 +1635,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) amd_pstate_epp_update_limit(policy); + /* + * policy->cur is never updated with the amd_pstate_epp driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; + return 0; } @@ -1569,7 +1657,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) value = READ_ONCE(cpudata->cppc_req_cached); max_perf = READ_ONCE(cpudata->highest_perf); - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); } else { perf_ctrls.max_perf = max_perf; @@ -1603,7 +1691,7 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy) value = READ_ONCE(cpudata->cppc_req_cached); mutex_lock(&amd_pstate_limits_lock); - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; /* Set max perf same as min perf */ @@ -1707,6 +1795,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, .update_limits = amd_pstate_update_limits, + .set_boost = amd_pstate_set_boost, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; @@ -1730,6 +1819,46 @@ static int __init amd_pstate_set_driver(int mode_idx) return -EINVAL; } +/** + * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. + * show the debug message that helps to check if the CPU has CPPC support for loading issue. + */ +static bool amd_cppc_supported(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + bool warn = false; + + if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { + pr_debug_once("CPPC feature is not supported by the processor\n"); + return false; + } + + /* + * If the CPPC feature is disabled in the BIOS for processors that support MSR-based CPPC, + * the AMD Pstate driver may not function correctly. + * Check the CPPC flag and display a warning message if the platform supports CPPC. + * Note: below checking code will not abort the driver registeration process because of + * the code is added for debugging purposes. + */ + if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_ZEN1) || cpu_feature_enabled(X86_FEATURE_ZEN2)) { + if (c->x86_model > 0x60 && c->x86_model < 0xaf) + warn = true; + } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || cpu_feature_enabled(X86_FEATURE_ZEN4)) { + if ((c->x86_model > 0x10 && c->x86_model < 0x1F) || + (c->x86_model > 0x40 && c->x86_model < 0xaf)) + warn = true; + } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { + warn = true; + } + } + + if (warn) + pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" + "Please enable it if your BIOS has the CPPC option.\n"); + return true; +} + static int __init amd_pstate_init(void) { struct device *dev_root; @@ -1738,6 +1867,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* show debug message only if CPPC is not supported */ + if (!amd_cppc_supported()) + return -EOPNOTSUPP; + + /* show warning message when BIOS broken or ACPI disabled */ if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); return -ENODEV; @@ -1752,35 +1886,43 @@ static int __init amd_pstate_init(void) /* check if this machine need CPPC quirks */ dmi_check_system(amd_pstate_quirks_table); - switch (cppc_state) { - case AMD_PSTATE_UNDEFINED: + /* + * determine the driver mode from the command line or kernel config. + * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. + * command line options will override the kernel config settings. + */ + + if (cppc_state == AMD_PSTATE_UNDEFINED) { /* Disable on the following configs by default: * 1. Undefined platforms * 2. Server platforms - * 3. Shared memory designs */ if (amd_pstate_acpi_pm_profile_undefined() || - amd_pstate_acpi_pm_profile_server() || - !boot_cpu_has(X86_FEATURE_CPPC)) { + amd_pstate_acpi_pm_profile_server()) { pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } - ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); - if (ret) - return ret; - break; + /* get driver mode from kernel config option [1:4] */ + cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; + } + + switch (cppc_state) { case AMD_PSTATE_DISABLE: + pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; case AMD_PSTATE_PASSIVE: case AMD_PSTATE_ACTIVE: case AMD_PSTATE_GUIDED: + ret = amd_pstate_set_driver(cppc_state); + if (ret) + return ret; break; default: return -EINVAL; } /* capability check */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); if (cppc_state != AMD_PSTATE_ACTIVE) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; @@ -1794,13 +1936,15 @@ static int __init amd_pstate_init(void) /* enable amd pstate feature */ ret = amd_pstate_enable(true); if (ret) { - pr_err("failed to enable with return %d\n", ret); + pr_err("failed to enable driver mode(%d)\n", cppc_state); return ret; } ret = cpufreq_register_driver(current_pstate_driver); - if (ret) + if (ret) { pr_err("failed to register with return %d\n", ret); + goto disable_driver; + } dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { @@ -1816,6 +1960,8 @@ static int __init amd_pstate_init(void) global_attr_free: cpufreq_unregister_driver(current_pstate_driver); +disable_driver: + amd_pstate_enable(false); return ret; } device_initcall(amd_pstate_init); diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index bc341f35908d..cc8bb2bc325a 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -42,13 +42,17 @@ struct amd_aperf_mperf { * @lowest_perf: the absolute lowest performance level of the processor * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher * priority. - * @max_freq: the frequency that mapped to highest_perf - * @min_freq: the frequency that mapped to lowest_perf - * @nominal_freq: the frequency that mapped to nominal_perf - * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf + * @min_limit_perf: Cached value of the performance corresponding to policy->min + * @max_limit_perf: Cached value of the performance corresponding to policy->max + * @min_limit_freq: Cached value of policy->min (in khz) + * @max_limit_freq: Cached value of policy->max (in khz) + * @max_freq: the frequency (in khz) that mapped to highest_perf + * @min_freq: the frequency (in khz) that mapped to lowest_perf + * @nominal_freq: the frequency (in khz) that mapped to nominal_perf + * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf * @cur: Difference of Aperf/Mperf/tsc count between last and current sample * @prev: Last Aperf/Mperf/tsc count value read from register - * @freq: current cpu frequency value + * @freq: current cpu frequency value (in khz) * @boost_supported: check whether the Processor or SBIOS supports boost mode * @hw_prefcore: check whether HW supports preferred core featue. * Only when hw_prefcore and early prefcore param are true, @@ -95,6 +99,8 @@ struct amd_cpudata { u32 policy; u64 cppc_cap1_cached; bool suspended; + s16 epp_default; + bool boost_state; }; #endif /* _LINUX_AMD_PSTATE_H */ diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d7630d9cdb2f..bbbeb8b90313 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -614,10 +614,9 @@ static ssize_t show_boost(struct kobject *kobj, static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - int ret, enable; + bool enable; - ret = sscanf(buf, "%d", &enable); - if (ret != 1 || enable < 0 || enable > 1) + if (kstrtobool(buf, &enable)) return -EINVAL; if (cpufreq_boost_trigger_state(enable)) { @@ -641,10 +640,10 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) static ssize_t store_local_boost(struct cpufreq_policy *policy, const char *buf, size_t count) { - int ret, enable; + int ret; + bool enable; - ret = kstrtoint(buf, 10, &enable); - if (ret || enable < 0 || enable > 1) + if (kstrtobool(buf, &enable)) return -EINVAL; if (!cpufreq_driver->boost_enabled) -- 2.46.0.rc1