From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Fri, 23 Feb 2024 17:11:08 +0100 Subject: [PATCH] amd-pstate Signed-off-by: Peter Jung Signed-off-by: Jan200101 --- .../admin-guide/kernel-parameters.txt | 5 + Documentation/admin-guide/pm/amd-pstate.rst | 59 +++++- arch/x86/Kconfig | 5 +- drivers/acpi/cppc_acpi.c | 13 ++ drivers/acpi/processor_driver.c | 6 + drivers/cpufreq/amd-pstate.c | 179 +++++++++++++++++- include/acpi/cppc_acpi.h | 5 + include/linux/amd-pstate.h | 10 + include/linux/cpufreq.h | 1 + 9 files changed, 272 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b72e2049c487..95164b35f973 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -363,6 +363,11 @@ selects a performance level in this range and appropriate to the current workload. + amd_prefcore= + [X86] + disable + Disable amd-pstate preferred core. + amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: , diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 1cf40f69278c..0b832ff529db 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy efficiency frequency management method on AMD processors. -AMD Pstate Driver Operation Modes -================================= +``amd-pstate`` Driver Operation Modes +====================================== ``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode, non-autonomous (passive) mode and guided autonomous (guided) mode. @@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance level and the platform autonomously selects a performance level in this range and appropriate to the current workload. +``amd-pstate`` Preferred Core +================================= + +The core frequency is subjected to the process variation in semiconductors. +Not all cores are able to reach the maximum frequency respecting the +infrastructure limits. Consequently, AMD has redefined the concept of +maximum frequency of a part. This means that a fraction of cores can reach +maximum frequency. To find the best process scheduling policy for a given +scenario, OS needs to know the core ordering informed by the platform through +highest performance capability register of the CPPC interface. + +``amd-pstate`` preferred core enables the scheduler to prefer scheduling on +cores that can achieve a higher frequency with lower voltage. The preferred +core rankings can dynamically change based on the workload, platform conditions, +thermals and ageing. + +The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate`` +driver will also determine whether or not ``amd-pstate`` preferred core is +supported by the platform. + +``amd-pstate`` driver will provide an initial core ordering when the system boots. +The platform uses the CPPC interfaces to communicate the core ranking to the +operating system and scheduler to make sure that OS is choosing the cores +with highest performance firstly for scheduling the process. When ``amd-pstate`` +driver receives a message with the highest performance change, it will +update the core ranking and set the cpu's priority. + +``amd-pstate`` Preferred Core Switch +================================= +Kernel Parameters +----------------- + +``amd-pstate`` peferred core`` has two states: enable and disable. +Enable/disable states can be chosen by different kernel parameters. +Default enable ``amd-pstate`` preferred core. + +``amd_prefcore=disable`` + +For systems that support ``amd-pstate`` preferred core, the core rankings will +always be advertised by the platform. But OS can choose to ignore that via the +kernel parameter ``amd_prefcore=disable``. + User Space Interface in ``sysfs`` - General =========================================== @@ -385,6 +427,19 @@ control its functionality at the system level. They are located in the to the operation mode represented by that string - or to be unregistered in the "disable" case. +``prefcore`` + Preferred core state of the driver: "enabled" or "disabled". + + "enabled" + Enable the ``amd-pstate`` preferred core. + + "disabled" + Disable the ``amd-pstate`` preferred core + + + This attribute is read-only to check the state of preferred core set + by the kernel parameter. + ``cpupower`` tool support for ``amd-pstate`` =============================================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1566748f16c4..4fd69cd4241a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1054,8 +1054,9 @@ config SCHED_MC config SCHED_MC_PRIO bool "CPU core priorities scheduler support" - depends on SCHED_MC && CPU_SUP_INTEL - select X86_INTEL_PSTATE + depends on SCHED_MC + select X86_INTEL_PSTATE if CPU_SUP_INTEL + select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI select CPU_FREQ default y help diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 7ff269a78c20..ad388a0e8484 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); } +/** + * cppc_get_highest_perf - Get the highest performance register value. + * @cpunum: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_highest_perf); + /** * cppc_get_epp_perf - Get the epp register value. * @cpunum: CPU from which to get epp preference value. diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 4bd16b3f0781..67db60eda370 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -27,6 +27,7 @@ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 +#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85 MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); @@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; + case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED: + cpufreq_update_limits(pr->id); + acpi_bus_generate_netlink_event(device->pnp.device_class, + dev_name(&device->dev), event, 0); + break; default: acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event); break; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1791d37fbc53..54df68773620 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,7 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; /* * AMD Energy Preference Performance (EPP) @@ -296,14 +298,12 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) &cap1); if (ret) return ret; - - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. + + /* Some CPUs have different highest_perf from others, it is safer + * to read it than to assume some erroneous value, leading to performance issues. */ highest_perf = amd_get_highest_perf(); - if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + if(highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -311,6 +311,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -324,8 +325,11 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; + /* Some CPUs have different highest_perf from others, it is safer + * to read it than to assume some erroneous value, leading to performance issues. + */ highest_perf = amd_get_highest_perf(); - if (highest_perf > cppc_perf.highest_perf) + if(highest_perf > cppc_perf.highest_perf) highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -334,6 +338,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) @@ -706,6 +711,114 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +{ + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +{ + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + } else { + u64 cppc_highest_perf; + + ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, cppc_highest_perf); + } + + return (ret); +} + +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + int ret, prio; + u32 highest_perf; + + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + if (ret) + return; + + cpudata->hw_prefcore = true; + /* check if CPPC preferred core feature is enabled*/ + if (highest_perf < CPPC_MAX_PERF) + prio = (int)highest_perf; + else { + pr_debug("AMD CPPC preferred core is unsupported!\n"); + cpudata->hw_prefcore = false; + return; + } + + if (!amd_pstate_prefcore) + return; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. + */ + sched_set_itmt_core_prio(prio, cpudata->cpu); + + schedule_work(&sched_prefcore_work); +} + +static void amd_pstate_update_limits(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + u32 prev_high = 0, cur_high = 0; + int ret; + bool highest_perf_changed = false; + + mutex_lock(&amd_pstate_driver_lock); + if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) + goto free_cpufreq_put; + + ret = amd_pstate_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; + + prev_high = READ_ONCE(cpudata->prefcore_ranking); + if (prev_high != cur_high) { + highest_perf_changed = true; + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) + sched_set_itmt_core_prio((int)cur_high, cpu); + } + +free_cpufreq_put: + cpufreq_cpu_put(policy); + + if (!highest_perf_changed) + cpufreq_update_policy(cpu); + + mutex_unlock(&amd_pstate_driver_lock); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -727,6 +840,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -877,6 +992,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->prefcore_ranking); + + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -1074,18 +1211,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, NULL, }; @@ -1093,6 +1241,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1100,6 +1250,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, NULL }; @@ -1151,6 +1302,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -1432,6 +1585,7 @@ static struct cpufreq_driver amd_pstate_driver = { .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate", .attr = amd_pstate_attr, }; @@ -1446,6 +1600,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; @@ -1567,7 +1722,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 6126c977ece0..c0b69ffe7bdb 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -139,6 +139,7 @@ struct cppc_cpudata { #ifdef CONFIG_ACPI_CPPC_LIB extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); +extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); @@ -165,6 +166,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { return -ENOTSUPP; } +static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { return -ENOTSUPP; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 6ad02ad9c7b4..d21838835abd 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -39,11 +39,16 @@ struct amd_aperf_mperf { * @cppc_req_cached: cached performance request hints * @highest_perf: the maximum performance an individual processor may reach, * assuming ideal conditions + * For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the fixed value as the highest_perf. * @nominal_perf: the maximum sustained performance level of the processor, * assuming ideal operating conditions * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power * savings are achieved * @lowest_perf: the absolute lowest performance level of the processor + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher + * priority. * @max_freq: the frequency that mapped to highest_perf * @min_freq: the frequency that mapped to lowest_perf * @nominal_freq: the frequency that mapped to nominal_perf @@ -52,6 +57,9 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value @@ -70,6 +78,7 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 prefcore_ranking; u32 min_limit_perf; u32 max_limit_perf; u32 min_limit_freq; @@ -85,6 +94,7 @@ struct amd_cpudata { u64 freq; bool boost_supported; + bool hw_prefcore; /* EPP feature related attributes*/ s16 epp_policy; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1c5ca92a0555..5d62beea2712 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void) return false; } static inline void disable_cpufreq(void) { } +static inline void cpufreq_update_limits(unsigned int cpu) { } #endif #ifdef CONFIG_CPU_FREQ_STAT From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:15 +0800 Subject: [PATCH] AMD Pstate Fixes And Enhancements The patch series adds some fixes and enhancements to the AMD pstate driver. It enables CPPC v2 for certain processors in the family 17H, as requested by TR40 processor users who expect improved performance and lower system temperature. Additionally, it fixes the initialization of nominal_freq for each cpudata and changes latency and delay values to be read from platform firmware firstly for more accurate timing. A new quirk is also added for legacy processors that lack CPPC capabilities, which caused the pstate driver to fail loading. I would greatly appreciate any feedbacks. Thank you! Perry Yuan (6): ACPI: CPPC: enable AMD CPPC V2 support for family 17h processors cpufreq:amd-pstate: fix the nominal freq value set cpufreq:amd-pstate: initialize nominal_freq of each cpudata cpufreq:amd-pstate: get pstate transition delay and latency value from ACPI tables cppc_acpi: print error message if CPPC is unsupported cpufreq:amd-pstate: add quirk for the pstate CPPC capabilities missing arch/x86/kernel/acpi/cppc.c | 2 +- drivers/acpi/cppc_acpi.c | 6 +- drivers/cpufreq/amd-pstate.c | 112 ++++++++++++++++++++++++++++------- include/linux/amd-pstate.h | 6 ++ 4 files changed, 102 insertions(+), 24 deletions(-) -- 2.34.1 Signed-off-by: Jan200101 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:16 +0800 Subject: [PATCH] ACPI: CPPC: enable AMD CPPC V2 support for family 17h processors As there are some AMD processors which only support CPPC V2 firmware and BIOS implementation, the amd_pstate driver will be failed to load when system booting with below kernel warning message: [ 0.477523] amd_pstate: the _CPC object is not present in SBIOS or ACPI disabled To make the amd_pstate driver can be loaded on those TR40 processors, it needs to match x86_model from 0x30 to 0x7F for family 17H. With the change, the system can load amd_pstate driver as expected. Reported-by: Gino Badouri Issue: https://bugzilla.kernel.org/show_bug.cgi?id=218171 Fixes: fbd74d1689 ("ACPI: CPPC: Fix enabling CPPC on AMD systems with shared memory") Signed-off-by: Perry Yuan Signed-off-by: Jan200101 --- arch/x86/kernel/acpi/cppc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8d8752b44f11..ff8f25faca3d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -20,7 +20,7 @@ bool cpc_supported_by_cpu(void) (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) return true; else if (boot_cpu_data.x86 == 0x17 && - boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + boot_cpu_data.x86_model >= 0x30 && boot_cpu_data.x86_model <= 0x7f) return true; return boot_cpu_has(X86_FEATURE_CPPC); } From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:17 +0800 Subject: [PATCH] cpufreq:amd-pstate: fix the nominal freq value set Address an untested error where the nominal_freq was returned in KHz instead of the correct MHz units, this oversight led to a wrong nominal_freq set and resued, it will cause the max frequency of core to be initialized with a wrong frequency value. Cc: stable@vger.kernel.org Fixes: ec437d71db7 ("cpufreq: amd-pstate: Introduce a new AMD P-State driver to support future processors") Signed-off-by: Perry Yuan Signed-off-by: Jan200101 --- drivers/cpufreq/amd-pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 54df68773620..d2591ef3b4f6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -638,8 +638,7 @@ static int amd_get_nominal_freq(struct amd_cpudata *cpudata) if (ret) return ret; - /* Switch to khz */ - return cppc_perf.nominal_freq * 1000; + return cppc_perf.nominal_freq; } static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:18 +0800 Subject: [PATCH] cpufreq:amd-pstate: initialize nominal_freq of each cpudata Optimizes the process of retrieving the nominal frequency by utilizing 'cpudata->nominal_freq' instead of repeatedly accessing the cppc_acpi interface. To enhance efficiency and reduce the CPU load, shifted to using 'cpudata->nominal_freq'. It allows for the nominal frequency to be accessed directly from the cached data in 'cpudata' of each CPU. It will also slightly reduce the frequency change latency while using pstate driver passive mode. Signed-off-by: Perry Yuan Signed-off-by: Jan200101 --- drivers/cpufreq/amd-pstate.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d2591ef3b4f6..fa4908b37793 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -617,7 +617,7 @@ static int amd_get_max_freq(struct amd_cpudata *cpudata) if (ret) return ret; - nominal_freq = cppc_perf.nominal_freq; + nominal_freq = READ_ONCE(cpudata->nominal_freq); nominal_perf = READ_ONCE(cpudata->nominal_perf); max_perf = READ_ONCE(cpudata->highest_perf); @@ -652,7 +652,7 @@ static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) if (ret) return ret; - nominal_freq = cppc_perf.nominal_freq; + nominal_freq = READ_ONCE(cpudata->nominal_freq); nominal_perf = READ_ONCE(cpudata->nominal_perf); lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; @@ -846,13 +846,15 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata1; min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); nominal_freq = amd_get_nominal_freq(cpudata); + cpudata->nominal_freq = nominal_freq; + max_freq = amd_get_max_freq(cpudata); lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq || nominal_freq == 0) { + dev_err(dev, "min_freq(%d) or max_freq(%d) or nominal_freq(%d) \ + value is incorrect\n", \ + min_freq, max_freq, nominal_freq); ret = -EINVAL; goto free_cpudata1; } @@ -891,7 +893,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->min_freq = min_freq; cpudata->max_limit_freq = max_freq; cpudata->min_limit_freq = min_freq; - cpudata->nominal_freq = nominal_freq; cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; policy->driver_data = cpudata; @@ -1308,12 +1309,14 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) goto free_cpudata1; min_freq = amd_get_min_freq(cpudata); - max_freq = amd_get_max_freq(cpudata); nominal_freq = amd_get_nominal_freq(cpudata); + cpudata->nominal_freq = nominal_freq; + max_freq = amd_get_max_freq(cpudata); lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", - min_freq, max_freq); + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq || nominal_freq == 0) { + dev_err(dev, "min_freq(%d) or max_freq(%d) or nominal_freq(%d) \ + value is incorrect\n", \ + min_freq, max_freq, nominal_freq); ret = -EINVAL; goto free_cpudata1; } @@ -1326,7 +1329,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) /* Initial processor data capability frequencies */ cpudata->max_freq = max_freq; cpudata->min_freq = min_freq; - cpudata->nominal_freq = nominal_freq; cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; policy->driver_data = cpudata; From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:19 +0800 Subject: [PATCH] cpufreq:amd-pstate: get pstate transition delay and latency value from ACPI tables make pstate driver initially retrieve the P-state transition delay and latency values from the BIOS ACPI tables which has more reasonable delay and latency values according to the platform design and requirements. Previously there values were hardcoded at specific value which may have conflicted with platform and it might not reflect the most accurate or optimized setting for the processor. [054h 0084 8] Preserve Mask : FFFFFFFF00000000 [05Ch 0092 8] Write Mask : 0000000000000001 [064h 0100 4] Command Latency : 00000FA0 [068h 0104 4] Maximum Access Rate : 0000EA60 [06Ch 0108 2] Minimum Turnaround Time : 0000 Signed-off-by: Perry Yuan Signed-off-by: Jan200101 --- drivers/cpufreq/amd-pstate.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index fa4908b37793..484e89c9cd86 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -818,6 +818,36 @@ static void amd_pstate_update_limits(unsigned int cpu) mutex_unlock(&amd_pstate_driver_lock); } +/** + * Get pstate transition delay time from ACPI tables that firmware set + * instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) +{ + u32 transition_delay_ns; + + transition_delay_ns= cppc_get_transition_latency(cpu); + if (transition_delay_ns == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_DELAY; + + return transition_delay_ns / NSEC_PER_USEC; +} + +/** + * Get pstate transition latency value from ACPI tables that firmware set + * instead of using hardcode value directly. + */ +static u32 amd_pstate_get_transition_latency(unsigned int cpu) +{ + u32 transition_latency; + + transition_latency = cppc_get_transition_latency(cpu); + if (transition_latency == CPUFREQ_ETERNAL) + return AMD_PSTATE_TRANSITION_LATENCY; + + return transition_latency; +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -859,8 +889,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata1; } - policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; - policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); + policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); policy->min = min_freq; policy->max = max_freq; From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:20 +0800 Subject: [PATCH] cppc_acpi: print error message if CPPC is unsupported to be more clear what is wrong with CPPC when pstate driver failed to load which has dependency on the CPPC capabilities. Add one more debug message to notify user if CPPC is not supported by the CPU, then it will be easy to find out what need to fix for pstate driver loading issue. [ 0.477523] amd_pstate: the _CPC object is not present in SBIOS or ACPI disabled Above message is not clear enough to verify whether CPPC is not supported. Signed-off-by: Perry Yuan Signed-off-by: Jan200101 --- drivers/acpi/cppc_acpi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index ad388a0e8484..6e89fc97c82f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -676,8 +676,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) if (!osc_sb_cppc2_support_acked) { pr_debug("CPPC v2 _OSC not acked\n"); - if (!cpc_supported_by_cpu()) - return -ENODEV; + if (!cpc_supported_by_cpu()) { + pr_debug("CPPC is not supported\n"); + return -ENODEV; + } } /* Parse the ACPI _CPC table for this CPU. */ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 31 Jan 2024 16:50:21 +0800 Subject: [PATCH] cpufreq:amd-pstate: add quirk for the pstate CPPC capabilities missing Add quirk table to get CPPC capabilities issue fixed by providing correct perf or frequency values while driver loading. If CPPC capabilities are not defined in the ACPI tables or wrongly defined by platform firmware, it needs to use quick to get those issues fixed with correct workaround values to make pstate driver can be loaded even though there are CPPC capabilities errors. Signed-off-by: Perry Yuan Signed-off-by: Jan200101 --- drivers/cpufreq/amd-pstate.c | 51 +++++++++++++++++++++++++++++++----- include/linux/amd-pstate.h | 6 +++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 484e89c9cd86..01ae446c08e6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -66,6 +66,7 @@ static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; static bool amd_pstate_prefcore = true; +static struct quirk_entry *quirks; /* * AMD Energy Preference Performance (EPP) @@ -110,6 +111,32 @@ static unsigned int epp_values[] = { typedef int (*cppc_mode_transition_fn)(int); +static struct quirk_entry quirk_amd_7k62 = { + .nominal_freq = 2600, + .lowest_freq = 550, +}; + +static int __init dmi_matched(const struct dmi_system_id *dmi) +{ + quirks = dmi->driver_data; + + return 1; +} + +static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { + { + .callback = dmi_matched, + .ident = "AMD EPYC 7K62", + .matches = { + DMI_MATCH(DMI_PRODUCT_VERSION, "C1"), + DMI_MATCH(DMI_PRODUCT_SERIAL, "FX19911000028"), + }, + .driver_data = &quirk_amd_7k62, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -598,13 +625,19 @@ static void amd_pstate_adjust_perf(unsigned int cpu, static int amd_get_min_freq(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; + u32 lowest_freq; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; + if (quirks && quirks->lowest_freq) + lowest_freq = quirks->lowest_freq; + else + lowest_freq = cppc_perf.lowest_freq; + /* Switch to khz */ - return cppc_perf.lowest_freq * 1000; + return lowest_freq * 1000; } static int amd_get_max_freq(struct amd_cpudata *cpudata) @@ -632,13 +665,14 @@ static int amd_get_max_freq(struct amd_cpudata *cpudata) static int amd_get_nominal_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; + u32 nominal_freq; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; + if (quirks && quirks->nominal_freq) + nominal_freq = quirks->nominal_freq; + else + nominal_freq = READ_ONCE(cpudata->nominal_freq); - return cppc_perf.nominal_freq; + return nominal_freq; } static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) @@ -1672,6 +1706,11 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; + quirks = NULL; + + /* check if this machine need CPPC quirks */ + dmi_check_system(amd_pstate_quirks_table); + switch (cppc_state) { case AMD_PSTATE_UNDEFINED: /* Disable on the following configs by default: diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index d21838835abd..7b2cbb892fd9 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -124,4 +124,10 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_GUIDED] = "guided", NULL, }; + +struct quirk_entry { + u32 nominal_freq; + u32 lowest_freq; +}; + #endif /* _LINUX_AMD_PSTATE_H */