aboutsummaryrefslogtreecommitdiff
path: root/SOURCES/0001-amd-pstate.patch
diff options
context:
space:
mode:
authorJan200101 <sentrycraft123@gmail.com>2024-06-12 23:03:17 +0200
committerJan200101 <sentrycraft123@gmail.com>2024-06-12 23:04:02 +0200
commit4ccc9dfd8e38348d527d3704b87a680ba43756cd (patch)
tree2d72166c60df258cebe6160b5943624dceb8855d /SOURCES/0001-amd-pstate.patch
parentd39b424f868921cd22fbfac392912a911c72bcf2 (diff)
downloadkernel-fsync-4ccc9dfd8e38348d527d3704b87a680ba43756cd.tar.gz
kernel-fsync-4ccc9dfd8e38348d527d3704b87a680ba43756cd.zip
kernel 6.9.4
Diffstat (limited to 'SOURCES/0001-amd-pstate.patch')
-rw-r--r--SOURCES/0001-amd-pstate.patch880
1 files changed, 366 insertions, 514 deletions
diff --git a/SOURCES/0001-amd-pstate.patch b/SOURCES/0001-amd-pstate.patch
index d7fd4b3..6e929f5 100644
--- a/SOURCES/0001-amd-pstate.patch
+++ b/SOURCES/0001-amd-pstate.patch
@@ -1,578 +1,430 @@
-From 1449b07b2bd2af451bba8ba17f7b01cf30b6471f Mon Sep 17 00:00:00 2001
+From 841eda55513de3b157eb94460e37e7b779980e0c Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
-Date: Fri, 23 Feb 2024 17:11:08 +0100
-Subject: [PATCH 1/7] amd-pstate
+Date: Wed, 12 Jun 2024 18:19:17 +0200
+Subject: [PATCH 02/11] amd-pstate
Signed-off-by: Peter Jung <admin@ptr1337.dev>
---
- .../admin-guide/kernel-parameters.txt | 5 +
- Documentation/admin-guide/pm/amd-pstate.rst | 59 +++++-
- arch/x86/Kconfig | 5 +-
- drivers/acpi/cppc_acpi.c | 13 ++
- drivers/acpi/processor_driver.c | 6 +
- drivers/cpufreq/amd-pstate.c | 179 +++++++++++++++++-
- include/acpi/cppc_acpi.h | 5 +
- include/linux/amd-pstate.h | 10 +
- include/linux/cpufreq.h | 1 +
- 9 files changed, 272 insertions(+), 11 deletions(-)
+ drivers/cpufreq/amd-pstate.c | 267 +++++++++++++++++++++--------------
+ include/linux/amd-pstate.h | 20 ++-
+ 2 files changed, 178 insertions(+), 109 deletions(-)
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index b72e2049c487..95164b35f973 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -363,6 +363,11 @@
- selects a performance level in this range and appropriate
- to the current workload.
-
-+ amd_prefcore=
-+ [X86]
-+ disable
-+ Disable amd-pstate preferred core.
-+
- amijoy.map= [HW,JOY] Amiga joystick support
- Map of devices attached to JOY0DAT and JOY1DAT
- Format: <a>,<b>
-diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
-index 1cf40f69278c..0b832ff529db 100644
---- a/Documentation/admin-guide/pm/amd-pstate.rst
-+++ b/Documentation/admin-guide/pm/amd-pstate.rst
-@@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
- efficiency frequency management method on AMD processors.
-
-
--AMD Pstate Driver Operation Modes
--=================================
-+``amd-pstate`` Driver Operation Modes
-+======================================
-
- ``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
- non-autonomous (passive) mode and guided autonomous (guided) mode.
-@@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance
- level and the platform autonomously selects a performance level in this range
- and appropriate to the current workload.
-
-+``amd-pstate`` Preferred Core
-+=================================
-+
-+The core frequency is subjected to the process variation in semiconductors.
-+Not all cores are able to reach the maximum frequency respecting the
-+infrastructure limits. Consequently, AMD has redefined the concept of
-+maximum frequency of a part. This means that a fraction of cores can reach
-+maximum frequency. To find the best process scheduling policy for a given
-+scenario, OS needs to know the core ordering informed by the platform through
-+highest performance capability register of the CPPC interface.
-+
-+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
-+cores that can achieve a higher frequency with lower voltage. The preferred
-+core rankings can dynamically change based on the workload, platform conditions,
-+thermals and ageing.
-+
-+The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate``
-+driver will also determine whether or not ``amd-pstate`` preferred core is
-+supported by the platform.
-+
-+``amd-pstate`` driver will provide an initial core ordering when the system boots.
-+The platform uses the CPPC interfaces to communicate the core ranking to the
-+operating system and scheduler to make sure that OS is choosing the cores
-+with highest performance firstly for scheduling the process. When ``amd-pstate``
-+driver receives a message with the highest performance change, it will
-+update the core ranking and set the cpu's priority.
-+
-+``amd-pstate`` Preferred Core Switch
-+=================================
-+Kernel Parameters
-+-----------------
-+
-+``amd-pstate`` peferred core`` has two states: enable and disable.
-+Enable/disable states can be chosen by different kernel parameters.
-+Default enable ``amd-pstate`` preferred core.
-+
-+``amd_prefcore=disable``
-+
-+For systems that support ``amd-pstate`` preferred core, the core rankings will
-+always be advertised by the platform. But OS can choose to ignore that via the
-+kernel parameter ``amd_prefcore=disable``.
-+
- User Space Interface in ``sysfs`` - General
- ===========================================
-
-@@ -385,6 +427,19 @@ control its functionality at the system level. They are located in the
- to the operation mode represented by that string - or to be
- unregistered in the "disable" case.
-
-+``prefcore``
-+ Preferred core state of the driver: "enabled" or "disabled".
-+
-+ "enabled"
-+ Enable the ``amd-pstate`` preferred core.
-+
-+ "disabled"
-+ Disable the ``amd-pstate`` preferred core
-+
-+
-+ This attribute is read-only to check the state of preferred core set
-+ by the kernel parameter.
-+
- ``cpupower`` tool support for ``amd-pstate``
- ===============================================
-
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 1566748f16c4..4fd69cd4241a 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -1054,8 +1054,9 @@ config SCHED_MC
-
- config SCHED_MC_PRIO
- bool "CPU core priorities scheduler support"
-- depends on SCHED_MC && CPU_SUP_INTEL
-- select X86_INTEL_PSTATE
-+ depends on SCHED_MC
-+ select X86_INTEL_PSTATE if CPU_SUP_INTEL
-+ select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI
- select CPU_FREQ
- default y
- help
-diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
-index 7ff269a78c20..ad388a0e8484 100644
---- a/drivers/acpi/cppc_acpi.c
-+++ b/drivers/acpi/cppc_acpi.c
-@@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
- return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
- }
-
-+/**
-+ * cppc_get_highest_perf - Get the highest performance register value.
-+ * @cpunum: CPU from which to get highest performance.
-+ * @highest_perf: Return address.
-+ *
-+ * Return: 0 for success, -EIO otherwise.
-+ */
-+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
-+{
-+ return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
-+}
-+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
-+
- /**
- * cppc_get_epp_perf - Get the epp register value.
- * @cpunum: CPU from which to get epp preference value.
-diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
-index 4bd16b3f0781..67db60eda370 100644
---- a/drivers/acpi/processor_driver.c
-+++ b/drivers/acpi/processor_driver.c
-@@ -27,6 +27,7 @@
- #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
- #define ACPI_PROCESSOR_NOTIFY_POWER 0x81
- #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
-+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85
-
- MODULE_AUTHOR("Paul Diefenbaugh");
- MODULE_DESCRIPTION("ACPI Processor Driver");
-@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
- acpi_bus_generate_netlink_event(device->pnp.device_class,
- dev_name(&device->dev), event, 0);
- break;
-+ case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
-+ cpufreq_update_limits(pr->id);
-+ acpi_bus_generate_netlink_event(device->pnp.device_class,
-+ dev_name(&device->dev), event, 0);
-+ break;
- default:
- acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
- break;
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
-index 1791d37fbc53..54df68773620 100644
+index 28166df81cf8d..cde3b91b4422a 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
-@@ -37,6 +37,7 @@
- #include <linux/uaccess.h>
- #include <linux/static_call.h>
- #include <linux/amd-pstate.h>
-+#include <linux/topology.h>
-
- #include <acpi/processor.h>
- #include <acpi/cppc_acpi.h>
-@@ -64,6 +65,7 @@ static struct cpufreq_driver amd_pstate_driver;
- static struct cpufreq_driver amd_pstate_epp_driver;
+@@ -68,6 +68,7 @@ static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
-+static bool amd_pstate_prefcore = true;
+ static bool amd_pstate_prefcore = true;
++static struct quirk_entry *quirks;
/*
* AMD Energy Preference Performance (EPP)
-@@ -296,14 +298,12 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
- &cap1);
- if (ret)
- return ret;
--
-- /*
-- * TODO: Introduce AMD specific power feature.
-- *
-- * CPPC entry doesn't indicate the highest performance in some ASICs.
-+
-+ /* Some CPUs have different highest_perf from others, it is safer
-+ * to read it than to assume some erroneous value, leading to performance issues.
- */
- highest_perf = amd_get_highest_perf();
-- if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
-+ if(highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
+@@ -112,6 +113,41 @@ static unsigned int epp_values[] = {
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
-@@ -311,6 +311,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
- WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
-+ WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
- WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
- return 0;
- }
-@@ -324,8 +325,11 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
- if (ret)
- return ret;
+ typedef int (*cppc_mode_transition_fn)(int);
-+ /* Some CPUs have different highest_perf from others, it is safer
-+ * to read it than to assume some erroneous value, leading to performance issues.
++static struct quirk_entry quirk_amd_7k62 = {
++ .nominal_freq = 2600,
++ .lowest_freq = 550,
++};
++
++static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
++{
++ /*
++ * Match the broken BIOS for family 17h processors that support CPPC V2:
++ * the broken BIOS lacks the nominal_freq and lowest_freq capability
++ * definitions in its ACPI tables.
+ */
- highest_perf = amd_get_highest_perf();
-- if (highest_perf > cppc_perf.highest_perf)
-+ if(highest_perf > cppc_perf.highest_perf)
- highest_perf = cppc_perf.highest_perf;
-
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
-@@ -334,6 +338,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
- WRITE_ONCE(cpudata->lowest_nonlinear_perf,
- cppc_perf.lowest_nonlinear_perf);
- WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
-+ WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
- WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
++ if (boot_cpu_has(X86_FEATURE_ZEN2)) {
++ quirks = dmi->driver_data;
++ pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
++ return 1;
++ }
++
++ return 0;
++}
++
++static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
++ {
++ .callback = dmi_matched_7k62_bios_bug,
++ .ident = "AMD EPYC 7K62",
++ .matches = {
++ DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
++ DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
++ },
++ .driver_data = &quirk_amd_7k62,
++ },
++ {}
++};
++MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
++
+ static inline int get_mode_idx_from_str(const char *str, size_t size)
+ {
+ int i;
+@@ -620,78 +656,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
+ cpufreq_cpu_put(policy);
+ }
- if (cppc_state == AMD_PSTATE_ACTIVE)
-@@ -706,6 +711,114 @@ static void amd_perf_ctl_reset(unsigned int cpu)
- wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
+-static int amd_get_min_freq(struct amd_cpudata *cpudata)
+-{
+- struct cppc_perf_caps cppc_perf;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- /* Switch to khz */
+- return cppc_perf.lowest_freq * 1000;
+-}
+-
+-static int amd_get_max_freq(struct amd_cpudata *cpudata)
+-{
+- struct cppc_perf_caps cppc_perf;
+- u32 max_perf, max_freq, nominal_freq, nominal_perf;
+- u64 boost_ratio;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- nominal_freq = cppc_perf.nominal_freq;
+- nominal_perf = READ_ONCE(cpudata->nominal_perf);
+- max_perf = READ_ONCE(cpudata->highest_perf);
+-
+- boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
+- nominal_perf);
+-
+- max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
+-
+- /* Switch to khz */
+- return max_freq * 1000;
+-}
+-
+-static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
+-{
+- struct cppc_perf_caps cppc_perf;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- /* Switch to khz */
+- return cppc_perf.nominal_freq * 1000;
+-}
+-
+-static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
+-{
+- struct cppc_perf_caps cppc_perf;
+- u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
+- nominal_freq, nominal_perf;
+- u64 lowest_nonlinear_ratio;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- nominal_freq = cppc_perf.nominal_freq;
+- nominal_perf = READ_ONCE(cpudata->nominal_perf);
+-
+- lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
+-
+- lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
+- nominal_perf);
+-
+- lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
+-
+- /* Switch to khz */
+- return lowest_nonlinear_freq * 1000;
+-}
+-
+ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
+ {
+ struct amd_cpudata *cpudata = policy->driver_data;
+@@ -705,7 +669,7 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
+ if (state)
+ policy->cpuinfo.max_freq = cpudata->max_freq;
+ else
+- policy->cpuinfo.max_freq = cpudata->nominal_freq;
++ policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000;
+
+ policy->max = policy->cpuinfo.max_freq;
+
+@@ -844,9 +808,93 @@ static void amd_pstate_update_limits(unsigned int cpu)
+ mutex_unlock(&amd_pstate_driver_lock);
}
+/*
-+ * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
-+ * due to locking, so queue the work for later.
-+ */
-+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
-+{
-+ sched_set_itmt_support();
-+}
-+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
-+
-+/*
-+ * Get the highest performance register value.
-+ * @cpu: CPU from which to get highest performance.
-+ * @highest_perf: Return address.
-+ *
-+ * Return: 0 for success, -EIO otherwise.
++ * Get the P-state transition delay (in us) from the ACPI tables set by
++ * firmware instead of using a hard-coded value directly.
+ */
-+static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
++static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
+{
-+ int ret;
++ u32 transition_delay_ns;
+
-+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+ u64 cap1;
++ transition_delay_ns = cppc_get_transition_latency(cpu);
++ if (transition_delay_ns == CPUFREQ_ETERNAL)
++ return AMD_PSTATE_TRANSITION_DELAY;
+
-+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
-+ if (ret)
-+ return ret;
-+ WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
-+ } else {
-+ u64 cppc_highest_perf;
-+
-+ ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
-+ if (ret)
-+ return ret;
-+ WRITE_ONCE(*highest_perf, cppc_highest_perf);
-+ }
-+
-+ return (ret);
++ return transition_delay_ns / NSEC_PER_USEC;
+}
+
-+#define CPPC_MAX_PERF U8_MAX
-+
-+static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
++/*
++ * Get the P-state transition latency from the ACPI tables set by
++ * firmware instead of using a hard-coded value directly.
++ */
++static u32 amd_pstate_get_transition_latency(unsigned int cpu)
+{
-+ int ret, prio;
-+ u32 highest_perf;
-+
-+ ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
-+ if (ret)
-+ return;
-+
-+ cpudata->hw_prefcore = true;
-+ /* check if CPPC preferred core feature is enabled*/
-+ if (highest_perf < CPPC_MAX_PERF)
-+ prio = (int)highest_perf;
-+ else {
-+ pr_debug("AMD CPPC preferred core is unsupported!\n");
-+ cpudata->hw_prefcore = false;
-+ return;
-+ }
++ u32 transition_latency;
+
-+ if (!amd_pstate_prefcore)
-+ return;
++ transition_latency = cppc_get_transition_latency(cpu);
++ if (transition_latency == CPUFREQ_ETERNAL)
++ return AMD_PSTATE_TRANSITION_LATENCY;
+
-+ /*
-+ * The priorities can be set regardless of whether or not
-+ * sched_set_itmt_support(true) has been called and it is valid to
-+ * update them at any time after it has been called.
-+ */
-+ sched_set_itmt_core_prio(prio, cpudata->cpu);
-+
-+ schedule_work(&sched_prefcore_work);
++ return transition_latency;
+}
+
-+static void amd_pstate_update_limits(unsigned int cpu)
++/*
++ * amd_pstate_init_freq: Initialize the max_freq, min_freq,
++ * nominal_freq and lowest_nonlinear_freq for
++ * the @cpudata object.
++ *
++ * Requires: highest_perf, lowest_perf, nominal_perf and
++ * lowest_nonlinear_perf members of @cpudata to be
++ * initialized.
++ *
++ * Returns 0 on success, non-zero value on failure.
++ */
++static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
+{
-+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-+ struct amd_cpudata *cpudata = policy->driver_data;
-+ u32 prev_high = 0, cur_high = 0;
+ int ret;
-+ bool highest_perf_changed = false;
++ u32 min_freq;
++ u32 highest_perf, max_freq;
++ u32 nominal_perf, nominal_freq;
++ u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
++ u32 boost_ratio, lowest_nonlinear_ratio;
++ struct cppc_perf_caps cppc_perf;
++
++ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
++ if (ret)
++ return ret;
+
-+ mutex_lock(&amd_pstate_driver_lock);
-+ if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
-+ goto free_cpufreq_put;
++ if (quirks && quirks->lowest_freq)
++ min_freq = quirks->lowest_freq * 1000;
++ else
++ min_freq = cppc_perf.lowest_freq * 1000;
+
-+ ret = amd_pstate_get_highest_perf(cpu, &cur_high);
-+ if (ret)
-+ goto free_cpufreq_put;
++ if (quirks && quirks->nominal_freq)
++ nominal_freq = quirks->nominal_freq ;
++ else
++ nominal_freq = cppc_perf.nominal_freq;
+
-+ prev_high = READ_ONCE(cpudata->prefcore_ranking);
-+ if (prev_high != cur_high) {
-+ highest_perf_changed = true;
-+ WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
++ nominal_perf = READ_ONCE(cpudata->nominal_perf);
+
-+ if (cur_high < CPPC_MAX_PERF)
-+ sched_set_itmt_core_prio((int)cur_high, cpu);
-+ }
++ highest_perf = READ_ONCE(cpudata->highest_perf);
++ boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
++ max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+
-+free_cpufreq_put:
-+ cpufreq_cpu_put(policy);
++ lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
++ lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
++ nominal_perf);
++ lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+
-+ if (!highest_perf_changed)
-+ cpufreq_update_policy(cpu);
++ WRITE_ONCE(cpudata->min_freq, min_freq);
++ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
++ WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
++ WRITE_ONCE(cpudata->max_freq, max_freq);
+
-+ mutex_unlock(&amd_pstate_driver_lock);
++ return 0;
+}
+
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
-@@ -727,6 +840,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
-
- cpudata->cpu = policy->cpu;
+- int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
++ int min_freq, max_freq, nominal_freq, ret;
+ struct device *dev;
+ struct amd_cpudata *cpudata;
-+ amd_pstate_init_prefcore(cpudata);
-+
- ret = amd_pstate_init_perf(cpudata);
+@@ -871,20 +919,25 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
if (ret)
goto free_cpudata1;
-@@ -877,6 +992,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
- return sysfs_emit(buf, "%u\n", perf);
- }
-+static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
-+ char *buf)
-+{
-+ u32 perf;
-+ struct amd_cpudata *cpudata = policy->driver_data;
-+
-+ perf = READ_ONCE(cpudata->prefcore_ranking);
-+
-+ return sysfs_emit(buf, "%u\n", perf);
-+}
-+
-+static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
-+ char *buf)
-+{
-+ bool hw_prefcore;
-+ struct amd_cpudata *cpudata = policy->driver_data;
-+
-+ hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
-+
-+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
-+}
-+
- static ssize_t show_energy_performance_available_preferences(
- struct cpufreq_policy *policy, char *buf)
+- min_freq = amd_get_min_freq(cpudata);
+- max_freq = amd_get_max_freq(cpudata);
+- nominal_freq = amd_get_nominal_freq(cpudata);
+- lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
++ ret = amd_pstate_init_freq(cpudata);
++ if (ret)
++ goto free_cpudata1;
+
+- if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+- dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+- min_freq, max_freq);
++ min_freq = READ_ONCE(cpudata->min_freq);
++ max_freq = READ_ONCE(cpudata->max_freq);
++ nominal_freq = READ_ONCE(cpudata->nominal_freq);
++
++ if (min_freq <= 0 || max_freq <= 0 ||
++ nominal_freq <= 0 || min_freq > max_freq) {
++ dev_err(dev,
++ "min_freq(%d) or max_freq(%d) or nominal_freq (%d) value is incorrect, check _CPC in ACPI tables\n",
++ min_freq, max_freq, nominal_freq);
+ ret = -EINVAL;
+ goto free_cpudata1;
+ }
+
+- policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
+- policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;
++ policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
++ policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
+
+ policy->min = min_freq;
+ policy->max = max_freq;
+@@ -912,13 +965,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
+ goto free_cpudata2;
+ }
+
+- /* Initial processor data capability frequencies */
+- cpudata->max_freq = max_freq;
+- cpudata->min_freq = min_freq;
+ cpudata->max_limit_freq = max_freq;
+ cpudata->min_limit_freq = min_freq;
+- cpudata->nominal_freq = nominal_freq;
+- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+ policy->driver_data = cpudata;
+
+@@ -982,7 +1030,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
+ int max_freq;
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+- max_freq = amd_get_max_freq(cpudata);
++ max_freq = READ_ONCE(cpudata->max_freq);
+ if (max_freq < 0)
+ return max_freq;
+
+@@ -995,7 +1043,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli
+ int freq;
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+- freq = amd_get_lowest_nonlinear_freq(cpudata);
++ freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
+ if (freq < 0)
+ return freq;
+
+@@ -1306,7 +1354,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void)
+
+ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
-@@ -1074,18 +1211,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
- return ret < 0 ? ret : count;
- }
-
-+static ssize_t prefcore_show(struct device *dev,
-+ struct device_attribute *attr, char *buf)
-+{
-+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
-+}
-+
- cpufreq_freq_attr_ro(amd_pstate_max_freq);
- cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
-
- cpufreq_freq_attr_ro(amd_pstate_highest_perf);
-+cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
-+cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
- cpufreq_freq_attr_rw(energy_performance_preference);
- cpufreq_freq_attr_ro(energy_performance_available_preferences);
- static DEVICE_ATTR_RW(status);
-+static DEVICE_ATTR_RO(prefcore);
-
- static struct freq_attr *amd_pstate_attr[] = {
- &amd_pstate_max_freq,
- &amd_pstate_lowest_nonlinear_freq,
- &amd_pstate_highest_perf,
-+ &amd_pstate_prefcore_ranking,
-+ &amd_pstate_hw_prefcore,
- NULL,
- };
-
-@@ -1093,6 +1241,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
- &amd_pstate_max_freq,
- &amd_pstate_lowest_nonlinear_freq,
- &amd_pstate_highest_perf,
-+ &amd_pstate_prefcore_ranking,
-+ &amd_pstate_hw_prefcore,
- &energy_performance_preference,
- &energy_performance_available_preferences,
- NULL,
-@@ -1100,6 +1250,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
+- int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
++ int min_freq, max_freq, nominal_freq, ret;
+ struct amd_cpudata *cpudata;
+ struct device *dev;
+ u64 value;
+@@ -1333,13 +1381,18 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+ if (ret)
+ goto free_cpudata1;
- static struct attribute *pstate_global_attributes[] = {
- &dev_attr_status.attr,
-+ &dev_attr_prefcore.attr,
- NULL
- };
+- min_freq = amd_get_min_freq(cpudata);
+- max_freq = amd_get_max_freq(cpudata);
+- nominal_freq = amd_get_nominal_freq(cpudata);
+- lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
+- if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+- dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+- min_freq, max_freq);
++ ret = amd_pstate_init_freq(cpudata);
++ if (ret)
++ goto free_cpudata1;
++
++ min_freq = READ_ONCE(cpudata->min_freq);
++ max_freq = READ_ONCE(cpudata->max_freq);
++ nominal_freq = READ_ONCE(cpudata->nominal_freq);
++ if (min_freq <= 0 || max_freq <= 0 ||
++ nominal_freq <= 0 || min_freq > max_freq) {
++ dev_err(dev,
++ "min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n",
++ min_freq, max_freq, nominal_freq);
+ ret = -EINVAL;
+ goto free_cpudata1;
+ }
+@@ -1349,12 +1402,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+ /* It will be updated by governor */
+ policy->cur = policy->cpuinfo.min_freq;
+
+- /* Initial processor data capability frequencies */
+- cpudata->max_freq = max_freq;
+- cpudata->min_freq = min_freq;
+- cpudata->nominal_freq = nominal_freq;
+- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+-
+ policy->driver_data = cpudata;
-@@ -1151,6 +1302,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
- cpudata->cpu = policy->cpu;
- cpudata->epp_policy = 0;
+ cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
+@@ -1394,6 +1441,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
-+ amd_pstate_init_prefcore(cpudata);
+ static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+ {
++ struct amd_cpudata *cpudata = policy->driver_data;
+
- ret = amd_pstate_init_perf(cpudata);
- if (ret)
- goto free_cpudata1;
-@@ -1432,6 +1585,7 @@ static struct cpufreq_driver amd_pstate_driver = {
- .suspend = amd_pstate_cpu_suspend,
- .resume = amd_pstate_cpu_resume,
- .set_boost = amd_pstate_set_boost,
-+ .update_limits = amd_pstate_update_limits,
- .name = "amd-pstate",
- .attr = amd_pstate_attr,
- };
-@@ -1446,6 +1600,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
- .online = amd_pstate_epp_cpu_online,
- .suspend = amd_pstate_epp_suspend,
- .resume = amd_pstate_epp_resume,
-+ .update_limits = amd_pstate_update_limits,
- .name = "amd-pstate-epp",
- .attr = amd_pstate_epp_attr,
- };
-@@ -1567,7 +1722,17 @@ static int __init amd_pstate_param(char *str)
-
- return amd_pstate_set_driver(mode_idx);
- }
++ if (cpudata) {
++ kfree(cpudata);
++ policy->driver_data = NULL;
++ }
+
-+static int __init amd_prefcore_param(char *str)
-+{
-+ if (!strcmp(str, "disable"))
-+ amd_pstate_prefcore = false;
+ pr_debug("CPU %d exiting\n", policy->cpu);
+ return 0;
+ }
+@@ -1672,6 +1726,11 @@ static int __init amd_pstate_init(void)
+ if (cpufreq_get_current_driver())
+ return -EEXIST;
+
++ quirks = NULL;
+
-+ return 0;
-+}
++ /* Check whether this machine needs CPPC quirks. */
++ dmi_check_system(amd_pstate_quirks_table);
+
- early_param("amd_pstate", amd_pstate_param);
-+early_param("amd_prefcore", amd_prefcore_param);
-
- MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
- MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
-diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
-index 6126c977ece0..c0b69ffe7bdb 100644
---- a/include/acpi/cppc_acpi.h
-+++ b/include/acpi/cppc_acpi.h
-@@ -139,6 +139,7 @@ struct cppc_cpudata {
- #ifdef CONFIG_ACPI_CPPC_LIB
- extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
- extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
-+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
- extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
- extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
- extern int cppc_set_enable(int cpu, bool enable);
-@@ -165,6 +166,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
- {
- return -ENOTSUPP;
- }
-+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
-+{
-+ return -ENOTSUPP;
-+}
- static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
- {
- return -ENOTSUPP;
+ switch (cppc_state) {
+ case AMD_PSTATE_UNDEFINED:
+ /* Disable on the following configs by default:
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
-index 6ad02ad9c7b4..d21838835abd 100644
+index d21838835abda..d58fc022ec466 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
-@@ -39,11 +39,16 @@ struct amd_aperf_mperf {
- * @cppc_req_cached: cached performance request hints
- * @highest_perf: the maximum performance an individual processor may reach,
- * assuming ideal conditions
-+ * For platforms that do not support the preferred core feature, the
-+ * highest_pef may be configured with 166 or 255, to avoid max frequency
-+ * calculated wrongly. we take the fixed value as the highest_perf.
- * @nominal_perf: the maximum sustained performance level of the processor,
- * assuming ideal operating conditions
- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
- * savings are achieved
+@@ -49,13 +49,17 @@ struct amd_aperf_mperf {
* @lowest_perf: the absolute lowest performance level of the processor
-+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
-+ * priority.
- * @max_freq: the frequency that mapped to highest_perf
- * @min_freq: the frequency that mapped to lowest_perf
- * @nominal_freq: the frequency that mapped to nominal_perf
-@@ -52,6 +57,9 @@ struct amd_aperf_mperf {
+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
+ * priority.
+- * @max_freq: the frequency that mapped to highest_perf
+- * @min_freq: the frequency that mapped to lowest_perf
+- * @nominal_freq: the frequency that mapped to nominal_perf
+- * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
++ * @min_limit_perf: Cached value of the performance corresponding to policy->min
++ * @max_limit_perf: Cached value of the performance corresponding to policy->max
++ * @min_limit_freq: Cached value of policy->min (in khz)
++ * @max_limit_freq: Cached value of policy->max (in khz)
++ * @max_freq: the frequency (in khz) that mapped to highest_perf
++ * @min_freq: the frequency (in khz) that mapped to lowest_perf
++ * @nominal_freq: the frequency (in MHz) that mapped to nominal_perf
++ * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
* @prev: Last Aperf/Mperf/tsc count value read from register
- * @freq: current cpu frequency value
+- * @freq: current cpu frequency value
++ * @freq: current cpu frequency value (in khz)
* @boost_supported: check whether the Processor or SBIOS supports boost mode
-+ * @hw_prefcore: check whether HW supports preferred core featue.
-+ * Only when hw_prefcore and early prefcore param are true,
-+ * AMD P-State driver supports preferred core featue.
- * @epp_policy: Last saved policy used to set energy-performance preference
- * @epp_cached: Cached CPPC energy-performance preference value
- * @policy: Cpufreq policy value
-@@ -70,6 +78,7 @@ struct amd_cpudata {
- u32 nominal_perf;
- u32 lowest_nonlinear_perf;
- u32 lowest_perf;
-+ u32 prefcore_ranking;
- u32 min_limit_perf;
- u32 max_limit_perf;
- u32 min_limit_freq;
-@@ -85,6 +94,7 @@ struct amd_cpudata {
-
- u64 freq;
- bool boost_supported;
-+ bool hw_prefcore;
-
- /* EPP feature related attributes*/
- s16 epp_policy;
-diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
-index 1c5ca92a0555..5d62beea2712 100644
---- a/include/linux/cpufreq.h
-+++ b/include/linux/cpufreq.h
-@@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void)
- return false;
- }
- static inline void disable_cpufreq(void) { }
-+static inline void cpufreq_update_limits(unsigned int cpu) { }
- #endif
-
- #ifdef CONFIG_CPU_FREQ_STAT
+ * @hw_prefcore: check whether HW supports preferred core featue.
+ * Only when hw_prefcore and early prefcore param are true,
+@@ -124,4 +128,10 @@ static const char * const amd_pstate_mode_string[] = {
+ [AMD_PSTATE_GUIDED] = "guided",
+ NULL,
+ };
++
++struct quirk_entry {
++ u32 nominal_freq;
++ u32 lowest_freq;
++};
++
+ #endif /* _LINUX_AMD_PSTATE_H */
--
-2.43.2
+2.45.2