From b8cbe7e82ec8b55d7bbdde66fc69e788fde00dc6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:57:56 +1030 Subject: [PATCH 1/8] [CPUFREQ] cpumask: don't put a cpumask on the stack in x86...cpufreq/powernow-k8.c It's still mugging the current process's cpumask, but as comment in 1ff6e97f1d says, it's not a trivial fix. So, at least we can use a cpumask_var_t to do the Wrong Thing the Right Way :) Signed-off-by: Rusty Russell To: cpufreq@vger.kernel.org Cc: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3f12dabeab5..f30d2538394 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask; + cpumask_var_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + /* only run on specific CPU from here on. 
*/ + /* This is poor form: use a workqueue or smp_call_function_single */ + if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(oldmask, tsk_cpumask(current)); + set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = 0; err_out: - set_cpus_allowed_ptr(current, &oldmask); + set_cpus_allowed_ptr(current, oldmask); + free_cpumask_var(oldmask); return ret; } From db2820dd5445a44b4726f15a2bc89b9ded2503eb Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sun, 25 Oct 2009 19:45:57 +0100 Subject: [PATCH 2/8] [CPUFREQ] powernow-k6: set transition latency value so ondemand governor can be used Set the transition latency to value smaller than CPUFREQ_ETERNAL so governors other than "performance" work (like the "ondemand" one). The value is found in "AMD PowerNow! Technology Platform Design Guide for Embedded Processors" dated December 2000 (AMD doc #24267A). There is the answer to one of FAQs on page 40 which states that suggested complete transition period is 200 us. Tested on K6-2+ CPU with K6-3 core (model 13, stepping 4). 
Signed-off-by: Krzysztof Helt Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea409f4..cb01dac267d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; policy->cur = busfreq * max_multiplier; result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); From 0cda8b91f2e096bbef1cb05f23c42e423eae7728 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 21 Oct 2009 21:45:46 -0600 Subject: [PATCH 3/8] [CPUFREQ] Documentation: ABI: /sys/devices/system/cpu/cpu#/cpufreq/ This is a complex interface and is already described in Documentation/cpu-freq/, especially in the user-guide.txt file. No need to copy/paste all that information. Let's just alert the reader to the presence of the user-guide. Signed-off-by: Alex Chiang Signed-off-by: Dave Jones --- .../ABI/testing/sysfs-devices-system-cpu | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index a703b9e9aeb..974e29f5da8 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -136,6 +136,24 @@ Description: Discover cpuidle policy and mechanism See files in Documentation/cpuidle/ for more information. +What: /sys/devices/system/cpu/cpu#/cpufreq/* +Date: pre-git history +Contact: cpufreq@vger.kernel.org +Description: Discover and change clock speed of CPUs + + Clock scaling allows you to change the clock speed of the + CPUs on the fly. 
 This is a nice method to save battery + power, because the lower the clock speed, the less power + the CPU consumes. + + There are many knobs to tweak in this directory. + + See files in Documentation/cpu-freq/ for more information. + + In particular, read Documentation/cpu-freq/user-guide.txt + to learn how to control the knobs. + + What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X Date: August 2008 KernelVersion: 2.6.27 From 49b015ce38edeb484fb2efa09048c23e903f49d6 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 1 Oct 2009 19:49:28 +0200 Subject: [PATCH 4/8] [CPUFREQ] Use global sysfs cpufreq structure for conservative governor tunings Same adjustments that have been added to the ondemand recently. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 129 +++++++++++++++++++++---- 1 file changed, 110 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c7b081b839f..599a40b25cb 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -164,20 +164,22 @@ static struct notifier_block dbs_cpufreq_notifier_block = { }; /************************** sysfs interface ************************/ -static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) +static ssize_t show_sampling_rate_max(struct kobject *kobj, + struct attribute *attr, char *buf) { printk_once(KERN_INFO "CPUFREQ: conservative sampling_rate_max " "sysfs file is deprecated - used by: %s\n", current->comm); return sprintf(buf, "%u\n", -1U); } -static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) { return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0444, 
show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -186,7 +188,7 @@ define_one_ro(sampling_rate_min); /* cpufreq_conservative Governor Tunables */ #define show_one(file_name, object) \ static ssize_t show_##file_name \ -(struct cpufreq_policy *unused, char *buf) \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ } @@ -197,8 +199,40 @@ show_one(down_threshold, down_threshold); show_one(ignore_nice_load, ignore_nice); show_one(freq_step, freq_step); -static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, - const char *buf, size_t count) +/*** delete after deprecation time ***/ +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core conservative sysfs " \ + "interface is deprecated - " #file_name "\n"); + +#define show_one_old(file_name) \ +static ssize_t show_##file_name##_old \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core conservative sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return show_##file_name(NULL, NULL, buf); \ +} +show_one_old(sampling_rate); +show_one_old(sampling_down_factor); +show_one_old(up_threshold); +show_one_old(down_threshold); +show_one_old(ignore_nice_load); +show_one_old(freq_step); +show_one_old(sampling_rate_min); +show_one_old(sampling_rate_max); + +#define define_one_ro_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + +define_one_ro_old(sampling_rate_min_old, sampling_rate_min); +define_one_ro_old(sampling_rate_max_old, sampling_rate_max); + +/*** delete after deprecation time ***/ + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -214,8 +248,8 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, return count; } -static ssize_t store_sampling_rate(struct cpufreq_policy 
*unused, - const char *buf, size_t count) +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -231,8 +265,8 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return count; } -static ssize_t store_up_threshold(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -251,8 +285,8 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_down_threshold(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_down_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -272,8 +306,8 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -308,8 +342,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, return count; } -static ssize_t store_freq_step(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_freq_step(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -331,7 +365,7 @@ static ssize_t store_freq_step(struct cpufreq_policy *policy, } #define define_one_rw(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); @@ -358,6 +392,53 @@ static struct attribute_group dbs_attr_group = { .name = "conservative", }; +/*** delete after deprecation time ***/ + +#define write_one_old(file_name) \ 
+static ssize_t store_##file_name##_old \ +(struct cpufreq_policy *unused, const char *buf, size_t count) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core conservative sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return store_##file_name(NULL, NULL, buf, count); \ +} +write_one_old(sampling_rate); +write_one_old(sampling_down_factor); +write_one_old(up_threshold); +write_one_old(down_threshold); +write_one_old(ignore_nice_load); +write_one_old(freq_step); + +#define define_one_rw_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + +define_one_rw_old(sampling_rate_old, sampling_rate); +define_one_rw_old(sampling_down_factor_old, sampling_down_factor); +define_one_rw_old(up_threshold_old, up_threshold); +define_one_rw_old(down_threshold_old, down_threshold); +define_one_rw_old(ignore_nice_load_old, ignore_nice_load); +define_one_rw_old(freq_step_old, freq_step); + +static struct attribute *dbs_attributes_old[] = { + &sampling_rate_max_old.attr, + &sampling_rate_min_old.attr, + &sampling_rate_old.attr, + &sampling_down_factor_old.attr, + &up_threshold_old.attr, + &down_threshold_old.attr, + &ignore_nice_load_old.attr, + &freq_step_old.attr, + NULL +}; + +static struct attribute_group dbs_attr_group_old = { + .attrs = dbs_attributes_old, + .name = "conservative", +}; + +/*** delete after deprecation time ***/ + /************************** sysfs end ************************/ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) @@ -530,7 +611,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_mutex); - rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old); if (rc) { mutex_unlock(&dbs_mutex); return rc; @@ -564,6 +645,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (latency == 0) latency = 1; + rc = sysfs_create_group(cpufreq_global_kobject, + 
&dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + /* * conservative does not implement micro like ondemand * governor, thus we are bound to jiffes/HZ @@ -591,7 +679,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_timer_exit(this_dbs_info); mutex_lock(&dbs_mutex); - sysfs_remove_group(&policy->kobj, &dbs_attr_group); + sysfs_remove_group(&policy->kobj, &dbs_attr_group_old); dbs_enable--; mutex_destroy(&this_dbs_info->timer_mutex); @@ -605,6 +693,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, CPUFREQ_TRANSITION_NOTIFIER); mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); break; From bbe237aafeaae37a1088f2a95ebe81ff81d9e646 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 12 Nov 2009 16:06:45 +0000 Subject: [PATCH 5/8] [CPUFREQ] Document units for transition latency They're documented in the header but not in Documentation. Signed-off-by: Mark Brown Signed-off-by: Dave Jones --- Documentation/cpu-freq/cpu-drivers.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 75a58d14d3c..6c30e930c12 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -92,9 +92,9 @@ policy->cpuinfo.max_freq - the minimum and maximum frequency (in kHz) which is supported by this CPU policy->cpuinfo.transition_latency the time it takes on this CPU to - switch between two frequencies (if - appropriate, else specify - CPUFREQ_ETERNAL) + switch between two frequencies in + nanoseconds (if appropriate, else + specify CPUFREQ_ETERNAL) policy->cur The current operating frequency of this CPU (if appropriate) From 1cce76c2ac60df40b02bf747982fb3f00e68f50a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Nov 2009 14:39:53 -0800 Subject: [PATCH 6/8] [CPUFREQ] use an enum for speedstep processor identification 
The "unsigned int processor" everywhere confused Rusty, leading to breakage when he passed in smp_processor_id(). Signed-off-by: Rusty Russell Acked-by: Dominik Brodowski Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 6 +++--- arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 24 ++++++++++----------- arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 3ae5a7a3a50..2ce8e0b5cc5 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; static u32 pmbase; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b8482..ad0083abfa2 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -34,7 +34,7 @@ static int relaxed_check; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency(unsigned int processor) +static unsigned int pentium3_get_frequency(enum speedstep_processor processor) { /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { @@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void) /* Warning: may get called from smp_call_function_single. 
*/ -unsigned int speedstep_get_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(enum speedstep_processor processor) { switch (processor) { case SPEEDSTEP_CPU_PCORE: @@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); * DETECT SPEEDSTEP SPEEDS * *********************************************************************/ -unsigned int speedstep_get_freqs(unsigned int processor, +unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e5a30..70d9cea1219 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -11,18 +11,18 @@ /* processors */ - -#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ - +enum speedstep_processor { + SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ + SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using * the speedstep_get_frequency() call. 
*/ -#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ + SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ + SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ + SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ +}; /* speedstep states -- only two of them */ @@ -31,10 +31,10 @@ /* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); +extern enum speedstep_processor speedstep_detect_processor(void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); /* detect the low and high speeds of the processor. The callback @@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ -extern unsigned int speedstep_get_freqs(unsigned int processor, +extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea088e4f..04d73c114e4 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -35,7 +35,7 @@ static int smi_cmd; static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; /* * There are only two frequency states for each processor. Values From cf3289d0e701b2f59123bf653c12722a7e32aedb Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 17 Nov 2009 20:27:08 -0700 Subject: [PATCH 7/8] [CPUFREQ] make internal cpufreq_add_dev_* static No need to export these symbols; make them static. 
cpufreq_add_dev_policy cpufreq_add_dev_symlink cpufreq_add_dev_interface Signed-off-by: Alex Chiang Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ff57c40e9b8..5b9b1c8c495 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -767,8 +767,9 @@ static struct kobj_type ktype_cpufreq = { * 0: Success * Positive: When we have a managed CPU and the sysfs got symlinked */ -int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, - struct sys_device *sys_dev) +static int cpufreq_add_dev_policy(unsigned int cpu, + struct cpufreq_policy *policy, + struct sys_device *sys_dev) { int ret = 0; #ifdef CONFIG_SMP @@ -842,7 +843,8 @@ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, /* symlink affected CPUs */ -int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) +static int cpufreq_add_dev_symlink(unsigned int cpu, + struct cpufreq_policy *policy) { unsigned int j; int ret = 0; @@ -869,8 +871,9 @@ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) return ret; } -int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, - struct sys_device *sys_dev) +static int cpufreq_add_dev_interface(unsigned int cpu, + struct cpufreq_policy *policy, + struct sys_device *sys_dev) { struct cpufreq_policy new_policy; struct freq_attr **drv_attr; From e2f74f355e9e2914483db10c05d70e69e0b7ae04 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 19 Nov 2009 12:31:01 +0100 Subject: [PATCH 8/8] [ACPI/CPUFREQ] Introduce bios_limit per cpu cpufreq sysfs interface This interface is mainly intended (and implemented) for ACPI _PPC BIOS frequency limitations, but other cpufreq drivers can also use it for similar use-cases. Why is this needed: Currently it's not obvious why cpufreq got limited. 
 People see cpufreq/scaling_max_freq reduced, but this could have happened by: - any userspace prog writing to scaling_max_freq - thermal limitations - hardware (_PPC in ACPI case) limitations Therefore export bios_limit (in kHz) to: - Point out to the user that it's the BIOS (broken or intended) which limits frequency - Export it as a sysfs interface for userspace progs. While this was a rarely used feature on laptops, there will appear more and more server implementations providing "Green IT" features like allowing the service processor to limit the frequency. People want to know about HW/BIOS frequency limitations. All ACPI P-state driven cpufreq drivers are covered with this patch: - powernow-k8 - powernow-k7 - acpi-cpufreq Tested with a patched DSDT which limits the first two cores (_PPC returns 1) via _PPC, exposed by bios_limit: # echo 2200000 >cpu2/cpufreq/scaling_max_freq # cat cpu*/cpufreq/scaling_max_freq 2600000 2600000 2200000 2200000 # #scaling_max_freq shows general user/thermal/BIOS limitations # cat cpu*/cpufreq/bios_limit 2600000 2600000 2800000 2800000 # #bios_limit only shows the HW/BIOS limitation CC: Pallipadi Venkatesh CC: Len Brown CC: davej@codemonkey.org.uk CC: linux@dominikbrodowski.net Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- Documentation/cpu-freq/user-guide.txt | 11 +++++++++++ arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 17 +++++++++-------- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 19 +++++++++++-------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 17 +++++++++-------- drivers/acpi/processor_perflib.c | 13 +++++++++++++ drivers/cpufreq/cpufreq.c | 21 +++++++++++++++++++++ include/acpi/processor.h | 6 ++++++ include/linux/cpufreq.h | 1 + 8 files changed, 81 insertions(+), 24 deletions(-) diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 2a5b850847c..04f6b32993e 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -203,6 +203,17 @@ 
scaling_cur_freq : Current frequency of the CPU as determined by the frequency the kernel thinks the CPU runs at. +bios_limit : If the BIOS tells the OS to limit a CPU to + lower frequencies, the user can read out the + maximum available frequency from this file. + This typically can happen through (often not + intended) BIOS settings, restrictions + triggered through a service processor or other + BIOS/HW based implementations. + This does not cover thermal ACPI limitations + which can be detected through the generic + thermal driver. + If you have selected the "userspace" governor which allows you to set the CPU operating frequency to a specific value, you can read out the current frequency in diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8b581d3905c..d2e7c77c1ea 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; static int __init acpi_cpufreq_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775eb0a..9a97116f89e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { }; static struct cpufreq_driver 
powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + .bios_limit = acpi_processor_get_bios_limit, +#endif + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, }; static int __init powernow_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f30d2538394..a9df9441a9a 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1398,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = { }; static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, + .verify = powernowk8_verify, + .target = powernowk8_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, }; /* driver entry point for init */ diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 8ba0ed0b9dd..01e366d2b6f 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -167,6 +167,19 @@ int acpi_processor_ppc_has_changed(struct acpi_processor *pr) return cpufreq_update_policy(pr->id); } +int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) +{ + struct acpi_processor *pr; + + pr = per_cpu(processors, cpu); + if (!pr || !pr->performance || 
 !pr->performance->state_count) + return -ENODEV; + *limit = pr->performance->states[pr->performance_platform_limit]. + core_frequency * 1000; + return 0; +} +EXPORT_SYMBOL(acpi_processor_get_bios_limit); + void acpi_processor_ppc_init(void) { if (!cpufreq_register_notifier diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5b9b1c8c495..f20668c09ce 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -647,6 +647,21 @@ static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) return policy->governor->show_setspeed(policy, buf); } +/** + * show_bios_limit - show the current cpufreq HW/BIOS limitation + */ +static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) +{ + unsigned int limit; + int ret; + if (cpufreq_driver->bios_limit) { + ret = cpufreq_driver->bios_limit(policy->cpu, &limit); + if (!ret) + return sprintf(buf, "%u\n", limit); + } + return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); +} + #define define_one_ro(_name) \ static struct freq_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) @@ -666,6 +681,7 @@ define_one_ro(cpuinfo_transition_latency); define_one_ro(scaling_available_governors); define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); +define_one_ro(bios_limit); define_one_ro(related_cpus); define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); @@ -905,6 +921,11 @@ static int cpufreq_add_dev_interface(unsigned int cpu, if (ret) goto err_out_kobj_put; } + if (cpufreq_driver->bios_limit) { + ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); + if (ret) + goto err_out_kobj_put; + } spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 740ac3ad8fd..8b668ead6d6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -295,6 +295,7 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx void 
acpi_processor_ppc_init(void); void acpi_processor_ppc_exit(void); int acpi_processor_ppc_has_changed(struct acpi_processor *pr); +extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit); #else static inline void acpi_processor_ppc_init(void) { @@ -316,6 +317,11 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) } return 0; } +static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) +{ + return -ENODEV; +} + #endif /* CONFIG_CPU_FREQ */ /* in processor_throttling.c */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 79a2340d83c..4de02b10007 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -232,6 +232,7 @@ struct cpufreq_driver { /* optional */ unsigned int (*getavg) (struct cpufreq_policy *policy, unsigned int cpu); + int (*bios_limit) (int cpu, unsigned int *limit); int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg);