Merge branch 'pm-cpufreq'
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 9 Mar 2017 14:12:27 +0000 (15:12 +0100)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 9 Mar 2017 14:12:27 +0000 (15:12 +0100)
* pm-cpufreq:
  cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy
  cpufreq: intel_pstate: Fix intel_pstate_verify_policy()
  cpufreq: intel_pstate: Fix global settings in active mode
  cpufreq: Add the "cpufreq.off=1" cmdline option
  cpufreq: intel_pstate: Avoid triggering cpu_frequency tracepoint unnecessarily
  cpufreq: intel_pstate: Fix intel_cpufreq_verify_policy()
  cpufreq: intel_pstate: Do not use performance_limits in passive mode

1  2 
Documentation/admin-guide/kernel-parameters.txt
drivers/cpufreq/cpufreq.c
drivers/cpufreq/intel_pstate.c

index 986e44387dad493e268ab93253120df73abc3045,3988d2311f9707c48b4f3dca5df1c333ef0972cf..41851d42b84d1ba89774a7c6d61c2ce9ce601f34
                        loops can be debugged more effectively on production
                        systems.
  
 -      clocksource.arm_arch_timer.fsl-a008585=
 -                      [ARM64]
 -                      Format: <bool>
 -                      Enable/disable the workaround of Freescale/NXP
 -                      erratum A-008585.  This can be useful for KVM
 -                      guests, if the guest device tree doesn't show the
 -                      erratum.  If unspecified, the workaround is
 -                      enabled based on the device tree.
 -
        clearcpuid=BITNUM [X86]
                        Disable CPUID feature X for the kernel. See
                        arch/x86/include/asm/cpufeatures.h for the valid bit
        cpuidle.off=1   [CPU_IDLE]
                        disable the cpuidle sub-system
  
+       cpufreq.off=1   [CPU_FREQ]
+                       disable the cpufreq sub-system
        cpu_init_udelay=N
                        [X86] Delay for N microsec between assert and de-assert
                        of APIC INIT to start processors.  This delay occurs
                        serial port must already be setup and configured.
                        Options are not yet supported.
  
 +              lantiq,<addr>
 +                      Start an early, polled-mode console on a lantiq serial
 +                      (lqasc) port at the specified address. The serial port
 +                      must already be setup and configured. Options are not
 +                      yet supported.
 +
                lpuart,<addr>
                lpuart32,<addr>
                        Use early console provided by Freescale LP UART driver
                        address. The serial port must already be setup
                        and configured. Options are not yet supported.
  
 -      earlyprintk=    [X86,SH,BLACKFIN,ARM,M68k]
 +      earlyprintk=    [X86,SH,BLACKFIN,ARM,M68k,S390]
                        earlyprintk=vga
                        earlyprintk=efi
 +                      earlyprintk=sclp
                        earlyprintk=xen
                        earlyprintk=serial[,ttySn[,baudrate]]
                        earlyprintk=serial[,0x...[,baudrate]]
  
                        The xen output can only be used by Xen PV guests.
  
 +                      The sclp output can only be used on s390.
 +
        edac_report=    [HW,EDAC] Control how to report EDAC event
                        Format: {"on" | "off" | "force"}
                        on: enable EDAC to report H/W event. May be overridden
                        When zero, profiling data is discarded and associated
                        debugfs files are removed at module unload time.
  
 +      goldfish        [X86] Enable the goldfish android emulator platform.
 +                      Don't use this when you are not running on the
 +                      android emulator
 +
        gpt             [EFI] Forces disk with valid GPT signature but
                        invalid Protective MBR to be treated as GPT. If the
                        primary GPT is corrupted, it enables the backup/alternate
                        Lazy RCU callbacks are those which RCU can
                        prove do nothing more than free memory.
  
 +      rcutree.rcu_kick_kthreads= [KNL]
 +                      Cause the grace-period kthread to get an extra
 +                      wake_up() if it sleeps three times longer than
 +                      it should at force-quiescent-state time.
 +                      This wake_up() will be accompanied by a
 +                      WARN_ONCE() splat and an ftrace_dump().
 +
        rcuperf.gp_exp= [KNL]
                        Measure performance of expedited synchronous
                        grace-period primitives.
        rhash_entries=  [KNL,NET]
                        Set number of hash buckets for route cache
  
 +      ring3mwait=disable
 +                      [KNL] Disable ring 3 MONITOR/MWAIT feature on supported
 +                      CPUs.
 +
        ro              [KNL] Mount root device read-only on boot
  
        rodata=         [KNL]
                        last alloc / free. For more information see
                        Documentation/vm/slub.txt.
  
 +      slub_memcg_sysfs=       [MM, SLUB]
 +                      Determines whether to enable sysfs directories for
 +                      memory cgroup sub-caches. 1 to enable, 0 to disable.
 +                      The default is determined by CONFIG_SLUB_MEMCG_SYSFS_ON.
 +                      Enabling this can lead to a very high number of debug
 +                      directories and files being created under
 +                      /sys/kernel/slub.
 +
        slub_max_order= [MM, SLUB]
                        Determines the maximum allowed order for slabs.
                        A high setting may cause OOMs due to memory
index a475432818642fee4547699011ba4cf5aa619f3a,7790db2645d7cae6d7b5f762efc9dc42d3ba45dd..38b9fdf854a49a7e4ba9950e365904d18b64caf5
@@@ -132,7 -132,7 +132,7 @@@ static inline u64 get_cpu_idle_time_jif
        u64 cur_wall_time;
        u64 busy_time;
  
 -      cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
 +      cur_wall_time = jiffies64_to_nsecs(get_jiffies_64());
  
        busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
  
        idle_time = cur_wall_time - busy_time;
        if (wall)
 -              *wall = cputime_to_usecs(cur_wall_time);
 +              *wall = div_u64(cur_wall_time, NSEC_PER_USEC);
  
 -      return cputime_to_usecs(idle_time);
 +      return div_u64(idle_time, NSEC_PER_USEC);
  }
  
  u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
@@@ -2532,4 -2532,5 +2532,5 @@@ static int __init cpufreq_core_init(voi
  
        return 0;
  }
+ module_param(off, int, 0444);
  core_initcall(cpufreq_core_init);
index b1fbaa30ae0415c330b9b1069e17900b99a48868,f1f8fbe0b4c46a2024fd552df8806f62d289a000..3d37219a0dd7afc3108b017f1d2960868efb7903
@@@ -19,7 -19,7 +19,7 @@@
  #include <linux/hrtimer.h>
  #include <linux/tick.h>
  #include <linux/slab.h>
 -#include <linux/sched.h>
 +#include <linux/sched/cpufreq.h>
  #include <linux/list.h>
  #include <linux/cpu.h>
  #include <linux/cpufreq.h>
  
  #define INTEL_CPUFREQ_TRANSITION_LATENCY      20000
  
 -#define ATOM_RATIOS           0x66a
 -#define ATOM_VIDS             0x66b
 -#define ATOM_TURBO_RATIOS     0x66c
 -#define ATOM_TURBO_VIDS               0x66d
 -
  #ifdef CONFIG_ACPI
  #include <acpi/processor.h>
  #include <acpi/cppc_acpi.h>
@@@ -377,6 -382,7 +377,7 @@@ static void intel_pstate_set_performanc
        intel_pstate_init_limits(limits);
        limits->min_perf_pct = 100;
        limits->min_perf = int_ext_tofp(1);
+       limits->min_sysfs_pct = 100;
  }
  
  static DEFINE_MUTEX(intel_pstate_driver_lock);
@@@ -968,11 -974,20 +969,20 @@@ static int intel_pstate_resume(struct c
  }
  
  static void intel_pstate_update_policies(void)
+       __releases(&intel_pstate_limits_lock)
+       __acquires(&intel_pstate_limits_lock)
  {
+       struct perf_limits *saved_limits = limits;
        int cpu;
  
+       mutex_unlock(&intel_pstate_limits_lock);
        for_each_possible_cpu(cpu)
                cpufreq_update_policy(cpu);
+       mutex_lock(&intel_pstate_limits_lock);
+       limits = saved_limits;
  }
  
  /************************** debugfs begin ************************/
@@@ -1180,10 -1195,10 +1190,10 @@@ static ssize_t store_no_turbo(struct ko
  
        limits->no_turbo = clamp_t(int, input, 0, 1);
  
-       mutex_unlock(&intel_pstate_limits_lock);
        intel_pstate_update_policies();
  
+       mutex_unlock(&intel_pstate_limits_lock);
        mutex_unlock(&intel_pstate_driver_lock);
  
        return count;
@@@ -1217,10 -1232,10 +1227,10 @@@ static ssize_t store_max_perf_pct(struc
                                   limits->max_perf_pct);
        limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
  
-       mutex_unlock(&intel_pstate_limits_lock);
        intel_pstate_update_policies();
  
+       mutex_unlock(&intel_pstate_limits_lock);
        mutex_unlock(&intel_pstate_driver_lock);
  
        return count;
@@@ -1254,10 -1269,10 +1264,10 @@@ static ssize_t store_min_perf_pct(struc
                                   limits->min_perf_pct);
        limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
  
-       mutex_unlock(&intel_pstate_limits_lock);
        intel_pstate_update_policies();
  
+       mutex_unlock(&intel_pstate_limits_lock);
        mutex_unlock(&intel_pstate_driver_lock);
  
        return count;
@@@ -1350,7 -1365,7 +1360,7 @@@ static int atom_get_min_pstate(void
  {
        u64 value;
  
 -      rdmsrl(ATOM_RATIOS, value);
 +      rdmsrl(MSR_ATOM_CORE_RATIOS, value);
        return (value >> 8) & 0x7F;
  }
  
@@@ -1358,7 -1373,7 +1368,7 @@@ static int atom_get_max_pstate(void
  {
        u64 value;
  
 -      rdmsrl(ATOM_RATIOS, value);
 +      rdmsrl(MSR_ATOM_CORE_RATIOS, value);
        return (value >> 16) & 0x7F;
  }
  
@@@ -1366,7 -1381,7 +1376,7 @@@ static int atom_get_turbo_pstate(void
  {
        u64 value;
  
 -      rdmsrl(ATOM_TURBO_RATIOS, value);
 +      rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
        return value & 0x7F;
  }
  
@@@ -1428,7 -1443,7 +1438,7 @@@ static void atom_get_vid(struct cpudat
  {
        u64 value;
  
 -      rdmsrl(ATOM_VIDS, value);
 +      rdmsrl(MSR_ATOM_CORE_VIDS, value);
        cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
        cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
        cpudata->vid.ratio = div_fp(
                int_tofp(cpudata->pstate.max_pstate -
                        cpudata->pstate.min_pstate));
  
 -      rdmsrl(ATOM_TURBO_VIDS, value);
 +      rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
        cpudata->vid.turbo = value & 0x7f;
  }
  
@@@ -1874,13 -1889,11 +1884,11 @@@ static int intel_pstate_prepare_request
  
        intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
        pstate = clamp_t(int, pstate, min_perf, max_perf);
-       trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
        return pstate;
  }
  
  static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
  {
-       pstate = intel_pstate_prepare_request(cpu, pstate);
        if (pstate == cpu->pstate.current_pstate)
                return;
  
@@@ -1900,6 -1913,8 +1908,8 @@@ static inline void intel_pstate_adjust_
  
        update_turbo_state();
  
+       target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+       trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
        intel_pstate_update_pstate(cpu, target_pstate);
  
        sample = &cpu->sample;
@@@ -2132,16 -2147,11 +2142,11 @@@ static int intel_pstate_set_policy(stru
        mutex_lock(&intel_pstate_limits_lock);
  
        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+               pr_debug("set performance\n");
                if (!perf_limits) {
                        limits = &performance_limits;
                        perf_limits = limits;
                }
-               if (policy->max >= policy->cpuinfo.max_freq &&
-                   !limits->no_turbo) {
-                       pr_debug("set performance\n");
-                       intel_pstate_set_performance_limits(perf_limits);
-                       goto out;
-               }
        } else {
                pr_debug("set powersave\n");
                if (!perf_limits) {
        }
  
        intel_pstate_update_perf_limits(policy, perf_limits);
-  out:
        if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
                /*
                 * NOHZ_FULL CPUs need this as the governor callback may not
@@@ -2198,9 -2208,9 +2203,9 @@@ static int intel_pstate_verify_policy(s
                unsigned int max_freq, min_freq;
  
                max_freq = policy->cpuinfo.max_freq *
-                                               limits->max_sysfs_pct / 100;
+                                       perf_limits->max_sysfs_pct / 100;
                min_freq = policy->cpuinfo.max_freq *
-                                               limits->min_sysfs_pct / 100;
+                                       perf_limits->min_sysfs_pct / 100;
                cpufreq_verify_within_limits(policy, min_freq, max_freq);
        }
  
@@@ -2243,13 -2253,8 +2248,8 @@@ static int __intel_pstate_cpu_init(stru
  
        cpu = all_cpu_data[policy->cpu];
  
-       /*
-        * We need sane value in the cpu->perf_limits, so inherit from global
-        * perf_limits limits, which are seeded with values based on the
-        * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
-        */
        if (per_cpu_limits)
-               memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
+               intel_pstate_init_limits(cpu->perf_limits);
  
        policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@@ -2301,7 -2306,6 +2301,6 @@@ static struct cpufreq_driver intel_psta
  static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
  {
        struct cpudata *cpu = all_cpu_data[policy->cpu];
-       struct perf_limits *perf_limits = limits;
  
        update_turbo_state();
        policy->cpuinfo.max_freq = limits->turbo_disabled ?
  
        cpufreq_verify_within_cpu_limits(policy);
  
-       if (per_cpu_limits)
-               perf_limits = cpu->perf_limits;
-       mutex_lock(&intel_pstate_limits_lock);
-       intel_pstate_update_perf_limits(policy, perf_limits);
-       mutex_unlock(&intel_pstate_limits_lock);
        return 0;
  }
  
@@@ -2370,6 -2365,7 +2360,7 @@@ static int intel_cpufreq_target(struct 
                wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
                              pstate_funcs.get_val(cpu, target_pstate));
        }
+       freqs.new = target_pstate * cpu->pstate.scaling;
        cpufreq_freq_transition_end(policy, &freqs, false);
  
        return 0;
@@@ -2383,8 -2379,9 +2374,9 @@@ static unsigned int intel_cpufreq_fast_
  
        target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
        target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+       target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
        intel_pstate_update_pstate(cpu, target_pstate);
-       return target_freq;
+       return target_pstate * cpu->pstate.scaling;
  }
  
  static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@@ -2437,8 -2434,11 +2429,11 @@@ static int intel_pstate_register_driver
  
        intel_pstate_init_limits(&powersave_limits);
        intel_pstate_set_performance_limits(&performance_limits);
-       limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ?
-                       &performance_limits : &powersave_limits;
+       if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) &&
+           intel_pstate_driver == &intel_pstate)
+               limits = &performance_limits;
+       else
+               limits = &powersave_limits;
  
        ret = cpufreq_register_driver(intel_pstate_driver);
        if (ret) {