73747d20df85d5cc8daf781c3d3b01ff3ccd6af3
[linux-2.6-block.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *      for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *      (in contrast to entering ACPI C3, in which case the WBINVD
27  *      instruction needs to be executed to flush the caches)
28  */
29
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/sched/smt.h>
51 #include <linux/mutex.h>
52 #include <linux/notifier.h>
53 #include <linux/cpu.h>
54 #include <linux/moduleparam.h>
55 #include <linux/sysfs.h>
56 #include <asm/cpuid/api.h>
57 #include <asm/cpu_device_id.h>
58 #include <asm/intel-family.h>
59 #include <asm/mwait.h>
60 #include <asm/spec-ctrl.h>
61 #include <asm/msr.h>
62 #include <asm/tsc.h>
63 #include <asm/fpu/api.h>
64 #include <asm/smp.h>
65
66 #define INTEL_IDLE_VERSION "0.5.1"
67
68 static struct cpuidle_driver intel_idle_driver = {
69         .name = "intel_idle",
70         .owner = THIS_MODULE,
71 };
72 /* intel_idle.max_cstate=0 disables driver */
73 static int max_cstate = CPUIDLE_STATE_MAX - 1;
74 static unsigned int disabled_states_mask __read_mostly;
75 static unsigned int preferred_states_mask __read_mostly;
76 static bool force_irq_on __read_mostly;
77 static bool ibrs_off __read_mostly;
78
79 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
80
81 static unsigned long auto_demotion_disable_flags;
82
/* Requested handling of "C1E promotion"; the default is to preserve it. */
static enum {
        C1E_PROMOTION_PRESERVE = 0,
        C1E_PROMOTION_ENABLE,
        C1E_PROMOTION_DISABLE,
} c1e_promotion = C1E_PROMOTION_PRESERVE;
88
89 struct idle_cpu {
90         struct cpuidle_state *state_table;
91
92         /*
93          * Hardware C-state auto-demotion may not always be optimal.
94          * Indicate which enable bits to clear here.
95          */
96         unsigned long auto_demotion_disable_flags;
97         bool disable_promotion_to_c1e;
98         bool c1_demotion_supported;
99         bool use_acpi;
100 };
101
102 static bool c1_demotion_supported;
103 static DEFINE_MUTEX(c1_demotion_mutex);
104
105 static struct device *sysfs_root __initdata;
106
107 static const struct idle_cpu *icpu __initdata;
108 static struct cpuidle_state *cpuidle_state_table __initdata;
109
110 static unsigned int mwait_substates __initdata;
111
/*
 * Driver-private idle-state flags, bits 14 through 18 of the state's
 * "flags" word.
 */

/*
 * IRQ_ENABLE: enable interrupts before entering the C-state.  On some
 * platforms and for some C-states, this may measurably decrease interrupt
 * latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE         BIT(14)

/*
 * ALWAYS_ENABLE: enable this state by default even if the ACPI _CST does
 * not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE      BIT(15)

/*
 * IBRS: disable IBRS across idle (when KERNEL_IBRS).  Exclusive vs
 * IRQ_ENABLE above.
 */
#define CPUIDLE_FLAG_IBRS               BIT(16)

/*
 * INIT_XSTATE: initialize large xstate for the C6-state entrance.
 */
#define CPUIDLE_FLAG_INIT_XSTATE        BIT(17)

/*
 * PARTIAL_HINT_MATCH: ignore the sub-state when matching mwait hints
 * between the ACPI _CST and custom tables.
 */
#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH BIT(18)
139
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * flg2MWAIT() extracts the hint from bits 31:24 of a flags word and
 * MWAIT2flg() places an 8-bit hint into those bits.
 *
 * Both macros fully parenthesize their argument, so an expression such as
 * "a | b" is masked as a whole.  MWAIT2flg() masks with an unsigned
 * constant so that the shift is done in unsigned arithmetic and a hint
 * with bit 7 set is never shifted into the sign bit of a signed int.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
149
150 static __always_inline int __intel_idle(struct cpuidle_device *dev,
151                                         struct cpuidle_driver *drv,
152                                         int index, bool irqoff)
153 {
154         struct cpuidle_state *state = &drv->states[index];
155         unsigned int eax = flg2MWAIT(state->flags);
156         unsigned int ecx = 1*irqoff; /* break on interrupt flag */
157
158         mwait_idle_with_hints(eax, ecx);
159
160         return index;
161 }
162
163 /**
164  * intel_idle - Ask the processor to enter the given idle state.
165  * @dev: cpuidle device of the target CPU.
166  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
167  * @index: Target idle state index.
168  *
169  * Use the MWAIT instruction to notify the processor that the CPU represented by
170  * @dev is idle and it can try to enter the idle state corresponding to @index.
171  *
172  * If the local APIC timer is not known to be reliable in the target idle state,
173  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
174  *
175  * Must be called under local_irq_disable().
176  */
177 static __cpuidle int intel_idle(struct cpuidle_device *dev,
178                                 struct cpuidle_driver *drv, int index)
179 {
180         return __intel_idle(dev, drv, index, true);
181 }
182
183 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
184                                     struct cpuidle_driver *drv, int index)
185 {
186         return __intel_idle(dev, drv, index, false);
187 }
188
189 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
190                                      struct cpuidle_driver *drv, int index)
191 {
192         bool smt_active = sched_smt_active();
193         u64 spec_ctrl = spec_ctrl_current();
194         int ret;
195
196         if (smt_active)
197                 __update_spec_ctrl(0);
198
199         ret = __intel_idle(dev, drv, index, true);
200
201         if (smt_active)
202                 __update_spec_ctrl(spec_ctrl);
203
204         return ret;
205 }
206
207 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
208                                        struct cpuidle_driver *drv, int index)
209 {
210         fpu_idle_fpregs();
211         return __intel_idle(dev, drv, index, true);
212 }
213
214 /**
215  * intel_idle_s2idle - Ask the processor to enter the given idle state.
216  * @dev: cpuidle device of the target CPU.
217  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
218  * @index: Target idle state index.
219  *
220  * Use the MWAIT instruction to notify the processor that the CPU represented by
221  * @dev is idle and it can try to enter the idle state corresponding to @index.
222  *
223  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
224  * scheduler tick and suspended scheduler clock on the target CPU.
225  */
226 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
227                                        struct cpuidle_driver *drv, int index)
228 {
229         struct cpuidle_state *state = &drv->states[index];
230         unsigned int eax = flg2MWAIT(state->flags);
231         unsigned int ecx = 1; /* break on interrupt flag */
232
233         if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
234                 fpu_idle_fpregs();
235
236         mwait_idle_with_hints(eax, ecx);
237
238         return 0;
239 }
240
241 static void intel_idle_enter_dead(struct cpuidle_device *dev, int index)
242 {
243         struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
244         struct cpuidle_state *state = &drv->states[index];
245         unsigned long eax = flg2MWAIT(state->flags);
246
247         mwait_play_dead(eax);
248 }
249
250 /*
251  * States are indexed by the cstate number,
252  * which is also the index into the MWAIT hint array.
253  * Thus C0 is a dummy.
254  */
255 static struct cpuidle_state nehalem_cstates[] __initdata = {
256         {
257                 .name = "C1",
258                 .desc = "MWAIT 0x00",
259                 .flags = MWAIT2flg(0x00),
260                 .exit_latency = 3,
261                 .target_residency = 6,
262                 .enter = &intel_idle,
263                 .enter_s2idle = intel_idle_s2idle, },
264         {
265                 .name = "C1E",
266                 .desc = "MWAIT 0x01",
267                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
268                 .exit_latency = 10,
269                 .target_residency = 20,
270                 .enter = &intel_idle,
271                 .enter_s2idle = intel_idle_s2idle, },
272         {
273                 .name = "C3",
274                 .desc = "MWAIT 0x10",
275                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
276                 .exit_latency = 20,
277                 .target_residency = 80,
278                 .enter = &intel_idle,
279                 .enter_s2idle = intel_idle_s2idle, },
280         {
281                 .name = "C6",
282                 .desc = "MWAIT 0x20",
283                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
284                 .exit_latency = 200,
285                 .target_residency = 800,
286                 .enter = &intel_idle,
287                 .enter_s2idle = intel_idle_s2idle, },
288         {
289                 .enter = NULL }
290 };
291
292 static struct cpuidle_state snb_cstates[] __initdata = {
293         {
294                 .name = "C1",
295                 .desc = "MWAIT 0x00",
296                 .flags = MWAIT2flg(0x00),
297                 .exit_latency = 2,
298                 .target_residency = 2,
299                 .enter = &intel_idle,
300                 .enter_s2idle = intel_idle_s2idle, },
301         {
302                 .name = "C1E",
303                 .desc = "MWAIT 0x01",
304                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
305                 .exit_latency = 10,
306                 .target_residency = 20,
307                 .enter = &intel_idle,
308                 .enter_s2idle = intel_idle_s2idle, },
309         {
310                 .name = "C3",
311                 .desc = "MWAIT 0x10",
312                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
313                 .exit_latency = 80,
314                 .target_residency = 211,
315                 .enter = &intel_idle,
316                 .enter_s2idle = intel_idle_s2idle, },
317         {
318                 .name = "C6",
319                 .desc = "MWAIT 0x20",
320                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
321                 .exit_latency = 104,
322                 .target_residency = 345,
323                 .enter = &intel_idle,
324                 .enter_s2idle = intel_idle_s2idle, },
325         {
326                 .name = "C7",
327                 .desc = "MWAIT 0x30",
328                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
329                 .exit_latency = 109,
330                 .target_residency = 345,
331                 .enter = &intel_idle,
332                 .enter_s2idle = intel_idle_s2idle, },
333         {
334                 .enter = NULL }
335 };
336
337 static struct cpuidle_state byt_cstates[] __initdata = {
338         {
339                 .name = "C1",
340                 .desc = "MWAIT 0x00",
341                 .flags = MWAIT2flg(0x00),
342                 .exit_latency = 1,
343                 .target_residency = 1,
344                 .enter = &intel_idle,
345                 .enter_s2idle = intel_idle_s2idle, },
346         {
347                 .name = "C6N",
348                 .desc = "MWAIT 0x58",
349                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
350                 .exit_latency = 300,
351                 .target_residency = 275,
352                 .enter = &intel_idle,
353                 .enter_s2idle = intel_idle_s2idle, },
354         {
355                 .name = "C6S",
356                 .desc = "MWAIT 0x52",
357                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
358                 .exit_latency = 500,
359                 .target_residency = 560,
360                 .enter = &intel_idle,
361                 .enter_s2idle = intel_idle_s2idle, },
362         {
363                 .name = "C7",
364                 .desc = "MWAIT 0x60",
365                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
366                 .exit_latency = 1200,
367                 .target_residency = 4000,
368                 .enter = &intel_idle,
369                 .enter_s2idle = intel_idle_s2idle, },
370         {
371                 .name = "C7S",
372                 .desc = "MWAIT 0x64",
373                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
374                 .exit_latency = 10000,
375                 .target_residency = 20000,
376                 .enter = &intel_idle,
377                 .enter_s2idle = intel_idle_s2idle, },
378         {
379                 .enter = NULL }
380 };
381
382 static struct cpuidle_state cht_cstates[] __initdata = {
383         {
384                 .name = "C1",
385                 .desc = "MWAIT 0x00",
386                 .flags = MWAIT2flg(0x00),
387                 .exit_latency = 1,
388                 .target_residency = 1,
389                 .enter = &intel_idle,
390                 .enter_s2idle = intel_idle_s2idle, },
391         {
392                 .name = "C6N",
393                 .desc = "MWAIT 0x58",
394                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
395                 .exit_latency = 80,
396                 .target_residency = 275,
397                 .enter = &intel_idle,
398                 .enter_s2idle = intel_idle_s2idle, },
399         {
400                 .name = "C6S",
401                 .desc = "MWAIT 0x52",
402                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
403                 .exit_latency = 200,
404                 .target_residency = 560,
405                 .enter = &intel_idle,
406                 .enter_s2idle = intel_idle_s2idle, },
407         {
408                 .name = "C7",
409                 .desc = "MWAIT 0x60",
410                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
411                 .exit_latency = 1200,
412                 .target_residency = 4000,
413                 .enter = &intel_idle,
414                 .enter_s2idle = intel_idle_s2idle, },
415         {
416                 .name = "C7S",
417                 .desc = "MWAIT 0x64",
418                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
419                 .exit_latency = 10000,
420                 .target_residency = 20000,
421                 .enter = &intel_idle,
422                 .enter_s2idle = intel_idle_s2idle, },
423         {
424                 .enter = NULL }
425 };
426
427 static struct cpuidle_state ivb_cstates[] __initdata = {
428         {
429                 .name = "C1",
430                 .desc = "MWAIT 0x00",
431                 .flags = MWAIT2flg(0x00),
432                 .exit_latency = 1,
433                 .target_residency = 1,
434                 .enter = &intel_idle,
435                 .enter_s2idle = intel_idle_s2idle, },
436         {
437                 .name = "C1E",
438                 .desc = "MWAIT 0x01",
439                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
440                 .exit_latency = 10,
441                 .target_residency = 20,
442                 .enter = &intel_idle,
443                 .enter_s2idle = intel_idle_s2idle, },
444         {
445                 .name = "C3",
446                 .desc = "MWAIT 0x10",
447                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
448                 .exit_latency = 59,
449                 .target_residency = 156,
450                 .enter = &intel_idle,
451                 .enter_s2idle = intel_idle_s2idle, },
452         {
453                 .name = "C6",
454                 .desc = "MWAIT 0x20",
455                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
456                 .exit_latency = 80,
457                 .target_residency = 300,
458                 .enter = &intel_idle,
459                 .enter_s2idle = intel_idle_s2idle, },
460         {
461                 .name = "C7",
462                 .desc = "MWAIT 0x30",
463                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
464                 .exit_latency = 87,
465                 .target_residency = 300,
466                 .enter = &intel_idle,
467                 .enter_s2idle = intel_idle_s2idle, },
468         {
469                 .enter = NULL }
470 };
471
472 static struct cpuidle_state ivt_cstates[] __initdata = {
473         {
474                 .name = "C1",
475                 .desc = "MWAIT 0x00",
476                 .flags = MWAIT2flg(0x00),
477                 .exit_latency = 1,
478                 .target_residency = 1,
479                 .enter = &intel_idle,
480                 .enter_s2idle = intel_idle_s2idle, },
481         {
482                 .name = "C1E",
483                 .desc = "MWAIT 0x01",
484                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
485                 .exit_latency = 10,
486                 .target_residency = 80,
487                 .enter = &intel_idle,
488                 .enter_s2idle = intel_idle_s2idle, },
489         {
490                 .name = "C3",
491                 .desc = "MWAIT 0x10",
492                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
493                 .exit_latency = 59,
494                 .target_residency = 156,
495                 .enter = &intel_idle,
496                 .enter_s2idle = intel_idle_s2idle, },
497         {
498                 .name = "C6",
499                 .desc = "MWAIT 0x20",
500                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
501                 .exit_latency = 82,
502                 .target_residency = 300,
503                 .enter = &intel_idle,
504                 .enter_s2idle = intel_idle_s2idle, },
505         {
506                 .enter = NULL }
507 };
508
509 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
510         {
511                 .name = "C1",
512                 .desc = "MWAIT 0x00",
513                 .flags = MWAIT2flg(0x00),
514                 .exit_latency = 1,
515                 .target_residency = 1,
516                 .enter = &intel_idle,
517                 .enter_s2idle = intel_idle_s2idle, },
518         {
519                 .name = "C1E",
520                 .desc = "MWAIT 0x01",
521                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
522                 .exit_latency = 10,
523                 .target_residency = 250,
524                 .enter = &intel_idle,
525                 .enter_s2idle = intel_idle_s2idle, },
526         {
527                 .name = "C3",
528                 .desc = "MWAIT 0x10",
529                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
530                 .exit_latency = 59,
531                 .target_residency = 300,
532                 .enter = &intel_idle,
533                 .enter_s2idle = intel_idle_s2idle, },
534         {
535                 .name = "C6",
536                 .desc = "MWAIT 0x20",
537                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
538                 .exit_latency = 84,
539                 .target_residency = 400,
540                 .enter = &intel_idle,
541                 .enter_s2idle = intel_idle_s2idle, },
542         {
543                 .enter = NULL }
544 };
545
546 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
547         {
548                 .name = "C1",
549                 .desc = "MWAIT 0x00",
550                 .flags = MWAIT2flg(0x00),
551                 .exit_latency = 1,
552                 .target_residency = 1,
553                 .enter = &intel_idle,
554                 .enter_s2idle = intel_idle_s2idle, },
555         {
556                 .name = "C1E",
557                 .desc = "MWAIT 0x01",
558                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
559                 .exit_latency = 10,
560                 .target_residency = 500,
561                 .enter = &intel_idle,
562                 .enter_s2idle = intel_idle_s2idle, },
563         {
564                 .name = "C3",
565                 .desc = "MWAIT 0x10",
566                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
567                 .exit_latency = 59,
568                 .target_residency = 600,
569                 .enter = &intel_idle,
570                 .enter_s2idle = intel_idle_s2idle, },
571         {
572                 .name = "C6",
573                 .desc = "MWAIT 0x20",
574                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
575                 .exit_latency = 88,
576                 .target_residency = 700,
577                 .enter = &intel_idle,
578                 .enter_s2idle = intel_idle_s2idle, },
579         {
580                 .enter = NULL }
581 };
582
583 static struct cpuidle_state hsw_cstates[] __initdata = {
584         {
585                 .name = "C1",
586                 .desc = "MWAIT 0x00",
587                 .flags = MWAIT2flg(0x00),
588                 .exit_latency = 2,
589                 .target_residency = 2,
590                 .enter = &intel_idle,
591                 .enter_s2idle = intel_idle_s2idle, },
592         {
593                 .name = "C1E",
594                 .desc = "MWAIT 0x01",
595                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
596                 .exit_latency = 10,
597                 .target_residency = 20,
598                 .enter = &intel_idle,
599                 .enter_s2idle = intel_idle_s2idle, },
600         {
601                 .name = "C3",
602                 .desc = "MWAIT 0x10",
603                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
604                 .exit_latency = 33,
605                 .target_residency = 100,
606                 .enter = &intel_idle,
607                 .enter_s2idle = intel_idle_s2idle, },
608         {
609                 .name = "C6",
610                 .desc = "MWAIT 0x20",
611                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
612                 .exit_latency = 133,
613                 .target_residency = 400,
614                 .enter = &intel_idle,
615                 .enter_s2idle = intel_idle_s2idle, },
616         {
617                 .name = "C7s",
618                 .desc = "MWAIT 0x32",
619                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
620                 .exit_latency = 166,
621                 .target_residency = 500,
622                 .enter = &intel_idle,
623                 .enter_s2idle = intel_idle_s2idle, },
624         {
625                 .name = "C8",
626                 .desc = "MWAIT 0x40",
627                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
628                 .exit_latency = 300,
629                 .target_residency = 900,
630                 .enter = &intel_idle,
631                 .enter_s2idle = intel_idle_s2idle, },
632         {
633                 .name = "C9",
634                 .desc = "MWAIT 0x50",
635                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
636                 .exit_latency = 600,
637                 .target_residency = 1800,
638                 .enter = &intel_idle,
639                 .enter_s2idle = intel_idle_s2idle, },
640         {
641                 .name = "C10",
642                 .desc = "MWAIT 0x60",
643                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
644                 .exit_latency = 2600,
645                 .target_residency = 7700,
646                 .enter = &intel_idle,
647                 .enter_s2idle = intel_idle_s2idle, },
648         {
649                 .enter = NULL }
650 };
651 static struct cpuidle_state bdw_cstates[] __initdata = {
652         {
653                 .name = "C1",
654                 .desc = "MWAIT 0x00",
655                 .flags = MWAIT2flg(0x00),
656                 .exit_latency = 2,
657                 .target_residency = 2,
658                 .enter = &intel_idle,
659                 .enter_s2idle = intel_idle_s2idle, },
660         {
661                 .name = "C1E",
662                 .desc = "MWAIT 0x01",
663                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
664                 .exit_latency = 10,
665                 .target_residency = 20,
666                 .enter = &intel_idle,
667                 .enter_s2idle = intel_idle_s2idle, },
668         {
669                 .name = "C3",
670                 .desc = "MWAIT 0x10",
671                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
672                 .exit_latency = 40,
673                 .target_residency = 100,
674                 .enter = &intel_idle,
675                 .enter_s2idle = intel_idle_s2idle, },
676         {
677                 .name = "C6",
678                 .desc = "MWAIT 0x20",
679                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
680                 .exit_latency = 133,
681                 .target_residency = 400,
682                 .enter = &intel_idle,
683                 .enter_s2idle = intel_idle_s2idle, },
684         {
685                 .name = "C7s",
686                 .desc = "MWAIT 0x32",
687                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
688                 .exit_latency = 166,
689                 .target_residency = 500,
690                 .enter = &intel_idle,
691                 .enter_s2idle = intel_idle_s2idle, },
692         {
693                 .name = "C8",
694                 .desc = "MWAIT 0x40",
695                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
696                 .exit_latency = 300,
697                 .target_residency = 900,
698                 .enter = &intel_idle,
699                 .enter_s2idle = intel_idle_s2idle, },
700         {
701                 .name = "C9",
702                 .desc = "MWAIT 0x50",
703                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
704                 .exit_latency = 600,
705                 .target_residency = 1800,
706                 .enter = &intel_idle,
707                 .enter_s2idle = intel_idle_s2idle, },
708         {
709                 .name = "C10",
710                 .desc = "MWAIT 0x60",
711                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
712                 .exit_latency = 2600,
713                 .target_residency = 7700,
714                 .enter = &intel_idle,
715                 .enter_s2idle = intel_idle_s2idle, },
716         {
717                 .enter = NULL }
718 };
719
720 static struct cpuidle_state skl_cstates[] __initdata = {
721         {
722                 .name = "C1",
723                 .desc = "MWAIT 0x00",
724                 .flags = MWAIT2flg(0x00),
725                 .exit_latency = 2,
726                 .target_residency = 2,
727                 .enter = &intel_idle,
728                 .enter_s2idle = intel_idle_s2idle, },
729         {
730                 .name = "C1E",
731                 .desc = "MWAIT 0x01",
732                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
733                 .exit_latency = 10,
734                 .target_residency = 20,
735                 .enter = &intel_idle,
736                 .enter_s2idle = intel_idle_s2idle, },
737         {
738                 .name = "C3",
739                 .desc = "MWAIT 0x10",
740                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
741                 .exit_latency = 70,
742                 .target_residency = 100,
743                 .enter = &intel_idle,
744                 .enter_s2idle = intel_idle_s2idle, },
745         {
746                 .name = "C6",
747                 .desc = "MWAIT 0x20",
748                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
749                 .exit_latency = 85,
750                 .target_residency = 200,
751                 .enter = &intel_idle,
752                 .enter_s2idle = intel_idle_s2idle, },
753         {
754                 .name = "C7s",
755                 .desc = "MWAIT 0x33",
756                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
757                 .exit_latency = 124,
758                 .target_residency = 800,
759                 .enter = &intel_idle,
760                 .enter_s2idle = intel_idle_s2idle, },
761         {
762                 .name = "C8",
763                 .desc = "MWAIT 0x40",
764                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
765                 .exit_latency = 200,
766                 .target_residency = 800,
767                 .enter = &intel_idle,
768                 .enter_s2idle = intel_idle_s2idle, },
769         {
770                 .name = "C9",
771                 .desc = "MWAIT 0x50",
772                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
773                 .exit_latency = 480,
774                 .target_residency = 5000,
775                 .enter = &intel_idle,
776                 .enter_s2idle = intel_idle_s2idle, },
777         {
778                 .name = "C10",
779                 .desc = "MWAIT 0x60",
780                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
781                 .exit_latency = 890,
782                 .target_residency = 5000,
783                 .enter = &intel_idle,
784                 .enter_s2idle = intel_idle_s2idle, },
785         {
786                 .enter = NULL }
787 };
788
789 static struct cpuidle_state skx_cstates[] __initdata = {
790         {
791                 .name = "C1",
792                 .desc = "MWAIT 0x00",
793                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
794                 .exit_latency = 2,
795                 .target_residency = 2,
796                 .enter = &intel_idle,
797                 .enter_s2idle = intel_idle_s2idle, },
798         {
799                 .name = "C1E",
800                 .desc = "MWAIT 0x01",
801                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
802                 .exit_latency = 10,
803                 .target_residency = 20,
804                 .enter = &intel_idle,
805                 .enter_s2idle = intel_idle_s2idle, },
806         {
807                 .name = "C6",
808                 .desc = "MWAIT 0x20",
809                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
810                 .exit_latency = 133,
811                 .target_residency = 600,
812                 .enter = &intel_idle,
813                 .enter_s2idle = intel_idle_s2idle, },
814         {
815                 .enter = NULL }
816 };
817
818 static struct cpuidle_state icx_cstates[] __initdata = {
819         {
820                 .name = "C1",
821                 .desc = "MWAIT 0x00",
822                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
823                 .exit_latency = 1,
824                 .target_residency = 1,
825                 .enter = &intel_idle,
826                 .enter_s2idle = intel_idle_s2idle, },
827         {
828                 .name = "C1E",
829                 .desc = "MWAIT 0x01",
830                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
831                 .exit_latency = 4,
832                 .target_residency = 4,
833                 .enter = &intel_idle,
834                 .enter_s2idle = intel_idle_s2idle, },
835         {
836                 .name = "C6",
837                 .desc = "MWAIT 0x20",
838                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
839                 .exit_latency = 170,
840                 .target_residency = 600,
841                 .enter = &intel_idle,
842                 .enter_s2idle = intel_idle_s2idle, },
843         {
844                 .enter = NULL }
845 };
846
847 /*
848  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
849  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
850  * But in this case there is effectively no C1, because C1 requests are
851  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
852  * and C1E requests end up with C1, so there is effectively no C1E.
853  *
854  * By default we enable C1E and disable C1 by marking it with
855  * 'CPUIDLE_FLAG_UNUSABLE'.
856  */
857 static struct cpuidle_state adl_cstates[] __initdata = {
858         {
859                 .name = "C1",
860                 .desc = "MWAIT 0x00",
861                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
862                 .exit_latency = 1,
863                 .target_residency = 1,
864                 .enter = &intel_idle,
865                 .enter_s2idle = intel_idle_s2idle, },
866         {
867                 .name = "C1E",
868                 .desc = "MWAIT 0x01",
869                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
870                 .exit_latency = 2,
871                 .target_residency = 4,
872                 .enter = &intel_idle,
873                 .enter_s2idle = intel_idle_s2idle, },
874         {
875                 .name = "C6",
876                 .desc = "MWAIT 0x20",
877                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
878                 .exit_latency = 220,
879                 .target_residency = 600,
880                 .enter = &intel_idle,
881                 .enter_s2idle = intel_idle_s2idle, },
882         {
883                 .name = "C8",
884                 .desc = "MWAIT 0x40",
885                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
886                 .exit_latency = 280,
887                 .target_residency = 800,
888                 .enter = &intel_idle,
889                 .enter_s2idle = intel_idle_s2idle, },
890         {
891                 .name = "C10",
892                 .desc = "MWAIT 0x60",
893                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
894                 .exit_latency = 680,
895                 .target_residency = 2000,
896                 .enter = &intel_idle,
897                 .enter_s2idle = intel_idle_s2idle, },
898         {
899                 .enter = NULL }
900 };
901
902 static struct cpuidle_state adl_l_cstates[] __initdata = {
903         {
904                 .name = "C1",
905                 .desc = "MWAIT 0x00",
906                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
907                 .exit_latency = 1,
908                 .target_residency = 1,
909                 .enter = &intel_idle,
910                 .enter_s2idle = intel_idle_s2idle, },
911         {
912                 .name = "C1E",
913                 .desc = "MWAIT 0x01",
914                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
915                 .exit_latency = 2,
916                 .target_residency = 4,
917                 .enter = &intel_idle,
918                 .enter_s2idle = intel_idle_s2idle, },
919         {
920                 .name = "C6",
921                 .desc = "MWAIT 0x20",
922                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
923                 .exit_latency = 170,
924                 .target_residency = 500,
925                 .enter = &intel_idle,
926                 .enter_s2idle = intel_idle_s2idle, },
927         {
928                 .name = "C8",
929                 .desc = "MWAIT 0x40",
930                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
931                 .exit_latency = 200,
932                 .target_residency = 600,
933                 .enter = &intel_idle,
934                 .enter_s2idle = intel_idle_s2idle, },
935         {
936                 .name = "C10",
937                 .desc = "MWAIT 0x60",
938                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
939                 .exit_latency = 230,
940                 .target_residency = 700,
941                 .enter = &intel_idle,
942                 .enter_s2idle = intel_idle_s2idle, },
943         {
944                 .enter = NULL }
945 };
946
947 static struct cpuidle_state mtl_l_cstates[] __initdata = {
948         {
949                 .name = "C1E",
950                 .desc = "MWAIT 0x01",
951                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
952                 .exit_latency = 1,
953                 .target_residency = 1,
954                 .enter = &intel_idle,
955                 .enter_s2idle = intel_idle_s2idle, },
956         {
957                 .name = "C6",
958                 .desc = "MWAIT 0x20",
959                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
960                 .exit_latency = 140,
961                 .target_residency = 420,
962                 .enter = &intel_idle,
963                 .enter_s2idle = intel_idle_s2idle, },
964         {
965                 .name = "C10",
966                 .desc = "MWAIT 0x60",
967                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
968                 .exit_latency = 310,
969                 .target_residency = 930,
970                 .enter = &intel_idle,
971                 .enter_s2idle = intel_idle_s2idle, },
972         {
973                 .enter = NULL }
974 };
975
976 static struct cpuidle_state gmt_cstates[] __initdata = {
977         {
978                 .name = "C1",
979                 .desc = "MWAIT 0x00",
980                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
981                 .exit_latency = 1,
982                 .target_residency = 1,
983                 .enter = &intel_idle,
984                 .enter_s2idle = intel_idle_s2idle, },
985         {
986                 .name = "C1E",
987                 .desc = "MWAIT 0x01",
988                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
989                 .exit_latency = 2,
990                 .target_residency = 4,
991                 .enter = &intel_idle,
992                 .enter_s2idle = intel_idle_s2idle, },
993         {
994                 .name = "C6",
995                 .desc = "MWAIT 0x20",
996                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
997                 .exit_latency = 195,
998                 .target_residency = 585,
999                 .enter = &intel_idle,
1000                 .enter_s2idle = intel_idle_s2idle, },
1001         {
1002                 .name = "C8",
1003                 .desc = "MWAIT 0x40",
1004                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1005                 .exit_latency = 260,
1006                 .target_residency = 1040,
1007                 .enter = &intel_idle,
1008                 .enter_s2idle = intel_idle_s2idle, },
1009         {
1010                 .name = "C10",
1011                 .desc = "MWAIT 0x60",
1012                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1013                 .exit_latency = 660,
1014                 .target_residency = 1980,
1015                 .enter = &intel_idle,
1016                 .enter_s2idle = intel_idle_s2idle, },
1017         {
1018                 .enter = NULL }
1019 };
1020
1021 static struct cpuidle_state spr_cstates[] __initdata = {
1022         {
1023                 .name = "C1",
1024                 .desc = "MWAIT 0x00",
1025                 .flags = MWAIT2flg(0x00),
1026                 .exit_latency = 1,
1027                 .target_residency = 1,
1028                 .enter = &intel_idle,
1029                 .enter_s2idle = intel_idle_s2idle, },
1030         {
1031                 .name = "C1E",
1032                 .desc = "MWAIT 0x01",
1033                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1034                 .exit_latency = 2,
1035                 .target_residency = 4,
1036                 .enter = &intel_idle,
1037                 .enter_s2idle = intel_idle_s2idle, },
1038         {
1039                 .name = "C6",
1040                 .desc = "MWAIT 0x20",
1041                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1042                                            CPUIDLE_FLAG_INIT_XSTATE,
1043                 .exit_latency = 290,
1044                 .target_residency = 800,
1045                 .enter = &intel_idle,
1046                 .enter_s2idle = intel_idle_s2idle, },
1047         {
1048                 .enter = NULL }
1049 };
1050
1051 static struct cpuidle_state gnr_cstates[] __initdata = {
1052         {
1053                 .name = "C1",
1054                 .desc = "MWAIT 0x00",
1055                 .flags = MWAIT2flg(0x00),
1056                 .exit_latency = 1,
1057                 .target_residency = 1,
1058                 .enter = &intel_idle,
1059                 .enter_s2idle = intel_idle_s2idle, },
1060         {
1061                 .name = "C1E",
1062                 .desc = "MWAIT 0x01",
1063                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1064                 .exit_latency = 4,
1065                 .target_residency = 4,
1066                 .enter = &intel_idle,
1067                 .enter_s2idle = intel_idle_s2idle, },
1068         {
1069                 .name = "C6",
1070                 .desc = "MWAIT 0x20",
1071                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1072                                            CPUIDLE_FLAG_INIT_XSTATE |
1073                                            CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1074                 .exit_latency = 170,
1075                 .target_residency = 650,
1076                 .enter = &intel_idle,
1077                 .enter_s2idle = intel_idle_s2idle, },
1078         {
1079                 .name = "C6P",
1080                 .desc = "MWAIT 0x21",
1081                 .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
1082                                            CPUIDLE_FLAG_INIT_XSTATE |
1083                                            CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1084                 .exit_latency = 210,
1085                 .target_residency = 1000,
1086                 .enter = &intel_idle,
1087                 .enter_s2idle = intel_idle_s2idle, },
1088         {
1089                 .enter = NULL }
1090 };
1091
1092 static struct cpuidle_state gnrd_cstates[] __initdata = {
1093         {
1094                 .name = "C1",
1095                 .desc = "MWAIT 0x00",
1096                 .flags = MWAIT2flg(0x00),
1097                 .exit_latency = 1,
1098                 .target_residency = 1,
1099                 .enter = &intel_idle,
1100                 .enter_s2idle = intel_idle_s2idle, },
1101         {
1102                 .name = "C1E",
1103                 .desc = "MWAIT 0x01",
1104                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1105                 .exit_latency = 4,
1106                 .target_residency = 4,
1107                 .enter = &intel_idle,
1108                 .enter_s2idle = intel_idle_s2idle, },
1109         {
1110                 .name = "C6",
1111                 .desc = "MWAIT 0x20",
1112                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1113                                            CPUIDLE_FLAG_INIT_XSTATE |
1114                                            CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1115                 .exit_latency = 220,
1116                 .target_residency = 650,
1117                 .enter = &intel_idle,
1118                 .enter_s2idle = intel_idle_s2idle, },
1119         {
1120                 .name = "C6P",
1121                 .desc = "MWAIT 0x21",
1122                 .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
1123                                            CPUIDLE_FLAG_INIT_XSTATE |
1124                                            CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1125                 .exit_latency = 240,
1126                 .target_residency = 750,
1127                 .enter = &intel_idle,
1128                 .enter_s2idle = intel_idle_s2idle, },
1129         {
1130                 .enter = NULL }
1131 };
1132
1133 static struct cpuidle_state atom_cstates[] __initdata = {
1134         {
1135                 .name = "C1E",
1136                 .desc = "MWAIT 0x00",
1137                 .flags = MWAIT2flg(0x00),
1138                 .exit_latency = 10,
1139                 .target_residency = 20,
1140                 .enter = &intel_idle,
1141                 .enter_s2idle = intel_idle_s2idle, },
1142         {
1143                 .name = "C2",
1144                 .desc = "MWAIT 0x10",
1145                 .flags = MWAIT2flg(0x10),
1146                 .exit_latency = 20,
1147                 .target_residency = 80,
1148                 .enter = &intel_idle,
1149                 .enter_s2idle = intel_idle_s2idle, },
1150         {
1151                 .name = "C4",
1152                 .desc = "MWAIT 0x30",
1153                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1154                 .exit_latency = 100,
1155                 .target_residency = 400,
1156                 .enter = &intel_idle,
1157                 .enter_s2idle = intel_idle_s2idle, },
1158         {
1159                 .name = "C6",
1160                 .desc = "MWAIT 0x52",
1161                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1162                 .exit_latency = 140,
1163                 .target_residency = 560,
1164                 .enter = &intel_idle,
1165                 .enter_s2idle = intel_idle_s2idle, },
1166         {
1167                 .enter = NULL }
1168 };
1169 static struct cpuidle_state tangier_cstates[] __initdata = {
1170         {
1171                 .name = "C1",
1172                 .desc = "MWAIT 0x00",
1173                 .flags = MWAIT2flg(0x00),
1174                 .exit_latency = 1,
1175                 .target_residency = 4,
1176                 .enter = &intel_idle,
1177                 .enter_s2idle = intel_idle_s2idle, },
1178         {
1179                 .name = "C4",
1180                 .desc = "MWAIT 0x30",
1181                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1182                 .exit_latency = 100,
1183                 .target_residency = 400,
1184                 .enter = &intel_idle,
1185                 .enter_s2idle = intel_idle_s2idle, },
1186         {
1187                 .name = "C6",
1188                 .desc = "MWAIT 0x52",
1189                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1190                 .exit_latency = 140,
1191                 .target_residency = 560,
1192                 .enter = &intel_idle,
1193                 .enter_s2idle = intel_idle_s2idle, },
1194         {
1195                 .name = "C7",
1196                 .desc = "MWAIT 0x60",
1197                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1198                 .exit_latency = 1200,
1199                 .target_residency = 4000,
1200                 .enter = &intel_idle,
1201                 .enter_s2idle = intel_idle_s2idle, },
1202         {
1203                 .name = "C9",
1204                 .desc = "MWAIT 0x64",
1205                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1206                 .exit_latency = 10000,
1207                 .target_residency = 20000,
1208                 .enter = &intel_idle,
1209                 .enter_s2idle = intel_idle_s2idle, },
1210         {
1211                 .enter = NULL }
1212 };
1213 static struct cpuidle_state avn_cstates[] __initdata = {
1214         {
1215                 .name = "C1",
1216                 .desc = "MWAIT 0x00",
1217                 .flags = MWAIT2flg(0x00),
1218                 .exit_latency = 2,
1219                 .target_residency = 2,
1220                 .enter = &intel_idle,
1221                 .enter_s2idle = intel_idle_s2idle, },
1222         {
1223                 .name = "C6",
1224                 .desc = "MWAIT 0x51",
1225                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1226                 .exit_latency = 15,
1227                 .target_residency = 45,
1228                 .enter = &intel_idle,
1229                 .enter_s2idle = intel_idle_s2idle, },
1230         {
1231                 .enter = NULL }
1232 };
1233 static struct cpuidle_state knl_cstates[] __initdata = {
1234         {
1235                 .name = "C1",
1236                 .desc = "MWAIT 0x00",
1237                 .flags = MWAIT2flg(0x00),
1238                 .exit_latency = 1,
1239                 .target_residency = 2,
1240                 .enter = &intel_idle,
1241                 .enter_s2idle = intel_idle_s2idle },
1242         {
1243                 .name = "C6",
1244                 .desc = "MWAIT 0x10",
1245                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1246                 .exit_latency = 120,
1247                 .target_residency = 500,
1248                 .enter = &intel_idle,
1249                 .enter_s2idle = intel_idle_s2idle },
1250         {
1251                 .enter = NULL }
1252 };
1253
1254 static struct cpuidle_state bxt_cstates[] __initdata = {
1255         {
1256                 .name = "C1",
1257                 .desc = "MWAIT 0x00",
1258                 .flags = MWAIT2flg(0x00),
1259                 .exit_latency = 2,
1260                 .target_residency = 2,
1261                 .enter = &intel_idle,
1262                 .enter_s2idle = intel_idle_s2idle, },
1263         {
1264                 .name = "C1E",
1265                 .desc = "MWAIT 0x01",
1266                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1267                 .exit_latency = 10,
1268                 .target_residency = 20,
1269                 .enter = &intel_idle,
1270                 .enter_s2idle = intel_idle_s2idle, },
1271         {
1272                 .name = "C6",
1273                 .desc = "MWAIT 0x20",
1274                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1275                 .exit_latency = 133,
1276                 .target_residency = 133,
1277                 .enter = &intel_idle,
1278                 .enter_s2idle = intel_idle_s2idle, },
1279         {
1280                 .name = "C7s",
1281                 .desc = "MWAIT 0x31",
1282                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1283                 .exit_latency = 155,
1284                 .target_residency = 155,
1285                 .enter = &intel_idle,
1286                 .enter_s2idle = intel_idle_s2idle, },
1287         {
1288                 .name = "C8",
1289                 .desc = "MWAIT 0x40",
1290                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1291                 .exit_latency = 1000,
1292                 .target_residency = 1000,
1293                 .enter = &intel_idle,
1294                 .enter_s2idle = intel_idle_s2idle, },
1295         {
1296                 .name = "C9",
1297                 .desc = "MWAIT 0x50",
1298                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1299                 .exit_latency = 2000,
1300                 .target_residency = 2000,
1301                 .enter = &intel_idle,
1302                 .enter_s2idle = intel_idle_s2idle, },
1303         {
1304                 .name = "C10",
1305                 .desc = "MWAIT 0x60",
1306                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1307                 .exit_latency = 10000,
1308                 .target_residency = 10000,
1309                 .enter = &intel_idle,
1310                 .enter_s2idle = intel_idle_s2idle, },
1311         {
1312                 .enter = NULL }
1313 };
1314
1315 static struct cpuidle_state dnv_cstates[] __initdata = {
1316         {
1317                 .name = "C1",
1318                 .desc = "MWAIT 0x00",
1319                 .flags = MWAIT2flg(0x00),
1320                 .exit_latency = 2,
1321                 .target_residency = 2,
1322                 .enter = &intel_idle,
1323                 .enter_s2idle = intel_idle_s2idle, },
1324         {
1325                 .name = "C1E",
1326                 .desc = "MWAIT 0x01",
1327                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1328                 .exit_latency = 10,
1329                 .target_residency = 20,
1330                 .enter = &intel_idle,
1331                 .enter_s2idle = intel_idle_s2idle, },
1332         {
1333                 .name = "C6",
1334                 .desc = "MWAIT 0x20",
1335                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1336                 .exit_latency = 50,
1337                 .target_residency = 500,
1338                 .enter = &intel_idle,
1339                 .enter_s2idle = intel_idle_s2idle, },
1340         {
1341                 .enter = NULL }
1342 };
1343
1344 /*
1345  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1346  * C6, and this is indicated in the CPUID mwait leaf.
1347  */
1348 static struct cpuidle_state snr_cstates[] __initdata = {
1349         {
1350                 .name = "C1",
1351                 .desc = "MWAIT 0x00",
1352                 .flags = MWAIT2flg(0x00),
1353                 .exit_latency = 2,
1354                 .target_residency = 2,
1355                 .enter = &intel_idle,
1356                 .enter_s2idle = intel_idle_s2idle, },
1357         {
1358                 .name = "C1E",
1359                 .desc = "MWAIT 0x01",
1360                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1361                 .exit_latency = 15,
1362                 .target_residency = 25,
1363                 .enter = &intel_idle,
1364                 .enter_s2idle = intel_idle_s2idle, },
1365         {
1366                 .name = "C6",
1367                 .desc = "MWAIT 0x20",
1368                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1369                 .exit_latency = 130,
1370                 .target_residency = 500,
1371                 .enter = &intel_idle,
1372                 .enter_s2idle = intel_idle_s2idle, },
1373         {
1374                 .enter = NULL }
1375 };
1376
1377 static struct cpuidle_state grr_cstates[] __initdata = {
1378         {
1379                 .name = "C1",
1380                 .desc = "MWAIT 0x00",
1381                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1382                 .exit_latency = 1,
1383                 .target_residency = 1,
1384                 .enter = &intel_idle,
1385                 .enter_s2idle = intel_idle_s2idle, },
1386         {
1387                 .name = "C1E",
1388                 .desc = "MWAIT 0x01",
1389                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1390                 .exit_latency = 2,
1391                 .target_residency = 10,
1392                 .enter = &intel_idle,
1393                 .enter_s2idle = intel_idle_s2idle, },
1394         {
1395                 .name = "C6S",
1396                 .desc = "MWAIT 0x22",
1397                 .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED,
1398                 .exit_latency = 140,
1399                 .target_residency = 500,
1400                 .enter = &intel_idle,
1401                 .enter_s2idle = intel_idle_s2idle, },
1402         {
1403                 .enter = NULL }
1404 };
1405
1406 static struct cpuidle_state srf_cstates[] __initdata = {
1407         {
1408                 .name = "C1",
1409                 .desc = "MWAIT 0x00",
1410                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1411                 .exit_latency = 1,
1412                 .target_residency = 1,
1413                 .enter = &intel_idle,
1414                 .enter_s2idle = intel_idle_s2idle, },
1415         {
1416                 .name = "C1E",
1417                 .desc = "MWAIT 0x01",
1418                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1419                 .exit_latency = 2,
1420                 .target_residency = 10,
1421                 .enter = &intel_idle,
1422                 .enter_s2idle = intel_idle_s2idle, },
1423         {
1424                 .name = "C6S",
1425                 .desc = "MWAIT 0x22",
1426                 .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED |
1427                                            CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1428                 .exit_latency = 270,
1429                 .target_residency = 700,
1430                 .enter = &intel_idle,
1431                 .enter_s2idle = intel_idle_s2idle, },
1432         {
1433                 .name = "C6SP",
1434                 .desc = "MWAIT 0x23",
1435                 .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED |
1436                                            CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1437                 .exit_latency = 310,
1438                 .target_residency = 900,
1439                 .enter = &intel_idle,
1440                 .enter_s2idle = intel_idle_s2idle, },
1441         {
1442                 .enter = NULL }
1443 };
1444
1445 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1446         .state_table = nehalem_cstates,
1447         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1448         .disable_promotion_to_c1e = true,
1449 };
1450
1451 static const struct idle_cpu idle_cpu_nhx __initconst = {
1452         .state_table = nehalem_cstates,
1453         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1454         .disable_promotion_to_c1e = true,
1455         .use_acpi = true,
1456 };
1457
1458 static const struct idle_cpu idle_cpu_atom __initconst = {
1459         .state_table = atom_cstates,
1460 };
1461
1462 static const struct idle_cpu idle_cpu_tangier __initconst = {
1463         .state_table = tangier_cstates,
1464 };
1465
1466 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1467         .state_table = atom_cstates,
1468         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1469 };
1470
1471 static const struct idle_cpu idle_cpu_snb __initconst = {
1472         .state_table = snb_cstates,
1473         .disable_promotion_to_c1e = true,
1474 };
1475
1476 static const struct idle_cpu idle_cpu_snx __initconst = {
1477         .state_table = snb_cstates,
1478         .disable_promotion_to_c1e = true,
1479         .use_acpi = true,
1480 };
1481
1482 static const struct idle_cpu idle_cpu_byt __initconst = {
1483         .state_table = byt_cstates,
1484         .disable_promotion_to_c1e = true,
1485 };
1486
1487 static const struct idle_cpu idle_cpu_cht __initconst = {
1488         .state_table = cht_cstates,
1489         .disable_promotion_to_c1e = true,
1490 };
1491
1492 static const struct idle_cpu idle_cpu_ivb __initconst = {
1493         .state_table = ivb_cstates,
1494         .disable_promotion_to_c1e = true,
1495 };
1496
1497 static const struct idle_cpu idle_cpu_ivt __initconst = {
1498         .state_table = ivt_cstates,
1499         .disable_promotion_to_c1e = true,
1500         .use_acpi = true,
1501 };
1502
1503 static const struct idle_cpu idle_cpu_hsw __initconst = {
1504         .state_table = hsw_cstates,
1505         .disable_promotion_to_c1e = true,
1506 };
1507
1508 static const struct idle_cpu idle_cpu_hsx __initconst = {
1509         .state_table = hsw_cstates,
1510         .disable_promotion_to_c1e = true,
1511         .use_acpi = true,
1512 };
1513
1514 static const struct idle_cpu idle_cpu_bdw __initconst = {
1515         .state_table = bdw_cstates,
1516         .disable_promotion_to_c1e = true,
1517 };
1518
1519 static const struct idle_cpu idle_cpu_bdx __initconst = {
1520         .state_table = bdw_cstates,
1521         .disable_promotion_to_c1e = true,
1522         .use_acpi = true,
1523 };
1524
1525 static const struct idle_cpu idle_cpu_skl __initconst = {
1526         .state_table = skl_cstates,
1527         .disable_promotion_to_c1e = true,
1528 };
1529
1530 static const struct idle_cpu idle_cpu_skx __initconst = {
1531         .state_table = skx_cstates,
1532         .disable_promotion_to_c1e = true,
1533         .use_acpi = true,
1534 };
1535
1536 static const struct idle_cpu idle_cpu_icx __initconst = {
1537         .state_table = icx_cstates,
1538         .disable_promotion_to_c1e = true,
1539         .use_acpi = true,
1540 };
1541
1542 static const struct idle_cpu idle_cpu_adl __initconst = {
1543         .state_table = adl_cstates,
1544 };
1545
1546 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1547         .state_table = adl_l_cstates,
1548 };
1549
1550 static const struct idle_cpu idle_cpu_mtl_l __initconst = {
1551         .state_table = mtl_l_cstates,
1552 };
1553
1554 static const struct idle_cpu idle_cpu_gmt __initconst = {
1555         .state_table = gmt_cstates,
1556 };
1557
1558 static const struct idle_cpu idle_cpu_spr __initconst = {
1559         .state_table = spr_cstates,
1560         .disable_promotion_to_c1e = true,
1561         .c1_demotion_supported = true,
1562         .use_acpi = true,
1563 };
1564
1565 static const struct idle_cpu idle_cpu_gnr __initconst = {
1566         .state_table = gnr_cstates,
1567         .disable_promotion_to_c1e = true,
1568         .c1_demotion_supported = true,
1569         .use_acpi = true,
1570 };
1571
1572 static const struct idle_cpu idle_cpu_gnrd __initconst = {
1573         .state_table = gnrd_cstates,
1574         .disable_promotion_to_c1e = true,
1575         .c1_demotion_supported = true,
1576         .use_acpi = true,
1577 };
1578
1579 static const struct idle_cpu idle_cpu_avn __initconst = {
1580         .state_table = avn_cstates,
1581         .disable_promotion_to_c1e = true,
1582         .use_acpi = true,
1583 };
1584
1585 static const struct idle_cpu idle_cpu_knl __initconst = {
1586         .state_table = knl_cstates,
1587         .use_acpi = true,
1588 };
1589
1590 static const struct idle_cpu idle_cpu_bxt __initconst = {
1591         .state_table = bxt_cstates,
1592         .disable_promotion_to_c1e = true,
1593 };
1594
1595 static const struct idle_cpu idle_cpu_dnv __initconst = {
1596         .state_table = dnv_cstates,
1597         .disable_promotion_to_c1e = true,
1598         .use_acpi = true,
1599 };
1600
1601 static const struct idle_cpu idle_cpu_tmt __initconst = {
1602         .disable_promotion_to_c1e = true,
1603 };
1604
1605 static const struct idle_cpu idle_cpu_snr __initconst = {
1606         .state_table = snr_cstates,
1607         .disable_promotion_to_c1e = true,
1608         .use_acpi = true,
1609 };
1610
/* Driver data referenced by the INTEL_ATOM_CRESTMONT entry of intel_idle_ids[]. */
static const struct idle_cpu idle_cpu_grr __initconst = {
	.state_table = grr_cstates,
	/* Makes intel_idle_init() select C1E_PROMOTION_DISABLE. */
	.disable_promotion_to_c1e = true,
	/* Makes intel_idle_init() set c1_demotion_supported (sysfs knob). */
	.c1_demotion_supported = true,
	/* Makes intel_idle_init() extract ACPI _CST in addition to the table. */
	.use_acpi = true,
};
1617
/*
 * Driver data referenced by the INTEL_ATOM_CRESTMONT_X and
 * INTEL_ATOM_DARKMONT_X entries of intel_idle_ids[].
 */
static const struct idle_cpu idle_cpu_srf __initconst = {
	.state_table = srf_cstates,
	/* Makes intel_idle_init() select C1E_PROMOTION_DISABLE. */
	.disable_promotion_to_c1e = true,
	/* Makes intel_idle_init() set c1_demotion_supported (sysfs knob). */
	.c1_demotion_supported = true,
	/* Makes intel_idle_init() extract ACPI _CST in addition to the table. */
	.use_acpi = true,
};
1624
/*
 * CPU models with dedicated driver data.
 *
 * intel_idle_init() looks the boot CPU up in this table first with
 * x86_match_cpu(); the driver_data of the matching entry is used as the
 * struct idle_cpu describing the model.  The list is terminated by an empty
 * entry.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_VFM(INTEL_NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_VFM(INTEL_WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_VFM(INTEL_WESTMERE_EP,	&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_VFM(INTEL_ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_VFM(INTEL_WESTMERE_EX,	&idle_cpu_nhx),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE,	&idle_cpu_snb),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_VFM(INTEL_ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &idle_cpu_tangier),
	X86_MATCH_VFM(INTEL_ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_VFM(INTEL_IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_VFM(INTEL_IVYBRIDGE_X,	&idle_cpu_ivt),
	X86_MATCH_VFM(INTEL_HASWELL,		&idle_cpu_hsw),
	X86_MATCH_VFM(INTEL_HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_VFM(INTEL_HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_VFM(INTEL_HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_VFM(INTEL_BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_VFM(INTEL_BROADWELL_G,	&idle_cpu_bdw),
	X86_MATCH_VFM(INTEL_BROADWELL_X,	&idle_cpu_bdx),
	X86_MATCH_VFM(INTEL_BROADWELL_D,	&idle_cpu_bdx),
	X86_MATCH_VFM(INTEL_SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_VFM(INTEL_SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_VFM(INTEL_ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_VFM(INTEL_ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_VFM(INTEL_ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_VFM(INTEL_ALDERLAKE_L,	&idle_cpu_adl_l),
	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&idle_cpu_mtl_l),
	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&idle_cpu_gmt),
	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X,	&idle_cpu_gnr),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D,	&idle_cpu_gnrd),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &idle_cpu_bxt),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT,	&idle_cpu_tmt),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_L,	&idle_cpu_tmt),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D,	&idle_cpu_snr),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT,	&idle_cpu_grr),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X,	&idle_cpu_srf),
	X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X,	&idle_cpu_srf),
	{}
};
1680
/*
 * Fallback match used by intel_idle_init() when the boot CPU is not listed in
 * intel_idle_ids[]: any Intel family 6 CPU with X86_FEATURE_MWAIT.  The driver
 * data is NULL, so no native table is available and the C-states have to come
 * from ACPI _CST.
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};
1685
1686 static bool __init intel_idle_max_cstate_reached(int cstate)
1687 {
1688         if (cstate + 1 > max_cstate) {
1689                 pr_info("max_cstate %d reached\n", max_cstate);
1690                 return true;
1691         }
1692         return false;
1693 }
1694
1695 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1696 {
1697         unsigned long eax = flg2MWAIT(state->flags);
1698
1699         if (boot_cpu_has(X86_FEATURE_ARAT))
1700                 return false;
1701
1702         /*
1703          * Switch over to one-shot tick broadcast if the target C-state
1704          * is deeper than C1.
1705          */
1706         return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1707 }
1708
1709 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1710 #include <acpi/processor.h>
1711
/*
 * Read-only (0444) module parameters controlling how ACPI _CST is used.
 *
 * no_acpi: checked first by intel_idle_acpi_cst_extract(); when set, _CST is
 * never evaluated.
 */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/* Exposed as "use_acpi": treat every CPU model as if its .use_acpi were set. */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* Makes ignore_native() return true, unless no_acpi is set as well. */
static bool no_native __read_mostly; /* No effect if no_acpi is set. */
module_param_named(no_native, no_native, bool, 0444);
MODULE_PARM_DESC(no_native, "Ignore cpu specific (native) idle states in lieu of ACPI idle states");

/*
 * _CST data filled in by acpi_processor_evaluate_cst().  A zero .count means
 * that no usable _CST information has been extracted.
 */
static struct acpi_processor_power acpi_state_table __initdata;
1725
1726 /**
1727  * intel_idle_cst_usable - Check if the _CST information can be used.
1728  *
1729  * Check if all of the C-states listed by _CST in the max_cstate range are
1730  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1731  */
1732 static bool __init intel_idle_cst_usable(void)
1733 {
1734         int cstate, limit;
1735
1736         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1737                       acpi_state_table.count);
1738
1739         for (cstate = 1; cstate < limit; cstate++) {
1740                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1741
1742                 if (cx->entry_method != ACPI_CSTATE_FFH)
1743                         return false;
1744         }
1745
1746         return true;
1747 }
1748
1749 static bool __init intel_idle_acpi_cst_extract(void)
1750 {
1751         unsigned int cpu;
1752
1753         if (no_acpi) {
1754                 pr_debug("Not allowed to use ACPI _CST\n");
1755                 return false;
1756         }
1757
1758         for_each_possible_cpu(cpu) {
1759                 struct acpi_processor *pr = per_cpu(processors, cpu);
1760
1761                 if (!pr)
1762                         continue;
1763
1764                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1765                         continue;
1766
1767                 acpi_state_table.count++;
1768
1769                 if (!intel_idle_cst_usable())
1770                         continue;
1771
1772                 if (!acpi_processor_claim_cst_control())
1773                         break;
1774
1775                 return true;
1776         }
1777
1778         acpi_state_table.count = 0;
1779         pr_debug("ACPI _CST not found or not usable\n");
1780         return false;
1781 }
1782
/*
 * Populate @drv->states[] from the extracted _CST data (acpi_state_table).
 *
 * One cpuidle state named "C<n>_ACPI" is appended per _CST entry, starting at
 * entry 1, until either the _CST entries, CPUIDLE_STATE_MAX or the max_cstate
 * limit is exhausted.  drv->state_count is advanced for every state added.
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		/* The helper tests its argument + 1, hence cstate - 1 here. */
		if (intel_idle_max_cstate_reached(cstate - 1))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* The _CST address field is used as the MWAIT hint. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		/* Honor the user-provided mask of states to start disabled. */
		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			mark_tsc_unstable("TSC halts in idle");

		state->enter = intel_idle;
		state->enter_dead = intel_idle_enter_dead;
		state->enter_s2idle = intel_idle_s2idle;
	}
}
1836
1837 static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
1838 {
1839         int cstate, limit;
1840
1841         /*
1842          * If there are no _CST C-states, do not disable any C-states by
1843          * default.
1844          */
1845         if (!acpi_state_table.count)
1846                 return false;
1847
1848         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1849         /*
1850          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1851          * the interesting states are ACPI_CSTATE_FFH.
1852          */
1853         for (cstate = 1; cstate < limit; cstate++) {
1854                 u32 acpi_hint = acpi_state_table.states[cstate].address;
1855                 u32 table_hint = mwait_hint;
1856
1857                 if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) {
1858                         acpi_hint &= ~MWAIT_SUBSTATE_MASK;
1859                         table_hint &= ~MWAIT_SUBSTATE_MASK;
1860                 }
1861
1862                 if (acpi_hint == table_hint)
1863                         return false;
1864         }
1865         return true;
1866 }
1867
1868 static inline bool ignore_native(void)
1869 {
1870         return no_native && !no_acpi;
1871 }
1872 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
/*
 * Fallbacks for kernels built without CONFIG_ACPI_PROCESSOR_CSTATE: _CST is
 * never forced, never extracted, provides no states, disables nothing by
 * default and the native states are never ignored.
 */
#define force_use_acpi	(false)

static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
{
	return false;
}
static inline bool ignore_native(void) { return false; }
1882 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1883
1884 /**
1885  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1886  *
1887  * Tune IVT multi-socket targets.
1888  * Assumption: num_sockets == (max_package_num + 1).
1889  */
1890 static void __init ivt_idle_state_table_update(void)
1891 {
1892         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1893         int cpu, package_num, num_sockets = 1;
1894
1895         for_each_online_cpu(cpu) {
1896                 package_num = topology_physical_package_id(cpu);
1897                 if (package_num + 1 > num_sockets) {
1898                         num_sockets = package_num + 1;
1899
1900                         if (num_sockets > 4) {
1901                                 cpuidle_state_table = ivt_cstates_8s;
1902                                 return;
1903                         }
1904                 }
1905         }
1906
1907         if (num_sockets > 2)
1908                 cpuidle_state_table = ivt_cstates_4s;
1909
1910         /* else, 1 and 2 socket systems use default ivt_cstates */
1911 }
1912
1913 /**
1914  * irtl_2_usec - IRTL to microseconds conversion.
1915  * @irtl: IRTL MSR value.
1916  *
1917  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1918  */
1919 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1920 {
1921         static const unsigned int irtl_ns_units[] __initconst = {
1922                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1923         };
1924         unsigned long long ns;
1925
1926         if (!irtl)
1927                 return 0;
1928
1929         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1930
1931         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1932 }
1933
1934 /**
1935  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1936  *
1937  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1938  * definitive maximum latency and use the same value for target_residency.
1939  */
1940 static void __init bxt_idle_state_table_update(void)
1941 {
1942         unsigned long long msr;
1943         unsigned int usec;
1944
1945         rdmsrq(MSR_PKGC6_IRTL, msr);
1946         usec = irtl_2_usec(msr);
1947         if (usec) {
1948                 bxt_cstates[2].exit_latency = usec;
1949                 bxt_cstates[2].target_residency = usec;
1950         }
1951
1952         rdmsrq(MSR_PKGC7_IRTL, msr);
1953         usec = irtl_2_usec(msr);
1954         if (usec) {
1955                 bxt_cstates[3].exit_latency = usec;
1956                 bxt_cstates[3].target_residency = usec;
1957         }
1958
1959         rdmsrq(MSR_PKGC8_IRTL, msr);
1960         usec = irtl_2_usec(msr);
1961         if (usec) {
1962                 bxt_cstates[4].exit_latency = usec;
1963                 bxt_cstates[4].target_residency = usec;
1964         }
1965
1966         rdmsrq(MSR_PKGC9_IRTL, msr);
1967         usec = irtl_2_usec(msr);
1968         if (usec) {
1969                 bxt_cstates[5].exit_latency = usec;
1970                 bxt_cstates[5].target_residency = usec;
1971         }
1972
1973         rdmsrq(MSR_PKGC10_IRTL, msr);
1974         usec = irtl_2_usec(msr);
1975         if (usec) {
1976                 bxt_cstates[6].exit_latency = usec;
1977                 bxt_cstates[6].target_residency = usec;
1978         }
1979
1980 }
1981
1982 /**
1983  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1984  *
1985  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1986  */
1987 static void __init sklh_idle_state_table_update(void)
1988 {
1989         unsigned long long msr;
1990         unsigned int eax, ebx, ecx, edx;
1991
1992
1993         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1994         if (max_cstate <= 7)
1995                 return;
1996
1997         /* if PC10 not present in CPUID.MWAIT.EDX */
1998         if ((mwait_substates & (0xF << 28)) == 0)
1999                 return;
2000
2001         rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
2002
2003         /* PC10 is not enabled in PKG C-state limit */
2004         if ((msr & 0xF) != 8)
2005                 return;
2006
2007         ecx = 0;
2008         cpuid(7, &eax, &ebx, &ecx, &edx);
2009
2010         /* if SGX is present */
2011         if (ebx & (1 << 2)) {
2012
2013                 rdmsrq(MSR_IA32_FEAT_CTL, msr);
2014
2015                 /* if SGX is enabled */
2016                 if (msr & (1 << 18))
2017                         return;
2018         }
2019
2020         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
2021         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
2022 }
2023
2024 /**
2025  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
2026  * idle states table.
2027  */
2028 static void __init skx_idle_state_table_update(void)
2029 {
2030         unsigned long long msr;
2031
2032         rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
2033
2034         /*
2035          * 000b: C0/C1 (no package C-state support)
2036          * 001b: C2
2037          * 010b: C6 (non-retention)
2038          * 011b: C6 (retention)
2039          * 111b: No Package C state limits.
2040          */
2041         if ((msr & 0x7) < 2) {
2042                 /*
2043                  * Uses the CC6 + PC0 latency and 3 times of
2044                  * latency for target_residency if the PC6
2045                  * is disabled in BIOS. This is consistent
2046                  * with how intel_idle driver uses _CST
2047                  * to set the target_residency.
2048                  */
2049                 skx_cstates[2].exit_latency = 92;
2050                 skx_cstates[2].target_residency = 276;
2051         }
2052 }
2053
2054 /**
2055  * adl_idle_state_table_update - Adjust AlderLake idle states table.
2056  */
2057 static void __init adl_idle_state_table_update(void)
2058 {
2059         /* Check if user prefers C1 over C1E. */
2060         if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
2061                 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
2062                 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
2063
2064                 /* Disable C1E by clearing the "C1E promotion" bit. */
2065                 c1e_promotion = C1E_PROMOTION_DISABLE;
2066                 return;
2067         }
2068
2069         /* Make sure C1E is enabled by default */
2070         c1e_promotion = C1E_PROMOTION_ENABLE;
2071 }
2072
2073 /**
2074  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
2075  */
2076 static void __init spr_idle_state_table_update(void)
2077 {
2078         unsigned long long msr;
2079
2080         /*
2081          * By default, the C6 state assumes the worst-case scenario of package
2082          * C6. However, if PC6 is disabled, we update the numbers to match
2083          * core C6.
2084          */
2085         rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
2086
2087         /* Limit value 2 and above allow for PC6. */
2088         if ((msr & 0x7) < 2) {
2089                 spr_cstates[2].exit_latency = 190;
2090                 spr_cstates[2].target_residency = 600;
2091         }
2092 }
2093
/**
 * byt_cht_auto_demotion_disable - Disable Bay/Cherry Trail auto-demotion.
 *
 * Writes 0 to both the CC6 and the MC6 demotion policy configuration MSRs.
 * Called from intel_idle_init_cstates_icpu() for INTEL_ATOM_SILVERMONT and
 * INTEL_ATOM_AIRMONT.
 */
static void __init byt_cht_auto_demotion_disable(void)
{
	wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
	wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
2102
2103 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
2104 {
2105         unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) &
2106                                         MWAIT_CSTATE_MASK;
2107         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
2108                                         MWAIT_SUBSTATE_MASK;
2109
2110         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
2111         if (num_substates == 0)
2112                 return false;
2113
2114         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
2115                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
2116
2117         return true;
2118 }
2119
2120 static void state_update_enter_method(struct cpuidle_state *state, int cstate)
2121 {
2122         if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
2123                 /*
2124                  * Combining with XSTATE with IBRS or IRQ_ENABLE flags
2125                  * is not currently supported but this driver.
2126                  */
2127                 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
2128                 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
2129                 state->enter = intel_idle_xstate;
2130                 return;
2131         }
2132
2133         if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
2134                         ((state->flags & CPUIDLE_FLAG_IBRS) || ibrs_off)) {
2135                 /*
2136                  * IBRS mitigation requires that C-states are entered
2137                  * with interrupts disabled.
2138                  */
2139                 if (ibrs_off && (state->flags & CPUIDLE_FLAG_IRQ_ENABLE))
2140                         state->flags &= ~CPUIDLE_FLAG_IRQ_ENABLE;
2141                 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
2142                 state->enter = intel_idle_ibrs;
2143                 return;
2144         }
2145
2146         if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
2147                 state->enter = intel_idle_irq;
2148                 return;
2149         }
2150
2151         if (force_irq_on) {
2152                 pr_info("forced intel_idle_irq for state %d\n", cstate);
2153                 state->enter = intel_idle_irq;
2154         }
2155 }
2156
/*
 * Populate @drv->states[] from the native per-model table
 * (cpuidle_state_table).
 *
 * First the model-specific fix-ups are applied to the table, then every
 * usable table entry is copied into the driver, its ->enter() callback is
 * selected and its OFF/TIMER_STOP flags are computed.  drv->state_count is
 * advanced for every state added.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific adjustments made before the table is walked. */
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_ATOM_GOLDMONT:
	case INTEL_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	case INTEL_SKYLAKE_X:
		skx_idle_state_table_update();
		break;
	case INTEL_SAPPHIRERAPIDS_X:
	case INTEL_EMERALDRAPIDS_X:
		spr_idle_state_table_update();
		break;
	case INTEL_ALDERLAKE:
	case INTEL_ALDERLAKE_L:
	case INTEL_ATOM_GRACEMONT:
		adl_idle_state_table_update();
		break;
	case INTEL_ATOM_SILVERMONT:
	case INTEL_ATOM_AIRMONT:
		byt_cht_auto_demotion_disable();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		struct cpuidle_state *state;
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* An entry without any enter callback terminates the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* Supply the default ->enter_dead() where the table has none. */
		if (!cpuidle_state_table[cstate].enter_dead)
			cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		/* Skip states for which CPUID reports no sub-states. */
		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];
		state = &drv->states[drv->state_count];

		state_update_enter_method(state, cstate);


		/*
		 * Start the state disabled if the user asked for that, or if
		 * _CST is in use, does not list the state, and the state is
		 * not flagged CPUIDLE_FLAG_ALWAYS_ENABLE.
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(state->flags, mwait_hint) &&
		     !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}
}
2234
2235 /**
2236  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
2237  * @drv: cpuidle driver structure to initialize.
2238  */
2239 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
2240 {
2241         cpuidle_poll_state_init(drv);
2242
2243         if (disabled_states_mask & BIT(0))
2244                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
2245
2246         drv->state_count = 1;
2247
2248         if (icpu && icpu->state_table)
2249                 intel_idle_init_cstates_icpu(drv);
2250         else
2251                 intel_idle_init_cstates_acpi(drv);
2252 }
2253
2254 static void auto_demotion_disable(void)
2255 {
2256         unsigned long long msr_bits;
2257
2258         rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2259         msr_bits &= ~auto_demotion_disable_flags;
2260         wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2261 }
2262
2263 static void c1e_promotion_enable(void)
2264 {
2265         unsigned long long msr_bits;
2266
2267         rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
2268         msr_bits |= 0x2;
2269         wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
2270 }
2271
2272 static void c1e_promotion_disable(void)
2273 {
2274         unsigned long long msr_bits;
2275
2276         rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
2277         msr_bits &= ~0x2;
2278         wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
2279 }
2280
2281 /**
2282  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
2283  * @cpu: CPU to initialize.
2284  *
2285  * Register a cpuidle device object for @cpu and update its MSRs in accordance
2286  * with the processor model flags.
2287  */
2288 static int intel_idle_cpu_init(unsigned int cpu)
2289 {
2290         struct cpuidle_device *dev;
2291
2292         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2293         dev->cpu = cpu;
2294
2295         if (cpuidle_register_device(dev)) {
2296                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
2297                 return -EIO;
2298         }
2299
2300         if (auto_demotion_disable_flags)
2301                 auto_demotion_disable();
2302
2303         if (c1e_promotion == C1E_PROMOTION_ENABLE)
2304                 c1e_promotion_enable();
2305         else if (c1e_promotion == C1E_PROMOTION_DISABLE)
2306                 c1e_promotion_disable();
2307
2308         return 0;
2309 }
2310
2311 static int intel_idle_cpu_online(unsigned int cpu)
2312 {
2313         struct cpuidle_device *dev;
2314
2315         if (!boot_cpu_has(X86_FEATURE_ARAT))
2316                 tick_broadcast_enable();
2317
2318         /*
2319          * Some systems can hotplug a cpu at runtime after
2320          * the kernel has booted, we have to initialize the
2321          * driver in this case
2322          */
2323         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2324         if (!dev->registered)
2325                 return intel_idle_cpu_init(cpu);
2326
2327         return 0;
2328 }
2329
2330 /**
2331  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
2332  */
2333 static void __init intel_idle_cpuidle_devices_uninit(void)
2334 {
2335         int i;
2336
2337         for_each_online_cpu(i)
2338                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
2339 }
2340
2341 static void intel_c1_demotion_toggle(void *enable)
2342 {
2343         unsigned long long msr_val;
2344
2345         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2346         /*
2347          * Enable/disable C1 undemotion along with C1 demotion, as this is the
2348          * most sensible configuration in general.
2349          */
2350         if (enable)
2351                 msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
2352         else
2353                 msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
2354         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2355 }
2356
2357 static ssize_t intel_c1_demotion_store(struct device *dev,
2358                                        struct device_attribute *attr,
2359                                        const char *buf, size_t count)
2360 {
2361         bool enable;
2362         int err;
2363
2364         err = kstrtobool(buf, &enable);
2365         if (err)
2366                 return err;
2367
2368         mutex_lock(&c1_demotion_mutex);
2369         /* Enable/disable C1 demotion on all CPUs */
2370         on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1);
2371         mutex_unlock(&c1_demotion_mutex);
2372
2373         return count;
2374 }
2375
2376 static ssize_t intel_c1_demotion_show(struct device *dev,
2377                                       struct device_attribute *attr, char *buf)
2378 {
2379         unsigned long long msr_val;
2380
2381         /*
2382          * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2383          * configuration would be a BIOS bug.
2384          */
2385         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2386         return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
2387 }
2388 static DEVICE_ATTR_RW(intel_c1_demotion);
2389
2390 static int __init intel_idle_sysfs_init(void)
2391 {
2392         int err;
2393
2394         if (!c1_demotion_supported)
2395                 return 0;
2396
2397         sysfs_root = bus_get_dev_root(&cpu_subsys);
2398         if (!sysfs_root)
2399                 return 0;
2400
2401         err = sysfs_add_file_to_group(&sysfs_root->kobj,
2402                                       &dev_attr_intel_c1_demotion.attr,
2403                                       "cpuidle");
2404         if (err) {
2405                 put_device(sysfs_root);
2406                 return err;
2407         }
2408
2409         return 0;
2410 }
2411
2412 static void __init intel_idle_sysfs_uninit(void)
2413 {
2414         if (!sysfs_root)
2415                 return;
2416
2417         sysfs_remove_file_from_group(&sysfs_root->kobj,
2418                                      &dev_attr_intel_c1_demotion.attr,
2419                                      "cpuidle");
2420         put_device(sysfs_root);
2421 }
2422
2423 static int __init intel_idle_init(void)
2424 {
2425         const struct x86_cpu_id *id;
2426         unsigned int eax, ebx, ecx;
2427         int retval;
2428
2429         /* Do not load intel_idle at all for now if idle= is passed */
2430         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
2431                 return -ENODEV;
2432
2433         if (max_cstate == 0) {
2434                 pr_debug("disabled\n");
2435                 return -EPERM;
2436         }
2437
2438         id = x86_match_cpu(intel_idle_ids);
2439         if (id) {
2440                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
2441                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
2442                         return -ENODEV;
2443                 }
2444         } else {
2445                 id = x86_match_cpu(intel_mwait_ids);
2446                 if (!id)
2447                         return -ENODEV;
2448         }
2449
2450         cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates);
2451
2452         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
2453             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
2454             !mwait_substates)
2455                         return -ENODEV;
2456
2457         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2458
2459         icpu = (const struct idle_cpu *)id->driver_data;
2460         if (icpu && ignore_native()) {
2461                 pr_debug("ignoring native CPU idle states\n");
2462                 icpu = NULL;
2463         }
2464         if (icpu) {
2465                 if (icpu->state_table)
2466                         cpuidle_state_table = icpu->state_table;
2467                 else if (!intel_idle_acpi_cst_extract())
2468                         return -ENODEV;
2469
2470                 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2471                 if (icpu->disable_promotion_to_c1e)
2472                         c1e_promotion = C1E_PROMOTION_DISABLE;
2473                 if (icpu->c1_demotion_supported)
2474                         c1_demotion_supported = true;
2475                 if (icpu->use_acpi || force_use_acpi)
2476                         intel_idle_acpi_cst_extract();
2477         } else if (!intel_idle_acpi_cst_extract()) {
2478                 return -ENODEV;
2479         }
2480
2481         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
2482                  boot_cpu_data.x86_model);
2483
2484         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2485         if (!intel_idle_cpuidle_devices)
2486                 return -ENOMEM;
2487
2488         retval = intel_idle_sysfs_init();
2489         if (retval)
2490                 pr_warn("failed to initialized sysfs");
2491
2492         intel_idle_cpuidle_driver_init(&intel_idle_driver);
2493
2494         retval = cpuidle_register_driver(&intel_idle_driver);
2495         if (retval) {
2496                 struct cpuidle_driver *drv = cpuidle_get_driver();
2497                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2498                        drv ? drv->name : "none");
2499                 goto init_driver_fail;
2500         }
2501
2502         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2503                                    intel_idle_cpu_online, NULL);
2504         if (retval < 0)
2505                 goto hp_setup_fail;
2506
2507         pr_debug("Local APIC timer is reliable in %s\n",
2508                  boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2509
2510         arch_cpu_rescan_dead_smt_siblings();
2511
2512         return 0;
2513
2514 hp_setup_fail:
2515         intel_idle_cpuidle_devices_uninit();
2516         cpuidle_unregister_driver(&intel_idle_driver);
2517 init_driver_fail:
2518         intel_idle_sysfs_uninit();
2519         free_percpu(intel_idle_cpuidle_devices);
2520         return retval;
2521
2522 }
/* Register intel_idle_init() to run as a subsys_initcall_sync initcall. */
subsys_initcall_sync(intel_idle_init);
2524
/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 *
 * Note that intel_idle_init() refuses to load the driver when max_cstate is 0.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on
 * the Sapphire Rapids platform. This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
/*
 * Debugging option that forces the driver to enter all C-states with
 * interrupts enabled. Does not apply to C-states with the
 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags.
 */
module_param(force_irq_on, bool, 0444);
/*
 * Force the disabling of IBRS when X86_FEATURE_KERNEL_IBRS is on and
 * CPUIDLE_FLAG_IRQ_ENABLE isn't set.
 */
module_param(ibrs_off, bool, 0444);
MODULE_PARM_DESC(ibrs_off, "Disable IBRS when idle");