Commit | Line | Data |
---|---|---|
5e76b2ab TC |
1 | /* |
2 | * itmt.c: Support Intel Turbo Boost Max Technology 3.0 | |
3 | * | |
4 | * (C) Copyright 2016 Intel Corporation | |
5 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation; version 2 | |
10 | * of the License. | |
11 | * | |
12 | * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), | |
13 | * the maximum turbo frequencies of some cores in a CPU package may be | |
14 | * higher than for the other cores in the same package. In that case, | |
15 | * better performance can be achieved by making the scheduler prefer | |
16 | * to run tasks on the CPUs with higher max turbo frequencies. | |
17 | * | |
18 | * This file provides functions and data structures for enabling the | |
19 | * scheduler to favor scheduling on cores that can be boosted to a higher | |
20 | * frequency under ITMT. | |
21 | */ | |
22 | ||
23 | #include <linux/sched.h> | |
24 | #include <linux/cpumask.h> | |
25 | #include <linux/cpuset.h> | |
a293b395 | 26 | #include <linux/mutex.h> |
5e76b2ab TC |
27 | #include <linux/sched.h> |
28 | #include <linux/sysctl.h> | |
29 | #include <linux/nodemask.h> | |
30 | ||
31 | static DEFINE_MUTEX(itmt_update_mutex); | |
32 | DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); | |
33 | ||
34 | /* Boolean to track if system has ITMT capabilities */ | |
35 | static bool __read_mostly sched_itmt_capable; | |
36 | ||
f9793e34 TC |
37 | /* |
38 | * Boolean to control whether we want to move processes to cpu capable | |
39 | * of higher turbo frequency for cpus supporting Intel Turbo Boost Max | |
40 | * Technology 3.0. | |
41 | * | |
42 | * It can be set via /proc/sys/kernel/sched_itmt_enabled | |
43 | */ | |
44 | unsigned int __read_mostly sysctl_sched_itmt_enabled; | |
45 | ||
46 | static int sched_itmt_update_handler(struct ctl_table *table, int write, | |
47 | void __user *buffer, size_t *lenp, | |
48 | loff_t *ppos) | |
49 | { | |
50 | unsigned int old_sysctl; | |
51 | int ret; | |
52 | ||
53 | mutex_lock(&itmt_update_mutex); | |
54 | ||
55 | if (!sched_itmt_capable) { | |
56 | mutex_unlock(&itmt_update_mutex); | |
57 | return -EINVAL; | |
58 | } | |
59 | ||
60 | old_sysctl = sysctl_sched_itmt_enabled; | |
61 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | |
62 | ||
63 | if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { | |
64 | x86_topology_update = true; | |
65 | rebuild_sched_domains(); | |
66 | } | |
67 | ||
68 | mutex_unlock(&itmt_update_mutex); | |
69 | ||
70 | return ret; | |
71 | } | |
72 | ||
73 | static unsigned int zero; | |
74 | static unsigned int one = 1; | |
75 | static struct ctl_table itmt_kern_table[] = { | |
76 | { | |
77 | .procname = "sched_itmt_enabled", | |
78 | .data = &sysctl_sched_itmt_enabled, | |
79 | .maxlen = sizeof(unsigned int), | |
80 | .mode = 0644, | |
81 | .proc_handler = sched_itmt_update_handler, | |
82 | .extra1 = &zero, | |
83 | .extra2 = &one, | |
84 | }, | |
85 | {} | |
86 | }; | |
87 | ||
88 | static struct ctl_table itmt_root_table[] = { | |
89 | { | |
90 | .procname = "kernel", | |
91 | .mode = 0555, | |
92 | .child = itmt_kern_table, | |
93 | }, | |
94 | {} | |
95 | }; | |
96 | ||
97 | static struct ctl_table_header *itmt_sysctl_header; | |
98 | ||
5e76b2ab TC |
99 | /** |
100 | * sched_set_itmt_support() - Indicate platform supports ITMT | |
101 | * | |
102 | * This function is used by the OS to indicate to scheduler that the platform | |
103 | * is capable of supporting the ITMT feature. | |
104 | * | |
105 | * The current scheme has the pstate driver detects if the system | |
106 | * is ITMT capable and call sched_set_itmt_support. | |
107 | * | |
108 | * This must be done only after sched_set_itmt_core_prio | |
109 | * has been called to set the cpus' priorities. | |
f9793e34 TC |
110 | * It must not be called with cpu hot plug lock |
111 | * held as we need to acquire the lock to rebuild sched domains | |
112 | * later. | |
113 | * | |
114 | * Return: 0 on success | |
5e76b2ab | 115 | */ |
f9793e34 | 116 | int sched_set_itmt_support(void) |
5e76b2ab TC |
117 | { |
118 | mutex_lock(&itmt_update_mutex); | |
119 | ||
f9793e34 TC |
120 | if (sched_itmt_capable) { |
121 | mutex_unlock(&itmt_update_mutex); | |
122 | return 0; | |
123 | } | |
124 | ||
125 | itmt_sysctl_header = register_sysctl_table(itmt_root_table); | |
126 | if (!itmt_sysctl_header) { | |
127 | mutex_unlock(&itmt_update_mutex); | |
128 | return -ENOMEM; | |
129 | } | |
130 | ||
5e76b2ab TC |
131 | sched_itmt_capable = true; |
132 | ||
f9793e34 TC |
133 | sysctl_sched_itmt_enabled = 1; |
134 | ||
02cfdc95 TC |
135 | x86_topology_update = true; |
136 | rebuild_sched_domains(); | |
f9793e34 | 137 | |
5e76b2ab | 138 | mutex_unlock(&itmt_update_mutex); |
f9793e34 TC |
139 | |
140 | return 0; | |
5e76b2ab TC |
141 | } |
142 | ||
143 | /** | |
144 | * sched_clear_itmt_support() - Revoke platform's support of ITMT | |
145 | * | |
146 | * This function is used by the OS to indicate that it has | |
147 | * revoked the platform's support of ITMT feature. | |
148 | * | |
f9793e34 TC |
149 | * It must not be called with cpu hot plug lock |
150 | * held as we need to acquire the lock to rebuild sched domains | |
151 | * later. | |
5e76b2ab TC |
152 | */ |
153 | void sched_clear_itmt_support(void) | |
154 | { | |
155 | mutex_lock(&itmt_update_mutex); | |
156 | ||
f9793e34 TC |
157 | if (!sched_itmt_capable) { |
158 | mutex_unlock(&itmt_update_mutex); | |
159 | return; | |
160 | } | |
5e76b2ab TC |
161 | sched_itmt_capable = false; |
162 | ||
f9793e34 TC |
163 | if (itmt_sysctl_header) { |
164 | unregister_sysctl_table(itmt_sysctl_header); | |
165 | itmt_sysctl_header = NULL; | |
166 | } | |
167 | ||
168 | if (sysctl_sched_itmt_enabled) { | |
169 | /* disable sched_itmt if we are no longer ITMT capable */ | |
170 | sysctl_sched_itmt_enabled = 0; | |
171 | x86_topology_update = true; | |
172 | rebuild_sched_domains(); | |
173 | } | |
174 | ||
5e76b2ab TC |
175 | mutex_unlock(&itmt_update_mutex); |
176 | } | |
177 | ||
178 | int arch_asym_cpu_priority(int cpu) | |
179 | { | |
180 | return per_cpu(sched_core_priority, cpu); | |
181 | } | |
182 | ||
183 | /** | |
184 | * sched_set_itmt_core_prio() - Set CPU priority based on ITMT | |
185 | * @prio: Priority of cpu core | |
186 | * @core_cpu: The cpu number associated with the core | |
187 | * | |
188 | * The pstate driver will find out the max boost frequency | |
189 | * and call this function to set a priority proportional | |
190 | * to the max boost frequency. CPU with higher boost | |
191 | * frequency will receive higher priority. | |
192 | * | |
193 | * No need to rebuild sched domain after updating | |
194 | * the CPU priorities. The sched domains have no | |
195 | * dependency on CPU priorities. | |
196 | */ | |
197 | void sched_set_itmt_core_prio(int prio, int core_cpu) | |
198 | { | |
199 | int cpu, i = 1; | |
200 | ||
201 | for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { | |
202 | int smt_prio; | |
203 | ||
204 | /* | |
205 | * Ensure that the siblings are moved to the end | |
206 | * of the priority chain and only used when | |
207 | * all other high priority cpus are out of capacity. | |
208 | */ | |
209 | smt_prio = prio * smp_num_siblings / i; | |
210 | per_cpu(sched_core_priority, cpu) = smt_prio; | |
211 | i++; | |
212 | } | |
213 | } |