Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
[linux-2.6-block.git] / arch / x86 / kernel / itmt.c
CommitLineData
5e76b2ab
TC
1/*
2 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
3 *
4 * (C) Copyright 2016 Intel Corporation
5 * Author: Tim Chen <tim.c.chen@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
13 * the maximum turbo frequencies of some cores in a CPU package may be
14 * higher than for the other cores in the same package. In that case,
15 * better performance can be achieved by making the scheduler prefer
16 * to run tasks on the CPUs with higher max turbo frequencies.
17 *
18 * This file provides functions and data structures for enabling the
19 * scheduler to favor scheduling on cores which can be boosted to a higher
20 * frequency under ITMT.
21 */
22
23#include <linux/sched.h>
24#include <linux/cpumask.h>
25#include <linux/cpuset.h>
a293b395 26#include <linux/mutex.h>
5e76b2ab
TC
27#include <linux/sysctl.h>
28#include <linux/nodemask.h>
29
30static DEFINE_MUTEX(itmt_update_mutex);
31DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
32
33/* Boolean to track if system has ITMT capabilities */
34static bool __read_mostly sched_itmt_capable;
35
f9793e34
TC
36/*
37 * Boolean to control whether we want to move processes to cpu capable
38 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
39 * Technology 3.0.
40 *
41 * It can be set via /proc/sys/kernel/sched_itmt_enabled
42 */
43unsigned int __read_mostly sysctl_sched_itmt_enabled;
44
45static int sched_itmt_update_handler(struct ctl_table *table, int write,
46 void __user *buffer, size_t *lenp,
47 loff_t *ppos)
48{
49 unsigned int old_sysctl;
50 int ret;
51
52 mutex_lock(&itmt_update_mutex);
53
54 if (!sched_itmt_capable) {
55 mutex_unlock(&itmt_update_mutex);
56 return -EINVAL;
57 }
58
59 old_sysctl = sysctl_sched_itmt_enabled;
60 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
61
62 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
63 x86_topology_update = true;
64 rebuild_sched_domains();
65 }
66
67 mutex_unlock(&itmt_update_mutex);
68
69 return ret;
70}
71
72static unsigned int zero;
73static unsigned int one = 1;
74static struct ctl_table itmt_kern_table[] = {
75 {
76 .procname = "sched_itmt_enabled",
77 .data = &sysctl_sched_itmt_enabled,
78 .maxlen = sizeof(unsigned int),
79 .mode = 0644,
80 .proc_handler = sched_itmt_update_handler,
81 .extra1 = &zero,
82 .extra2 = &one,
83 },
84 {}
85};
86
87static struct ctl_table itmt_root_table[] = {
88 {
89 .procname = "kernel",
90 .mode = 0555,
91 .child = itmt_kern_table,
92 },
93 {}
94};
95
96static struct ctl_table_header *itmt_sysctl_header;
97
5e76b2ab
TC
98/**
99 * sched_set_itmt_support() - Indicate platform supports ITMT
100 *
101 * This function is used by the OS to indicate to scheduler that the platform
102 * is capable of supporting the ITMT feature.
103 *
104 * The current scheme has the pstate driver detects if the system
105 * is ITMT capable and call sched_set_itmt_support.
106 *
107 * This must be done only after sched_set_itmt_core_prio
108 * has been called to set the cpus' priorities.
f9793e34
TC
109 * It must not be called with cpu hot plug lock
110 * held as we need to acquire the lock to rebuild sched domains
111 * later.
112 *
113 * Return: 0 on success
5e76b2ab 114 */
f9793e34 115int sched_set_itmt_support(void)
5e76b2ab
TC
116{
117 mutex_lock(&itmt_update_mutex);
118
f9793e34
TC
119 if (sched_itmt_capable) {
120 mutex_unlock(&itmt_update_mutex);
121 return 0;
122 }
123
124 itmt_sysctl_header = register_sysctl_table(itmt_root_table);
125 if (!itmt_sysctl_header) {
126 mutex_unlock(&itmt_update_mutex);
127 return -ENOMEM;
128 }
129
5e76b2ab
TC
130 sched_itmt_capable = true;
131
f9793e34
TC
132 sysctl_sched_itmt_enabled = 1;
133
02cfdc95
TC
134 x86_topology_update = true;
135 rebuild_sched_domains();
f9793e34 136
5e76b2ab 137 mutex_unlock(&itmt_update_mutex);
f9793e34
TC
138
139 return 0;
5e76b2ab
TC
140}
141
142/**
143 * sched_clear_itmt_support() - Revoke platform's support of ITMT
144 *
145 * This function is used by the OS to indicate that it has
146 * revoked the platform's support of ITMT feature.
147 *
f9793e34
TC
148 * It must not be called with cpu hot plug lock
149 * held as we need to acquire the lock to rebuild sched domains
150 * later.
5e76b2ab
TC
151 */
152void sched_clear_itmt_support(void)
153{
154 mutex_lock(&itmt_update_mutex);
155
f9793e34
TC
156 if (!sched_itmt_capable) {
157 mutex_unlock(&itmt_update_mutex);
158 return;
159 }
5e76b2ab
TC
160 sched_itmt_capable = false;
161
f9793e34
TC
162 if (itmt_sysctl_header) {
163 unregister_sysctl_table(itmt_sysctl_header);
164 itmt_sysctl_header = NULL;
165 }
166
167 if (sysctl_sched_itmt_enabled) {
168 /* disable sched_itmt if we are no longer ITMT capable */
169 sysctl_sched_itmt_enabled = 0;
170 x86_topology_update = true;
171 rebuild_sched_domains();
172 }
173
5e76b2ab
TC
174 mutex_unlock(&itmt_update_mutex);
175}
176
177int arch_asym_cpu_priority(int cpu)
178{
179 return per_cpu(sched_core_priority, cpu);
180}
181
182/**
183 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
184 * @prio: Priority of cpu core
185 * @core_cpu: The cpu number associated with the core
186 *
187 * The pstate driver will find out the max boost frequency
188 * and call this function to set a priority proportional
189 * to the max boost frequency. CPU with higher boost
190 * frequency will receive higher priority.
191 *
192 * No need to rebuild sched domain after updating
193 * the CPU priorities. The sched domains have no
194 * dependency on CPU priorities.
195 */
196void sched_set_itmt_core_prio(int prio, int core_cpu)
197{
198 int cpu, i = 1;
199
200 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
201 int smt_prio;
202
203 /*
204 * Ensure that the siblings are moved to the end
205 * of the priority chain and only used when
206 * all other high priority cpus are out of capacity.
207 */
208 smt_prio = prio * smp_num_siblings / i;
209 per_cpu(sched_core_priority, cpu) = smt_prio;
210 i++;
211 }
212}