Merge tag 'vfio-v6.4-rc1' of https://github.com/awilliam/linux-vfio
[linux-block.git] / arch / x86 / kernel / itmt.c
CommitLineData
b886d83c 1// SPDX-License-Identifier: GPL-2.0-only
5e76b2ab
TC
2/*
3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
4 *
5 * (C) Copyright 2016 Intel Corporation
6 * Author: Tim Chen <tim.c.chen@linux.intel.com>
7 *
5e76b2ab
TC
8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
9 * the maximum turbo frequencies of some cores in a CPU package may be
10 * higher than for the other cores in the same package. In that case,
11 * better performance can be achieved by making the scheduler prefer
12 * to run tasks on the CPUs with higher max turbo frequencies.
13 *
14 * This file provides functions and data structures for enabling the
15 * scheduler to favor scheduling on cores that can be boosted to a higher
16 * frequency under ITMT.
17 */
18
19#include <linux/sched.h>
20#include <linux/cpumask.h>
21#include <linux/cpuset.h>
a293b395 22#include <linux/mutex.h>
5e76b2ab
TC
23#include <linux/sysctl.h>
24#include <linux/nodemask.h>
25
/*
 * Held by the sysctl handler and by sched_set_itmt_support() /
 * sched_clear_itmt_support() while they change the ITMT capability flag,
 * the sched_itmt_enabled value and the sysctl table registration.
 */
26static DEFINE_MUTEX(itmt_update_mutex);
/*
 * Per-CPU priority value: written by sched_set_itmt_core_prio() and
 * returned by arch_asym_cpu_priority().
 */
27DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
28
29/* Boolean to track if system has ITMT capabilities */
30static bool __read_mostly sched_itmt_capable;
31
f9793e34
TC
32/*
33 * Boolean to control whether we want to move processes to cpu capable
34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35 * Technology 3.0.
36 *
37 * It can be set via /proc/sys/kernel/sched_itmt_enabled
38 */
39unsigned int __read_mostly sysctl_sched_itmt_enabled;
40
41static int sched_itmt_update_handler(struct ctl_table *table, int write,
32927393 42 void *buffer, size_t *lenp, loff_t *ppos)
f9793e34
TC
43{
44 unsigned int old_sysctl;
45 int ret;
46
47 mutex_lock(&itmt_update_mutex);
48
49 if (!sched_itmt_capable) {
50 mutex_unlock(&itmt_update_mutex);
51 return -EINVAL;
52 }
53
54 old_sysctl = sysctl_sched_itmt_enabled;
55 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
56
57 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
58 x86_topology_update = true;
59 rebuild_sched_domains();
60 }
61
62 mutex_unlock(&itmt_update_mutex);
63
64 return ret;
65}
66
f9793e34
TC
67static struct ctl_table itmt_kern_table[] = {
68 {
69 .procname = "sched_itmt_enabled",
70 .data = &sysctl_sched_itmt_enabled,
71 .maxlen = sizeof(unsigned int),
72 .mode = 0644,
73 .proc_handler = sched_itmt_update_handler,
eec4844f
MC
74 .extra1 = SYSCTL_ZERO,
75 .extra2 = SYSCTL_ONE,
f9793e34
TC
76 },
77 {}
78};
79
f9793e34
TC
/*
 * Handle returned by register_sysctl() for itmt_kern_table; reset to NULL
 * by sched_clear_itmt_support() after the table is unregistered.
 */
80static struct ctl_table_header *itmt_sysctl_header;
81
5e76b2ab
TC
82/**
83 * sched_set_itmt_support() - Indicate platform supports ITMT
84 *
85 * This function is used by the OS to indicate to scheduler that the platform
86 * is capable of supporting the ITMT feature.
87 *
88 * The current scheme has the pstate driver detects if the system
89 * is ITMT capable and call sched_set_itmt_support.
90 *
91 * This must be done only after sched_set_itmt_core_prio
92 * has been called to set the cpus' priorities.
f9793e34
TC
93 * It must not be called with cpu hot plug lock
94 * held as we need to acquire the lock to rebuild sched domains
95 * later.
96 *
97 * Return: 0 on success
5e76b2ab 98 */
f9793e34 99int sched_set_itmt_support(void)
5e76b2ab
TC
100{
101 mutex_lock(&itmt_update_mutex);
102
f9793e34
TC
103 if (sched_itmt_capable) {
104 mutex_unlock(&itmt_update_mutex);
105 return 0;
106 }
107
89d7971e 108 itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
f9793e34
TC
109 if (!itmt_sysctl_header) {
110 mutex_unlock(&itmt_update_mutex);
111 return -ENOMEM;
112 }
113
5e76b2ab
TC
114 sched_itmt_capable = true;
115
f9793e34
TC
116 sysctl_sched_itmt_enabled = 1;
117
02cfdc95
TC
118 x86_topology_update = true;
119 rebuild_sched_domains();
f9793e34 120
5e76b2ab 121 mutex_unlock(&itmt_update_mutex);
f9793e34
TC
122
123 return 0;
5e76b2ab
TC
124}
125
126/**
127 * sched_clear_itmt_support() - Revoke platform's support of ITMT
128 *
129 * This function is used by the OS to indicate that it has
130 * revoked the platform's support of ITMT feature.
131 *
f9793e34
TC
132 * It must not be called with cpu hot plug lock
133 * held as we need to acquire the lock to rebuild sched domains
134 * later.
5e76b2ab
TC
135 */
136void sched_clear_itmt_support(void)
137{
138 mutex_lock(&itmt_update_mutex);
139
f9793e34
TC
140 if (!sched_itmt_capable) {
141 mutex_unlock(&itmt_update_mutex);
142 return;
143 }
5e76b2ab
TC
144 sched_itmt_capable = false;
145
f9793e34
TC
146 if (itmt_sysctl_header) {
147 unregister_sysctl_table(itmt_sysctl_header);
148 itmt_sysctl_header = NULL;
149 }
150
151 if (sysctl_sched_itmt_enabled) {
152 /* disable sched_itmt if we are no longer ITMT capable */
153 sysctl_sched_itmt_enabled = 0;
154 x86_topology_update = true;
155 rebuild_sched_domains();
156 }
157
5e76b2ab
TC
158 mutex_unlock(&itmt_update_mutex);
159}
160
161int arch_asym_cpu_priority(int cpu)
162{
163 return per_cpu(sched_core_priority, cpu);
164}
165
166/**
167 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
168 * @prio: Priority of cpu core
169 * @core_cpu: The cpu number associated with the core
170 *
171 * The pstate driver will find out the max boost frequency
172 * and call this function to set a priority proportional
173 * to the max boost frequency. CPU with higher boost
174 * frequency will receive higher priority.
175 *
176 * No need to rebuild sched domain after updating
177 * the CPU priorities. The sched domains have no
178 * dependency on CPU priorities.
179 */
180void sched_set_itmt_core_prio(int prio, int core_cpu)
181{
182 int cpu, i = 1;
183
184 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
185 int smt_prio;
186
187 /*
188 * Ensure that the siblings are moved to the end
189 * of the priority chain and only used when
190 * all other high priority cpus are out of capacity.
191 */
183b8ec3 192 smt_prio = prio * smp_num_siblings / (i * i);
5e76b2ab
TC
193 per_cpu(sched_core_priority, cpu) = smt_prio;
194 i++;
195 }
196}