/*
 * include/linux/topology.h
 *
 * Written by: Matthew Dobson, IBM Corporation
 *
 * Copyright (C) 2002, IBM Corp.
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to <colpatch@us.ibm.com>
 */
#ifndef _LINUX_TOPOLOGY_H
#define _LINUX_TOPOLOGY_H

#include <linux/arch_topology.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/bitops.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <asm/topology.h>

#ifndef nr_cpus_node
#define nr_cpus_node(node)	cpumask_weight(cpumask_of_node(node))
#endif

int arch_update_cpu_topology(void);

/* Conform to ACPI 2.0 SLIT distance definitions */
#define LOCAL_DISTANCE		10
#define REMOTE_DISTANCE		20
#define DISTANCE_BITS		8
#ifndef node_distance
#define node_distance(from,to)	((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
#endif
#ifndef RECLAIM_DISTANCE
/*
 * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
 * (in whatever arch-specific measurement units node_distance() returns)
 * and node_reclaim_mode is enabled then the VM will only call node_reclaim()
 * on nodes within this distance.
 */
#define RECLAIM_DISTANCE 30
#endif

/*
 * The following tunable allows platforms to override the default node
 * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are
 * sufficiently fast that the default value actually hurts
 * performance.
 *
 * AMD EPYC machines use this because even though the 2-hop distance
 * is 32 (3.2x slower than a local memory access), performance actually
 * *improves* if allowed to reclaim memory and load balance tasks
 * between NUMA nodes 2 hops apart.
 */
extern int __read_mostly node_reclaim_distance;

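/*
 * Illustrative sketch (not part of the original header): callers compare
 * SLIT distances against this tunable roughly as follows, where 'a' and
 * 'b' are hypothetical node IDs:
 *
 *	bool near = node_distance(a, b) <= node_reclaim_distance;
 */
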
#ifndef PENALTY_FOR_NODE_WITH_CPUS
#define PENALTY_FOR_NODE_WITH_CPUS	(1)
#endif

#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DECLARE_PER_CPU(int, numa_node);

#ifndef numa_node_id
/* Returns the number of the current Node. */
static inline int numa_node_id(void)
{
	return raw_cpu_read(numa_node);
}
#endif

#ifndef cpu_to_node
static inline int cpu_to_node(int cpu)
{
	return per_cpu(numa_node, cpu);
}
#endif

#ifndef set_numa_node
static inline void set_numa_node(int node)
{
	this_cpu_write(numa_node, node);
}
#endif

#ifndef set_cpu_numa_node
static inline void set_cpu_numa_node(int cpu, int node)
{
	per_cpu(numa_node, cpu) = node;
}
#endif

#else	/* !CONFIG_USE_PERCPU_NUMA_NODE_ID */

/* Returns the number of the current Node. */
#ifndef numa_node_id
static inline int numa_node_id(void)
{
	return cpu_to_node(raw_smp_processor_id());
}
#endif

#endif	/* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */

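/*
 * Illustrative usage sketch (not part of the original header): a caller
 * wanting memory on the current CPU's node might do the following;
 * 'struct foo' is a hypothetical type:
 *
 *	struct foo *f = kmalloc_node(sizeof(*f), GFP_KERNEL, numa_node_id());
 */
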
#ifdef CONFIG_HAVE_MEMORYLESS_NODES

/*
 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
 */
DECLARE_PER_CPU(int, _numa_mem_);

#ifndef set_numa_mem
static inline void set_numa_mem(int node)
{
	this_cpu_write(_numa_mem_, node);
}
#endif

#ifndef numa_mem_id
/* Returns the number of the nearest Node with memory */
static inline int numa_mem_id(void)
{
	return raw_cpu_read(_numa_mem_);
}
#endif

#ifndef cpu_to_mem
static inline int cpu_to_mem(int cpu)
{
	return per_cpu(_numa_mem_, cpu);
}
#endif

#ifndef set_cpu_numa_mem
static inline void set_cpu_numa_mem(int cpu, int node)
{
	per_cpu(_numa_mem_, cpu) = node;
}
#endif

#else	/* !CONFIG_HAVE_MEMORYLESS_NODES */

#ifndef numa_mem_id
/* Returns the number of the nearest Node with memory */
static inline int numa_mem_id(void)
{
	return numa_node_id();
}
#endif

#ifndef cpu_to_mem
static inline int cpu_to_mem(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

#endif	/* [!]CONFIG_HAVE_MEMORYLESS_NODES */

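/*
 * Illustrative sketch (not part of the original header): on systems with
 * memoryless nodes, allocations are better targeted at the nearest node
 * that actually has memory:
 *
 *	struct page *page = alloc_pages_node(numa_mem_id(), GFP_KERNEL, 0);
 */
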
#if defined(topology_die_id) && defined(topology_die_cpumask)
#define TOPOLOGY_DIE_SYSFS
#endif
#if defined(topology_cluster_id) && defined(topology_cluster_cpumask)
#define TOPOLOGY_CLUSTER_SYSFS
#endif
#if defined(topology_book_id) && defined(topology_book_cpumask)
#define TOPOLOGY_BOOK_SYSFS
#endif
#if defined(topology_drawer_id) && defined(topology_drawer_cpumask)
#define TOPOLOGY_DRAWER_SYSFS
#endif

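/*
 * The guards above let the topology sysfs code (drivers/base/topology.c)
 * create die/cluster/book/drawer attributes only on architectures that
 * define both the corresponding *_id() and *_cpumask() macros.
 */
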
#ifndef topology_physical_package_id
#define topology_physical_package_id(cpu)	((void)(cpu), -1)
#endif
#ifndef topology_die_id
#define topology_die_id(cpu)			((void)(cpu), -1)
#endif
#ifndef topology_cluster_id
#define topology_cluster_id(cpu)		((void)(cpu), -1)
#endif
#ifndef topology_core_id
#define topology_core_id(cpu)			((void)(cpu), 0)
#endif
#ifndef topology_book_id
#define topology_book_id(cpu)			((void)(cpu), -1)
#endif
#ifndef topology_drawer_id
#define topology_drawer_id(cpu)			((void)(cpu), -1)
#endif
#ifndef topology_ppin
#define topology_ppin(cpu)			((void)(cpu), 0ull)
#endif
#ifndef topology_sibling_cpumask
#define topology_sibling_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_core_cpumask
#define topology_core_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_cluster_cpumask
#define topology_cluster_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_die_cpumask
#define topology_die_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_book_cpumask
#define topology_book_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_drawer_cpumask
#define topology_drawer_cpumask(cpu)		cpumask_of(cpu)
#endif

#if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask)
static inline const struct cpumask *cpu_smt_mask(int cpu)
{
	return topology_sibling_cpumask(cpu);
}
#endif

#ifndef topology_is_primary_thread

static inline bool topology_is_primary_thread(unsigned int cpu)
{
	/*
	 * When disabling SMT, the primary thread of the SMT core will
	 * remain enabled/active. Architectures that have a special primary
	 * thread (e.g. x86) need to override this function. Otherwise the
	 * first thread of each SMT core can be made the primary thread.
	 *
	 * The sibling cpumask of an offline CPU always contains the CPU
	 * itself on architectures using the implementation of
	 * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology.
	 * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for
	 * building their topology have to check whether to use this default
	 * implementation or to override it.
	 */
	return cpu == cpumask_first(topology_sibling_cpumask(cpu));
}
#define topology_is_primary_thread topology_is_primary_thread

#endif

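/*
 * Illustrative sketch (not part of the original header): selecting one
 * thread per SMT core, e.g. for per-core housekeeping; per_core_work()
 * is a hypothetical helper:
 *
 *	for_each_online_cpu(cpu)
 *		if (topology_is_primary_thread(cpu))
 *			per_core_work(cpu);
 */
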
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
	return cpumask_of_node(cpu_to_node(cpu));
}

#ifdef CONFIG_NUMA
int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node);
extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops);
#else
static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
{
	return cpumask_nth_and(cpu, cpus, cpu_online_mask);
}

static inline const struct cpumask *
sched_numa_hop_mask(unsigned int node, unsigned int hops)
{
	return ERR_PTR(-EOPNOTSUPP);
}
#endif	/* CONFIG_NUMA */

/**
 * for_each_node_numadist() - iterate over nodes in increasing distance
 *			      order, starting from a given node
 * @node: the iteration variable and the starting node.
 * @unvisited: a nodemask to keep track of the unvisited nodes.
 *
 * This macro iterates over NUMA node IDs in increasing distance from the
 * starting @node and yields MAX_NUMNODES when all the nodes have been
 * visited.
 *
 * Note that by the time the loop completes, the @unvisited nodemask will
 * be fully cleared, unless the loop exits early.
 *
 * The difference between for_each_node() and for_each_node_numadist() is
 * that the former iterates over nodes in numerical order, whereas the
 * latter iterates over nodes in increasing order of distance.
 *
 * The complexity of this iterator is O(N^2), where N represents the
 * number of nodes, as each iteration involves scanning all nodes to
 * find the one with the shortest distance.
 *
 * Requires rcu_lock to be held.
 */
#define for_each_node_numadist(node, unvisited)				\
	for (int __start = (node),					\
	     (node) = nearest_node_nodemask((__start), &(unvisited));	\
	     (node) < MAX_NUMNODES;					\
	     node_clear((node), (unvisited)),				\
	     (node) = nearest_node_nodemask((__start), &(unvisited)))

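/*
 * Illustrative usage sketch (not part of the original header): visiting
 * all memory nodes nearest-first, starting from the current node:
 *
 *	nodemask_t unvisited;
 *	int node = numa_node_id();
 *
 *	nodes_copy(unvisited, node_states[N_MEMORY]);
 *	rcu_read_lock();
 *	for_each_node_numadist(node, unvisited)
 *		pr_info("next nearest node: %d\n", node);
 *	rcu_read_unlock();
 */
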
/**
 * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
 *			    from a given node.
 * @mask: the iteration variable.
 * @node: the NUMA node to start the search from.
 *
 * Requires rcu_lock to be held.
 *
 * Yields cpu_online_mask for @node == NUMA_NO_NODE.
 */
#define for_each_numa_hop_mask(mask, node)				\
	for (unsigned int __hops = 0;					\
	     mask = (node != NUMA_NO_NODE || __hops) ?			\
		     sched_numa_hop_mask(node, __hops) :		\
		     cpu_online_mask,					\
	     !IS_ERR_OR_NULL(mask);					\
	     __hops++)

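/*
 * Illustrative usage sketch (not part of the original header): visiting
 * each CPU exactly once in increasing NUMA distance from 'node'. Each
 * yielded mask is cumulative (it includes CPUs from smaller hop counts),
 * so for_each_cpu_andnot() skips CPUs already seen; do_something() is a
 * hypothetical callback:
 *
 *	const struct cpumask *mask, *prev = cpu_none_mask;
 *	unsigned int cpu;
 *
 *	rcu_read_lock();
 *	for_each_numa_hop_mask(mask, node) {
 *		for_each_cpu_andnot(cpu, mask, prev)
 *			do_something(cpu);
 *		prev = mask;
 *	}
 *	rcu_read_unlock();
 */
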
DECLARE_PER_CPU(unsigned long, cpu_scale);

static inline unsigned long topology_get_cpu_scale(int cpu)
{
	return per_cpu(cpu_scale, cpu);
}

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);

#endif /* _LINUX_TOPOLOGY_H */