Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * acpi_numa.c - ACPI NUMA support | |
3 | * | |
4 | * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> | |
5 | * | |
6 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
1da177e4 LT |
18 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
19 | * | |
20 | */ | |
ac906a6d HG |
21 | |
22 | #define pr_fmt(fmt) "ACPI: " fmt | |
23 | ||
1da177e4 | 24 | #include <linux/module.h> |
1da177e4 LT |
25 | #include <linux/init.h> |
26 | #include <linux/kernel.h> | |
27 | #include <linux/types.h> | |
28 | #include <linux/errno.h> | |
29 | #include <linux/acpi.h> | |
b552a8c5 | 30 | #include <linux/numa.h> |
99759869 TK |
31 | #include <linux/nodemask.h> |
32 | #include <linux/topology.h> | |
1da177e4 | 33 | |
762834e8 | 34 | static nodemask_t nodes_found_map = NODE_MASK_NONE; |
762834e8 YG |
35 | |
36 | /* maps to convert between proximity domain and logical node ID */ | |
ffada891 | 37 | static int pxm_to_node_map[MAX_PXM_DOMAINS] |
b552a8c5 | 38 | = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; |
ffada891 | 39 | static int node_to_pxm_map[MAX_NUMNODES] |
b552a8c5 | 40 | = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; |
762834e8 | 41 | |
8df0eb7c KG |
42 | unsigned char acpi_srat_revision __initdata; |
43 | ||
f363d16f | 44 | int pxm_to_node(int pxm) |
762834e8 YG |
45 | { |
46 | if (pxm < 0) | |
b552a8c5 | 47 | return NUMA_NO_NODE; |
762834e8 YG |
48 | return pxm_to_node_map[pxm]; |
49 | } | |
50 | ||
f363d16f | 51 | int node_to_pxm(int node) |
762834e8 YG |
52 | { |
53 | if (node < 0) | |
54 | return PXM_INVAL; | |
55 | return node_to_pxm_map[node]; | |
56 | } | |
57 | ||
d79ed248 | 58 | static void __acpi_map_pxm_to_node(int pxm, int node) |
3484d798 | 59 | { |
0f9b75ef DR |
60 | if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) |
61 | pxm_to_node_map[pxm] = node; | |
62 | if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) | |
63 | node_to_pxm_map[node] = pxm; | |
3484d798 DR |
64 | } |
65 | ||
8ff6f48d | 66 | int acpi_map_pxm_to_node(int pxm) |
762834e8 | 67 | { |
99759869 TK |
68 | int node; |
69 | ||
70 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) | |
71 | return NUMA_NO_NODE; | |
72 | ||
73 | node = pxm_to_node_map[pxm]; | |
762834e8 | 74 | |
1bb25df0 | 75 | if (node == NUMA_NO_NODE) { |
762834e8 | 76 | if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) |
b552a8c5 | 77 | return NUMA_NO_NODE; |
762834e8 | 78 | node = first_unset_node(nodes_found_map); |
3484d798 | 79 | __acpi_map_pxm_to_node(pxm, node); |
762834e8 YG |
80 | node_set(node, nodes_found_map); |
81 | } | |
82 | ||
83 | return node; | |
84 | } | |
85 | ||
99759869 TK |
86 | /** |
87 | * acpi_map_pxm_to_online_node - Map proximity ID to online node | |
88 | * @pxm: ACPI proximity ID | |
89 | * | |
90 | * This is similar to acpi_map_pxm_to_node(), but always returns an online | |
91 | * node. When the mapped node from a given proximity ID is offline, it | |
92 | * looks up the node distance table and returns the nearest online node. | |
93 | * | |
94 | * ACPI device drivers, which are called after the NUMA initialization has | |
95 | * completed in the kernel, can call this interface to obtain their device | |
96 | * NUMA topology from ACPI tables. Such drivers do not have to deal with | |
97 | * offline nodes. A node may be offline when a device proximity ID is | |
98 | * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. | |
99 | * "numa=off" on x86. | |
100 | */ | |
101 | int acpi_map_pxm_to_online_node(int pxm) | |
102 | { | |
103 | int node, n, dist, min_dist; | |
104 | ||
105 | node = acpi_map_pxm_to_node(pxm); | |
106 | ||
107 | if (node == NUMA_NO_NODE) | |
108 | node = 0; | |
109 | ||
110 | if (!node_online(node)) { | |
111 | min_dist = INT_MAX; | |
112 | for_each_online_node(n) { | |
113 | dist = node_distance(node, n); | |
114 | if (dist < min_dist) { | |
115 | min_dist = dist; | |
116 | node = n; | |
117 | } | |
118 | } | |
119 | } | |
120 | ||
121 | return node; | |
122 | } | |
123 | EXPORT_SYMBOL(acpi_map_pxm_to_online_node); | |
124 | ||
ae2c6dcf DR |
125 | static void __init |
126 | acpi_table_print_srat_entry(struct acpi_subtable_header *header) | |
1da177e4 | 127 | { |
1da177e4 | 128 | switch (header->type) { |
15a58ed1 | 129 | case ACPI_SRAT_TYPE_CPU_AFFINITY: |
4be44fcd | 130 | { |
15a58ed1 AS |
131 | struct acpi_srat_cpu_affinity *p = |
132 | (struct acpi_srat_cpu_affinity *)header; | |
3dda4481 HG |
133 | pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", |
134 | p->apic_id, p->local_sapic_eid, | |
135 | p->proximity_domain_lo, | |
136 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? | |
137 | "enabled" : "disabled"); | |
4be44fcd | 138 | } |
1da177e4 LT |
139 | break; |
140 | ||
15a58ed1 | 141 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: |
4be44fcd | 142 | { |
15a58ed1 AS |
143 | struct acpi_srat_mem_affinity *p = |
144 | (struct acpi_srat_mem_affinity *)header; | |
3dda4481 HG |
145 | pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", |
146 | (unsigned long)p->base_address, | |
147 | (unsigned long)p->length, | |
148 | p->proximity_domain, | |
149 | (p->flags & ACPI_SRAT_MEM_ENABLED) ? | |
150 | "enabled" : "disabled", | |
151 | (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? | |
152 | " hot-pluggable" : "", | |
153 | (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ? | |
154 | " non-volatile" : ""); | |
4be44fcd | 155 | } |
1da177e4 LT |
156 | break; |
157 | ||
7237d3de | 158 | case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: |
7237d3de SS |
159 | { |
160 | struct acpi_srat_x2apic_cpu_affinity *p = | |
161 | (struct acpi_srat_x2apic_cpu_affinity *)header; | |
3dda4481 HG |
162 | pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n", |
163 | p->apic_id, | |
164 | p->proximity_domain, | |
165 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? | |
166 | "enabled" : "disabled"); | |
7237d3de | 167 | } |
7237d3de | 168 | break; |
3dda4481 | 169 | |
1da177e4 | 170 | default: |
ac906a6d HG |
171 | pr_warn("Found unsupported SRAT entry (type = 0x%x)\n", |
172 | header->type); | |
1da177e4 LT |
173 | break; |
174 | } | |
175 | } | |
176 | ||
39b8931b FY |
177 | /* |
178 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | |
179 | * up the NUMA heuristics which wants the local node to have a smaller | |
180 | * distance than the others. | |
181 | * Do some quick checks here and only use the SLIT if it passes. | |
182 | */ | |
40e31856 | 183 | static int __init slit_valid(struct acpi_table_slit *slit) |
39b8931b FY |
184 | { |
185 | int i, j; | |
186 | int d = slit->locality_count; | |
187 | for (i = 0; i < d; i++) { | |
188 | for (j = 0; j < d; j++) { | |
189 | u8 val = slit->entry[d*i + j]; | |
190 | if (i == j) { | |
191 | if (val != LOCAL_DISTANCE) | |
192 | return 0; | |
193 | } else if (val <= LOCAL_DISTANCE) | |
194 | return 0; | |
195 | } | |
196 | } | |
197 | return 1; | |
198 | } | |
199 | ||
6525afdf HG |
#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
/*
 * Callback for SLIT parsing: copy the firmware distance matrix into the
 * kernel's node distance table.  pxm_to_node() returns NUMA_NO_NODE for
 * I/O localities since SRAT does not list them; such rows and columns are
 * skipped because I/O localities are not supported at this point.
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	int from_pxm, to_pxm;

	for (from_pxm = 0; from_pxm < slit->locality_count; from_pxm++) {
		const int from_node = pxm_to_node(from_pxm);

		if (from_node == NUMA_NO_NODE)
			continue;

		for (to_pxm = 0; to_pxm < slit->locality_count; to_pxm++) {
			const int to_node = pxm_to_node(to_pxm);

			if (to_node != NUMA_NO_NODE)
				numa_set_distance(from_node, to_node,
					slit->entry[slit->locality_count * from_pxm + to_pxm]);
		}
	}
}
#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
228 | ||
15a58ed1 | 229 | static int __init acpi_parse_slit(struct acpi_table_header *table) |
1da177e4 | 230 | { |
2fad9308 | 231 | struct acpi_table_slit *slit = (struct acpi_table_slit *)table; |
1da177e4 | 232 | |
39b8931b | 233 | if (!slit_valid(slit)) { |
ac906a6d | 234 | pr_info("SLIT table looks invalid. Not used.\n"); |
39b8931b FY |
235 | return -EINVAL; |
236 | } | |
1da177e4 LT |
237 | acpi_numa_slit_init(slit); |
238 | ||
239 | return 0; | |
240 | } | |
241 | ||
beffbe54 | 242 | void __init __weak |
7237d3de SS |
243 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) |
244 | { | |
ac906a6d | 245 | pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); |
7237d3de SS |
246 | } |
247 | ||
7237d3de SS |
248 | static int __init |
249 | acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, | |
250 | const unsigned long end) | |
251 | { | |
252 | struct acpi_srat_x2apic_cpu_affinity *processor_affinity; | |
253 | ||
254 | processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; | |
255 | if (!processor_affinity) | |
256 | return -EINVAL; | |
257 | ||
258 | acpi_table_print_srat_entry(header); | |
259 | ||
260 | /* let architecture-dependent part to do it */ | |
261 | acpi_numa_x2apic_affinity_init(processor_affinity); | |
262 | ||
263 | return 0; | |
264 | } | |
265 | ||
1da177e4 | 266 | static int __init |
7237d3de | 267 | acpi_parse_processor_affinity(struct acpi_subtable_header *header, |
4be44fcd | 268 | const unsigned long end) |
1da177e4 | 269 | { |
15a58ed1 | 270 | struct acpi_srat_cpu_affinity *processor_affinity; |
1da177e4 | 271 | |
15a58ed1 | 272 | processor_affinity = (struct acpi_srat_cpu_affinity *)header; |
1da177e4 LT |
273 | if (!processor_affinity) |
274 | return -EINVAL; | |
275 | ||
276 | acpi_table_print_srat_entry(header); | |
277 | ||
278 | /* let architecture-dependent part to do it */ | |
279 | acpi_numa_processor_affinity_init(processor_affinity); | |
280 | ||
281 | return 0; | |
282 | } | |
283 | ||
095adbb6 TR |
284 | static int __initdata parsed_numa_memblks; |
285 | ||
1da177e4 | 286 | static int __init |
15a58ed1 | 287 | acpi_parse_memory_affinity(struct acpi_subtable_header * header, |
4be44fcd | 288 | const unsigned long end) |
1da177e4 | 289 | { |
15a58ed1 | 290 | struct acpi_srat_mem_affinity *memory_affinity; |
1da177e4 | 291 | |
15a58ed1 | 292 | memory_affinity = (struct acpi_srat_mem_affinity *)header; |
1da177e4 LT |
293 | if (!memory_affinity) |
294 | return -EINVAL; | |
295 | ||
296 | acpi_table_print_srat_entry(header); | |
297 | ||
298 | /* let architecture-dependent part to do it */ | |
095adbb6 TR |
299 | if (!acpi_numa_memory_affinity_init(memory_affinity)) |
300 | parsed_numa_memblks++; | |
1da177e4 LT |
301 | return 0; |
302 | } | |
303 | ||
15a58ed1 | 304 | static int __init acpi_parse_srat(struct acpi_table_header *table) |
1da177e4 | 305 | { |
2fad9308 | 306 | struct acpi_table_srat *srat = (struct acpi_table_srat *)table; |
1da177e4 | 307 | |
8df0eb7c KG |
308 | acpi_srat_revision = srat->header.revision; |
309 | ||
cfa806f0 | 310 | /* Real work done in acpi_table_parse_srat below. */ |
1da177e4 LT |
311 | |
312 | return 0; | |
313 | } | |
314 | ||
ae2c6dcf | 315 | static int __init |
15a58ed1 | 316 | acpi_table_parse_srat(enum acpi_srat_type id, |
b43e1065 | 317 | acpi_tbl_entry_handler handler, unsigned int max_entries) |
1da177e4 | 318 | { |
6eb87fed | 319 | return acpi_table_parse_entries(ACPI_SIG_SRAT, |
4be44fcd LB |
320 | sizeof(struct acpi_table_srat), id, |
321 | handler, max_entries); | |
1da177e4 LT |
322 | } |
323 | ||
20e6926d | 324 | int __init acpi_numa_init(void) |
e8d19552 | 325 | { |
20e6926d YL |
326 | int cnt = 0; |
327 | ||
d3bd0588 YL |
328 | /* |
329 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= | |
330 | * SRAT cpu entries could have different order with that in MADT. | |
331 | * So go over all cpu entries in SRAT to get apicid to node mapping. | |
332 | */ | |
8716273c | 333 | |
1da177e4 | 334 | /* SRAT: Static Resource Affinity Table */ |
7f8f97c3 | 335 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { |
702b07fc LA |
336 | struct acpi_subtable_proc srat_proc[2]; |
337 | ||
338 | memset(srat_proc, 0, sizeof(srat_proc)); | |
339 | srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; | |
340 | srat_proc[0].handler = acpi_parse_processor_affinity; | |
341 | srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY; | |
342 | srat_proc[1].handler = acpi_parse_x2apic_affinity; | |
343 | ||
344 | acpi_table_parse_entries_array(ACPI_SIG_SRAT, | |
345 | sizeof(struct acpi_table_srat), | |
346 | srat_proc, ARRAY_SIZE(srat_proc), 0); | |
347 | ||
20e6926d YL |
348 | cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, |
349 | acpi_parse_memory_affinity, | |
350 | NR_NODE_MEMBLKS); | |
1da177e4 LT |
351 | } |
352 | ||
353 | /* SLIT: System Locality Information Table */ | |
7f8f97c3 | 354 | acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); |
1da177e4 | 355 | |
20e6926d YL |
356 | if (cnt < 0) |
357 | return cnt; | |
095adbb6 | 358 | else if (!parsed_numa_memblks) |
f3946fb6 | 359 | return -ENOENT; |
940fed2e | 360 | return 0; |
1da177e4 LT |
361 | } |
362 | ||
d79ed248 | 363 | static int acpi_get_pxm(acpi_handle h) |
1da177e4 | 364 | { |
27663c58 | 365 | unsigned long long pxm; |
1da177e4 LT |
366 | acpi_status status; |
367 | acpi_handle handle; | |
368 | acpi_handle phandle = h; | |
369 | ||
370 | do { | |
371 | handle = phandle; | |
372 | status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); | |
373 | if (ACPI_SUCCESS(status)) | |
50dd0969 | 374 | return pxm; |
1da177e4 | 375 | status = acpi_get_parent(handle, &phandle); |
4be44fcd | 376 | } while (ACPI_SUCCESS(status)); |
1da177e4 LT |
377 | return -1; |
378 | } | |
1e3590e2 | 379 | |
486c79b5 | 380 | int acpi_get_node(acpi_handle handle) |
1e3590e2 | 381 | { |
962fe9c9 | 382 | int pxm; |
1e3590e2 | 383 | |
1e3590e2 | 384 | pxm = acpi_get_pxm(handle); |
1e3590e2 | 385 | |
962fe9c9 | 386 | return acpi_map_pxm_to_node(pxm); |
1e3590e2 YG |
387 | } |
388 | EXPORT_SYMBOL(acpi_get_node); |