Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * acpi_numa.c - ACPI NUMA support | |
3 | * | |
4 | * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> | |
5 | * | |
6 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
21 | * | |
22 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
23 | * | |
24 | */ | |
25 | #include <linux/module.h> | |
1da177e4 LT |
26 | #include <linux/init.h> |
27 | #include <linux/kernel.h> | |
28 | #include <linux/types.h> | |
29 | #include <linux/errno.h> | |
30 | #include <linux/acpi.h> | |
b552a8c5 | 31 | #include <linux/numa.h> |
99759869 TK |
32 | #include <linux/nodemask.h> |
33 | #include <linux/topology.h> | |
1da177e4 | 34 | |
a192a958 LB |
35 | #define PREFIX "ACPI: " |
36 | ||
1da177e4 LT |
37 | #define ACPI_NUMA 0x80000000 |
38 | #define _COMPONENT ACPI_NUMA | |
f52fd66d | 39 | ACPI_MODULE_NAME("numa"); |
1da177e4 | 40 | |
762834e8 | 41 | static nodemask_t nodes_found_map = NODE_MASK_NONE; |
762834e8 YG |
42 | |
43 | /* maps to convert between proximity domain and logical node ID */ | |
ffada891 | 44 | static int pxm_to_node_map[MAX_PXM_DOMAINS] |
b552a8c5 | 45 | = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; |
ffada891 | 46 | static int node_to_pxm_map[MAX_NUMNODES] |
b552a8c5 | 47 | = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; |
762834e8 | 48 | |
8df0eb7c KG |
49 | unsigned char acpi_srat_revision __initdata; |
50 | ||
f363d16f | 51 | int pxm_to_node(int pxm) |
762834e8 YG |
52 | { |
53 | if (pxm < 0) | |
b552a8c5 | 54 | return NUMA_NO_NODE; |
762834e8 YG |
55 | return pxm_to_node_map[pxm]; |
56 | } | |
57 | ||
f363d16f | 58 | int node_to_pxm(int node) |
762834e8 YG |
59 | { |
60 | if (node < 0) | |
61 | return PXM_INVAL; | |
62 | return node_to_pxm_map[node]; | |
63 | } | |
64 | ||
d79ed248 | 65 | static void __acpi_map_pxm_to_node(int pxm, int node) |
3484d798 | 66 | { |
0f9b75ef DR |
67 | if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) |
68 | pxm_to_node_map[pxm] = node; | |
69 | if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) | |
70 | node_to_pxm_map[node] = pxm; | |
3484d798 DR |
71 | } |
72 | ||
8ff6f48d | 73 | int acpi_map_pxm_to_node(int pxm) |
762834e8 | 74 | { |
99759869 TK |
75 | int node; |
76 | ||
77 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) | |
78 | return NUMA_NO_NODE; | |
79 | ||
80 | node = pxm_to_node_map[pxm]; | |
762834e8 | 81 | |
1bb25df0 | 82 | if (node == NUMA_NO_NODE) { |
762834e8 | 83 | if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) |
b552a8c5 | 84 | return NUMA_NO_NODE; |
762834e8 | 85 | node = first_unset_node(nodes_found_map); |
3484d798 | 86 | __acpi_map_pxm_to_node(pxm, node); |
762834e8 YG |
87 | node_set(node, nodes_found_map); |
88 | } | |
89 | ||
90 | return node; | |
91 | } | |
92 | ||
99759869 TK |
93 | /** |
94 | * acpi_map_pxm_to_online_node - Map proximity ID to online node | |
95 | * @pxm: ACPI proximity ID | |
96 | * | |
97 | * This is similar to acpi_map_pxm_to_node(), but always returns an online | |
98 | * node. When the mapped node from a given proximity ID is offline, it | |
99 | * looks up the node distance table and returns the nearest online node. | |
100 | * | |
101 | * ACPI device drivers, which are called after the NUMA initialization has | |
102 | * completed in the kernel, can call this interface to obtain their device | |
103 | * NUMA topology from ACPI tables. Such drivers do not have to deal with | |
104 | * offline nodes. A node may be offline when a device proximity ID is | |
105 | * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. | |
106 | * "numa=off" on x86. | |
107 | */ | |
108 | int acpi_map_pxm_to_online_node(int pxm) | |
109 | { | |
110 | int node, n, dist, min_dist; | |
111 | ||
112 | node = acpi_map_pxm_to_node(pxm); | |
113 | ||
114 | if (node == NUMA_NO_NODE) | |
115 | node = 0; | |
116 | ||
117 | if (!node_online(node)) { | |
118 | min_dist = INT_MAX; | |
119 | for_each_online_node(n) { | |
120 | dist = node_distance(node, n); | |
121 | if (dist < min_dist) { | |
122 | min_dist = dist; | |
123 | node = n; | |
124 | } | |
125 | } | |
126 | } | |
127 | ||
128 | return node; | |
129 | } | |
130 | EXPORT_SYMBOL(acpi_map_pxm_to_online_node); | |
131 | ||
ae2c6dcf DR |
132 | static void __init |
133 | acpi_table_print_srat_entry(struct acpi_subtable_header *header) | |
1da177e4 LT |
134 | { |
135 | ||
4be44fcd | 136 | ACPI_FUNCTION_NAME("acpi_table_print_srat_entry"); |
1da177e4 LT |
137 | |
138 | if (!header) | |
139 | return; | |
140 | ||
141 | switch (header->type) { | |
142 | ||
15a58ed1 | 143 | case ACPI_SRAT_TYPE_CPU_AFFINITY: |
1da177e4 | 144 | #ifdef ACPI_DEBUG_OUTPUT |
4be44fcd | 145 | { |
15a58ed1 AS |
146 | struct acpi_srat_cpu_affinity *p = |
147 | (struct acpi_srat_cpu_affinity *)header; | |
4be44fcd LB |
148 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
149 | "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", | |
15a58ed1 AS |
150 | p->apic_id, p->local_sapic_eid, |
151 | p->proximity_domain_lo, | |
152 | (p->flags & ACPI_SRAT_CPU_ENABLED)? | |
153 | "enabled" : "disabled")); | |
4be44fcd LB |
154 | } |
155 | #endif /* ACPI_DEBUG_OUTPUT */ | |
1da177e4 LT |
156 | break; |
157 | ||
15a58ed1 | 158 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: |
1da177e4 | 159 | #ifdef ACPI_DEBUG_OUTPUT |
4be44fcd | 160 | { |
15a58ed1 AS |
161 | struct acpi_srat_mem_affinity *p = |
162 | (struct acpi_srat_mem_affinity *)header; | |
4be44fcd | 163 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
208f6cc9 | 164 | "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", |
15a58ed1 AS |
165 | (unsigned long)p->base_address, |
166 | (unsigned long)p->length, | |
19d0cfe9 | 167 | p->proximity_domain, |
15a58ed1 AS |
168 | (p->flags & ACPI_SRAT_MEM_ENABLED)? |
169 | "enabled" : "disabled", | |
170 | (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)? | |
208f6cc9 DB |
171 | " hot-pluggable" : "", |
172 | (p->flags & ACPI_SRAT_MEM_NON_VOLATILE)? | |
173 | " non-volatile" : "")); | |
4be44fcd LB |
174 | } |
175 | #endif /* ACPI_DEBUG_OUTPUT */ | |
1da177e4 LT |
176 | break; |
177 | ||
7237d3de SS |
178 | case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: |
179 | #ifdef ACPI_DEBUG_OUTPUT | |
180 | { | |
181 | struct acpi_srat_x2apic_cpu_affinity *p = | |
182 | (struct acpi_srat_x2apic_cpu_affinity *)header; | |
183 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | |
184 | "SRAT Processor (x2apicid[0x%08x]) in" | |
185 | " proximity domain %d %s\n", | |
186 | p->apic_id, | |
187 | p->proximity_domain, | |
188 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? | |
189 | "enabled" : "disabled")); | |
190 | } | |
191 | #endif /* ACPI_DEBUG_OUTPUT */ | |
192 | break; | |
1da177e4 | 193 | default: |
4be44fcd LB |
194 | printk(KERN_WARNING PREFIX |
195 | "Found unsupported SRAT entry (type = 0x%x)\n", | |
196 | header->type); | |
1da177e4 LT |
197 | break; |
198 | } | |
199 | } | |
200 | ||
39b8931b FY |
201 | /* |
202 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | |
203 | * up the NUMA heuristics which wants the local node to have a smaller | |
204 | * distance than the others. | |
205 | * Do some quick checks here and only use the SLIT if it passes. | |
206 | */ | |
40e31856 | 207 | static int __init slit_valid(struct acpi_table_slit *slit) |
39b8931b FY |
208 | { |
209 | int i, j; | |
210 | int d = slit->locality_count; | |
211 | for (i = 0; i < d; i++) { | |
212 | for (j = 0; j < d; j++) { | |
213 | u8 val = slit->entry[d*i + j]; | |
214 | if (i == j) { | |
215 | if (val != LOCAL_DISTANCE) | |
216 | return 0; | |
217 | } else if (val <= LOCAL_DISTANCE) | |
218 | return 0; | |
219 | } | |
220 | } | |
221 | return 1; | |
222 | } | |
223 | ||
15a58ed1 | 224 | static int __init acpi_parse_slit(struct acpi_table_header *table) |
1da177e4 | 225 | { |
2fad9308 | 226 | struct acpi_table_slit *slit = (struct acpi_table_slit *)table; |
1da177e4 | 227 | |
39b8931b FY |
228 | if (!slit_valid(slit)) { |
229 | printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); | |
230 | return -EINVAL; | |
231 | } | |
1da177e4 LT |
232 | acpi_numa_slit_init(slit); |
233 | ||
234 | return 0; | |
235 | } | |
236 | ||
beffbe54 | 237 | void __init __weak |
7237d3de SS |
238 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) |
239 | { | |
240 | printk(KERN_WARNING PREFIX | |
241 | "Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); | |
242 | return; | |
243 | } | |
244 | ||
245 | ||
246 | static int __init | |
247 | acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, | |
248 | const unsigned long end) | |
249 | { | |
250 | struct acpi_srat_x2apic_cpu_affinity *processor_affinity; | |
251 | ||
252 | processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; | |
253 | if (!processor_affinity) | |
254 | return -EINVAL; | |
255 | ||
256 | acpi_table_print_srat_entry(header); | |
257 | ||
258 | /* let architecture-dependent part to do it */ | |
259 | acpi_numa_x2apic_affinity_init(processor_affinity); | |
260 | ||
261 | return 0; | |
262 | } | |
263 | ||
1da177e4 | 264 | static int __init |
7237d3de | 265 | acpi_parse_processor_affinity(struct acpi_subtable_header *header, |
4be44fcd | 266 | const unsigned long end) |
1da177e4 | 267 | { |
15a58ed1 | 268 | struct acpi_srat_cpu_affinity *processor_affinity; |
1da177e4 | 269 | |
15a58ed1 | 270 | processor_affinity = (struct acpi_srat_cpu_affinity *)header; |
1da177e4 LT |
271 | if (!processor_affinity) |
272 | return -EINVAL; | |
273 | ||
274 | acpi_table_print_srat_entry(header); | |
275 | ||
276 | /* let architecture-dependent part to do it */ | |
277 | acpi_numa_processor_affinity_init(processor_affinity); | |
278 | ||
279 | return 0; | |
280 | } | |
281 | ||
095adbb6 TR |
282 | static int __initdata parsed_numa_memblks; |
283 | ||
1da177e4 | 284 | static int __init |
15a58ed1 | 285 | acpi_parse_memory_affinity(struct acpi_subtable_header * header, |
4be44fcd | 286 | const unsigned long end) |
1da177e4 | 287 | { |
15a58ed1 | 288 | struct acpi_srat_mem_affinity *memory_affinity; |
1da177e4 | 289 | |
15a58ed1 | 290 | memory_affinity = (struct acpi_srat_mem_affinity *)header; |
1da177e4 LT |
291 | if (!memory_affinity) |
292 | return -EINVAL; | |
293 | ||
294 | acpi_table_print_srat_entry(header); | |
295 | ||
296 | /* let architecture-dependent part to do it */ | |
095adbb6 TR |
297 | if (!acpi_numa_memory_affinity_init(memory_affinity)) |
298 | parsed_numa_memblks++; | |
1da177e4 LT |
299 | return 0; |
300 | } | |
301 | ||
15a58ed1 | 302 | static int __init acpi_parse_srat(struct acpi_table_header *table) |
1da177e4 | 303 | { |
2fad9308 | 304 | struct acpi_table_srat *srat = (struct acpi_table_srat *)table; |
1da177e4 | 305 | |
8df0eb7c KG |
306 | acpi_srat_revision = srat->header.revision; |
307 | ||
cfa806f0 | 308 | /* Real work done in acpi_table_parse_srat below. */ |
1da177e4 LT |
309 | |
310 | return 0; | |
311 | } | |
312 | ||
ae2c6dcf | 313 | static int __init |
15a58ed1 | 314 | acpi_table_parse_srat(enum acpi_srat_type id, |
b43e1065 | 315 | acpi_tbl_entry_handler handler, unsigned int max_entries) |
1da177e4 | 316 | { |
6eb87fed | 317 | return acpi_table_parse_entries(ACPI_SIG_SRAT, |
4be44fcd LB |
318 | sizeof(struct acpi_table_srat), id, |
319 | handler, max_entries); | |
1da177e4 LT |
320 | } |
321 | ||
20e6926d | 322 | int __init acpi_numa_init(void) |
e8d19552 | 323 | { |
20e6926d YL |
324 | int cnt = 0; |
325 | ||
d3bd0588 YL |
326 | /* |
327 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= | |
328 | * SRAT cpu entries could have different order with that in MADT. | |
329 | * So go over all cpu entries in SRAT to get apicid to node mapping. | |
330 | */ | |
8716273c | 331 | |
1da177e4 | 332 | /* SRAT: Static Resource Affinity Table */ |
7f8f97c3 | 333 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { |
7237d3de | 334 | acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, |
20e6926d | 335 | acpi_parse_x2apic_affinity, 0); |
ae2c6dcf | 336 | acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, |
20e6926d YL |
337 | acpi_parse_processor_affinity, 0); |
338 | cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, | |
339 | acpi_parse_memory_affinity, | |
340 | NR_NODE_MEMBLKS); | |
1da177e4 LT |
341 | } |
342 | ||
343 | /* SLIT: System Locality Information Table */ | |
7f8f97c3 | 344 | acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); |
1da177e4 LT |
345 | |
346 | acpi_numa_arch_fixup(); | |
940fed2e | 347 | |
20e6926d YL |
348 | if (cnt < 0) |
349 | return cnt; | |
095adbb6 | 350 | else if (!parsed_numa_memblks) |
f3946fb6 | 351 | return -ENOENT; |
940fed2e | 352 | return 0; |
1da177e4 LT |
353 | } |
354 | ||
d79ed248 | 355 | static int acpi_get_pxm(acpi_handle h) |
1da177e4 | 356 | { |
27663c58 | 357 | unsigned long long pxm; |
1da177e4 LT |
358 | acpi_status status; |
359 | acpi_handle handle; | |
360 | acpi_handle phandle = h; | |
361 | ||
362 | do { | |
363 | handle = phandle; | |
364 | status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); | |
365 | if (ACPI_SUCCESS(status)) | |
50dd0969 | 366 | return pxm; |
1da177e4 | 367 | status = acpi_get_parent(handle, &phandle); |
4be44fcd | 368 | } while (ACPI_SUCCESS(status)); |
1da177e4 LT |
369 | return -1; |
370 | } | |
1e3590e2 | 371 | |
486c79b5 | 372 | int acpi_get_node(acpi_handle handle) |
1e3590e2 | 373 | { |
962fe9c9 | 374 | int pxm; |
1e3590e2 | 375 | |
1e3590e2 | 376 | pxm = acpi_get_pxm(handle); |
1e3590e2 | 377 | |
962fe9c9 | 378 | return acpi_map_pxm_to_node(pxm); |
1e3590e2 YG |
379 | } |
380 | EXPORT_SYMBOL(acpi_get_node); |