Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 | 2 | /* |
10fbcf4c | 3 | * Basic Node interface support |
1da177e4 LT |
4 | */ |
5 | ||
1da177e4 LT |
6 | #include <linux/module.h> |
7 | #include <linux/init.h> | |
8 | #include <linux/mm.h> | |
c04fc586 | 9 | #include <linux/memory.h> |
fa25c503 | 10 | #include <linux/vmstat.h> |
6e259e7d | 11 | #include <linux/notifier.h> |
1da177e4 LT |
12 | #include <linux/node.h> |
13 | #include <linux/hugetlb.h> | |
ed4a6d7f | 14 | #include <linux/compaction.h> |
1da177e4 LT |
15 | #include <linux/cpumask.h> |
16 | #include <linux/topology.h> | |
17 | #include <linux/nodemask.h> | |
76b67ed9 | 18 | #include <linux/cpu.h> |
bde631a5 | 19 | #include <linux/device.h> |
08d9dbe7 | 20 | #include <linux/pm_runtime.h> |
af936a16 | 21 | #include <linux/swap.h> |
18e5b539 | 22 | #include <linux/slab.h> |
1da177e4 | 23 | |
580fc9c7 | 24 | static const struct bus_type node_subsys = { |
af5ca3f4 | 25 | .name = "node", |
10fbcf4c | 26 | .dev_name = "node", |
1da177e4 LT |
27 | }; |
28 | ||
75bd50fa TT |
29 | static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, |
30 | struct bin_attribute *attr, char *buf, | |
31 | loff_t off, size_t count) | |
1da177e4 | 32 | { |
75bd50fa | 33 | struct device *dev = kobj_to_dev(kobj); |
1da177e4 | 34 | struct node *node_dev = to_node(dev); |
75bd50fa TT |
35 | cpumask_var_t mask; |
36 | ssize_t n; | |
1da177e4 | 37 | |
064f0e93 ZL |
38 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
39 | return 0; | |
40 | ||
41 | cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); | |
75bd50fa | 42 | n = cpumap_print_bitmask_to_buf(buf, mask, off, count); |
064f0e93 ZL |
43 | free_cpumask_var(mask); |
44 | ||
45 | return n; | |
1da177e4 LT |
46 | } |
47 | ||
7ee951ac | 48 | static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); |
75bd50fa TT |
49 | |
50 | static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, | |
51 | struct bin_attribute *attr, char *buf, | |
52 | loff_t off, size_t count) | |
39106dcf | 53 | { |
75bd50fa TT |
54 | struct device *dev = kobj_to_dev(kobj); |
55 | struct node *node_dev = to_node(dev); | |
56 | cpumask_var_t mask; | |
57 | ssize_t n; | |
58 | ||
59 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | |
60 | return 0; | |
948b3edb | 61 | |
75bd50fa TT |
62 | cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); |
63 | n = cpumap_print_list_to_buf(buf, mask, off, count); | |
64 | free_cpumask_var(mask); | |
948b3edb | 65 | |
75bd50fa | 66 | return n; |
39106dcf MT |
67 | } |
68 | ||
7ee951ac | 69 | static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); |
1da177e4 | 70 | |
08d9dbe7 KB |
71 | /** |
72 | * struct node_access_nodes - Access class device to hold user visible | |
73 | * relationships to other nodes. | |
74 | * @dev: Device for this memory access class | |
75 | * @list_node: List element in the node's access list | |
76 | * @access: The access class rank | |
6a954e94 | 77 | * @coord: Heterogeneous memory performance coordinates |
08d9dbe7 KB |
78 | */ |
79 | struct node_access_nodes { | |
80 | struct device dev; | |
81 | struct list_head list_node; | |
e7deeb9d | 82 | unsigned int access; |
e1cf33aa | 83 | #ifdef CONFIG_HMEM_REPORTING |
6a954e94 | 84 | struct access_coordinate coord; |
e1cf33aa | 85 | #endif |
08d9dbe7 KB |
86 | }; |
87 | #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) | |
88 | ||
89 | static struct attribute *node_init_access_node_attrs[] = { | |
90 | NULL, | |
91 | }; | |
92 | ||
93 | static struct attribute *node_targ_access_node_attrs[] = { | |
94 | NULL, | |
95 | }; | |
96 | ||
97 | static const struct attribute_group initiators = { | |
98 | .name = "initiators", | |
99 | .attrs = node_init_access_node_attrs, | |
100 | }; | |
101 | ||
102 | static const struct attribute_group targets = { | |
103 | .name = "targets", | |
104 | .attrs = node_targ_access_node_attrs, | |
105 | }; | |
106 | ||
107 | static const struct attribute_group *node_access_node_groups[] = { | |
108 | &initiators, | |
109 | &targets, | |
110 | NULL, | |
111 | }; | |
112 | ||
113 | static void node_remove_accesses(struct node *node) | |
114 | { | |
115 | struct node_access_nodes *c, *cnext; | |
116 | ||
117 | list_for_each_entry_safe(c, cnext, &node->access_list, list_node) { | |
118 | list_del(&c->list_node); | |
119 | device_unregister(&c->dev); | |
120 | } | |
121 | } | |
122 | ||
/* Device release callback: free the node_access_nodes that embeds @dev. */
static void node_access_release(struct device *dev)
{
	struct node_access_nodes *access_node = to_access_nodes(dev);

	kfree(access_node);
}
127 | ||
128 | static struct node_access_nodes *node_init_node_access(struct node *node, | |
e7deeb9d | 129 | unsigned int access) |
08d9dbe7 KB |
130 | { |
131 | struct node_access_nodes *access_node; | |
132 | struct device *dev; | |
133 | ||
134 | list_for_each_entry(access_node, &node->access_list, list_node) | |
135 | if (access_node->access == access) | |
136 | return access_node; | |
137 | ||
138 | access_node = kzalloc(sizeof(*access_node), GFP_KERNEL); | |
139 | if (!access_node) | |
140 | return NULL; | |
141 | ||
142 | access_node->access = access; | |
143 | dev = &access_node->dev; | |
144 | dev->parent = &node->dev; | |
145 | dev->release = node_access_release; | |
146 | dev->groups = node_access_node_groups; | |
147 | if (dev_set_name(dev, "access%u", access)) | |
148 | goto free; | |
149 | ||
150 | if (device_register(dev)) | |
151 | goto free_name; | |
152 | ||
153 | pm_runtime_no_callbacks(dev); | |
154 | list_add_tail(&access_node->list_node, &node->access_list); | |
155 | return access_node; | |
156 | free_name: | |
157 | kfree_const(dev->kobj.name); | |
158 | free: | |
159 | kfree(access_node); | |
160 | return NULL; | |
161 | } | |
162 | ||
e1cf33aa | 163 | #ifdef CONFIG_HMEM_REPORTING |
7810f4dc DJ |
164 | #define ACCESS_ATTR(property) \ |
165 | static ssize_t property##_show(struct device *dev, \ | |
948b3edb JP |
166 | struct device_attribute *attr, \ |
167 | char *buf) \ | |
168 | { \ | |
169 | return sysfs_emit(buf, "%u\n", \ | |
6a954e94 | 170 | to_access_nodes(dev)->coord.property); \ |
948b3edb | 171 | } \ |
7810f4dc | 172 | static DEVICE_ATTR_RO(property) |
e1cf33aa | 173 | |
6284a6e8 JP |
174 | ACCESS_ATTR(read_bandwidth); |
175 | ACCESS_ATTR(read_latency); | |
176 | ACCESS_ATTR(write_bandwidth); | |
177 | ACCESS_ATTR(write_latency); | |
e1cf33aa KB |
178 | |
179 | static struct attribute *access_attrs[] = { | |
180 | &dev_attr_read_bandwidth.attr, | |
181 | &dev_attr_read_latency.attr, | |
182 | &dev_attr_write_bandwidth.attr, | |
183 | &dev_attr_write_latency.attr, | |
184 | NULL, | |
185 | }; | |
186 | ||
187 | /** | |
188 | * node_set_perf_attrs - Set the performance values for given access class | |
189 | * @nid: Node identifier to be set | |
6a954e94 | 190 | * @coord: Heterogeneous memory performance coordinates |
e1cf33aa KB |
191 | * @access: The access class the for the given attributes |
192 | */ | |
6a954e94 | 193 | void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, |
e7deeb9d | 194 | unsigned int access) |
e1cf33aa KB |
195 | { |
196 | struct node_access_nodes *c; | |
197 | struct node *node; | |
198 | int i; | |
199 | ||
200 | if (WARN_ON_ONCE(!node_online(nid))) | |
201 | return; | |
202 | ||
203 | node = node_devices[nid]; | |
204 | c = node_init_node_access(node, access); | |
205 | if (!c) | |
206 | return; | |
207 | ||
6a954e94 | 208 | c->coord = *coord; |
e1cf33aa KB |
209 | for (i = 0; access_attrs[i] != NULL; i++) { |
210 | if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], | |
211 | "initiators")) { | |
212 | pr_info("failed to add performance attribute to node %d\n", | |
213 | nid); | |
214 | break; | |
215 | } | |
216 | } | |
217 | } | |
acc02a10 KB |
218 | |
219 | /** | |
220 | * struct node_cache_info - Internal tracking for memory node caches | |
221 | * @dev: Device represeting the cache level | |
222 | * @node: List element for tracking in the node | |
223 | * @cache_attrs:Attributes for this cache level | |
224 | */ | |
225 | struct node_cache_info { | |
226 | struct device dev; | |
227 | struct list_head node; | |
228 | struct node_cache_attrs cache_attrs; | |
229 | }; | |
230 | #define to_cache_info(device) container_of(device, struct node_cache_info, dev) | |
231 | ||
232 | #define CACHE_ATTR(name, fmt) \ | |
233 | static ssize_t name##_show(struct device *dev, \ | |
234 | struct device_attribute *attr, \ | |
235 | char *buf) \ | |
236 | { \ | |
948b3edb JP |
237 | return sysfs_emit(buf, fmt "\n", \ |
238 | to_cache_info(dev)->cache_attrs.name); \ | |
acc02a10 | 239 | } \ |
fd03c075 | 240 | static DEVICE_ATTR_RO(name); |
acc02a10 KB |
241 | |
242 | CACHE_ATTR(size, "%llu") | |
243 | CACHE_ATTR(line_size, "%u") | |
244 | CACHE_ATTR(indexing, "%u") | |
245 | CACHE_ATTR(write_policy, "%u") | |
246 | ||
247 | static struct attribute *cache_attrs[] = { | |
248 | &dev_attr_indexing.attr, | |
249 | &dev_attr_size.attr, | |
250 | &dev_attr_line_size.attr, | |
251 | &dev_attr_write_policy.attr, | |
252 | NULL, | |
253 | }; | |
254 | ATTRIBUTE_GROUPS(cache); | |
255 | ||
/*
 * Release callback of the "memory_side_cache" device, which is a bare
 * struct device allocated by node_init_cache_dev().
 */
static void node_cache_release(struct device *dev)
{
	kfree(dev);
}
260 | ||
/* Release callback of a cache level device: free its node_cache_info. */
static void node_cacheinfo_release(struct device *dev)
{
	kfree(to_cache_info(dev));
}
266 | ||
267 | static void node_init_cache_dev(struct node *node) | |
268 | { | |
269 | struct device *dev; | |
270 | ||
271 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | |
272 | if (!dev) | |
273 | return; | |
274 | ||
4ce535ec | 275 | device_initialize(dev); |
acc02a10 KB |
276 | dev->parent = &node->dev; |
277 | dev->release = node_cache_release; | |
278 | if (dev_set_name(dev, "memory_side_cache")) | |
4ce535ec | 279 | goto put_device; |
acc02a10 | 280 | |
4ce535ec DC |
281 | if (device_add(dev)) |
282 | goto put_device; | |
acc02a10 KB |
283 | |
284 | pm_runtime_no_callbacks(dev); | |
285 | node->cache_dev = dev; | |
286 | return; | |
4ce535ec DC |
287 | put_device: |
288 | put_device(dev); | |
acc02a10 KB |
289 | } |
290 | ||
291 | /** | |
292 | * node_add_cache() - add cache attribute to a memory node | |
293 | * @nid: Node identifier that has new cache attributes | |
294 | * @cache_attrs: Attributes for the cache being added | |
295 | */ | |
296 | void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) | |
297 | { | |
298 | struct node_cache_info *info; | |
299 | struct device *dev; | |
300 | struct node *node; | |
301 | ||
302 | if (!node_online(nid) || !node_devices[nid]) | |
303 | return; | |
304 | ||
305 | node = node_devices[nid]; | |
306 | list_for_each_entry(info, &node->cache_attrs, node) { | |
307 | if (info->cache_attrs.level == cache_attrs->level) { | |
308 | dev_warn(&node->dev, | |
309 | "attempt to add duplicate cache level:%d\n", | |
310 | cache_attrs->level); | |
311 | return; | |
312 | } | |
313 | } | |
314 | ||
315 | if (!node->cache_dev) | |
316 | node_init_cache_dev(node); | |
317 | if (!node->cache_dev) | |
318 | return; | |
319 | ||
320 | info = kzalloc(sizeof(*info), GFP_KERNEL); | |
321 | if (!info) | |
322 | return; | |
323 | ||
324 | dev = &info->dev; | |
4ce535ec | 325 | device_initialize(dev); |
acc02a10 KB |
326 | dev->parent = node->cache_dev; |
327 | dev->release = node_cacheinfo_release; | |
328 | dev->groups = cache_groups; | |
329 | if (dev_set_name(dev, "index%d", cache_attrs->level)) | |
4ce535ec | 330 | goto put_device; |
acc02a10 KB |
331 | |
332 | info->cache_attrs = *cache_attrs; | |
4ce535ec | 333 | if (device_add(dev)) { |
acc02a10 KB |
334 | dev_warn(&node->dev, "failed to add cache level:%d\n", |
335 | cache_attrs->level); | |
4ce535ec | 336 | goto put_device; |
acc02a10 KB |
337 | } |
338 | pm_runtime_no_callbacks(dev); | |
339 | list_add_tail(&info->node, &node->cache_attrs); | |
340 | return; | |
4ce535ec DC |
341 | put_device: |
342 | put_device(dev); | |
acc02a10 KB |
343 | } |
344 | ||
345 | static void node_remove_caches(struct node *node) | |
346 | { | |
347 | struct node_cache_info *info, *next; | |
348 | ||
349 | if (!node->cache_dev) | |
350 | return; | |
351 | ||
352 | list_for_each_entry_safe(info, next, &node->cache_attrs, node) { | |
353 | list_del(&info->node); | |
354 | device_unregister(&info->dev); | |
355 | } | |
356 | device_unregister(node->cache_dev); | |
357 | } | |
358 | ||
359 | static void node_init_caches(unsigned int nid) | |
360 | { | |
361 | INIT_LIST_HEAD(&node_devices[nid]->cache_attrs); | |
362 | } | |
363 | #else | |
/* No cache tracking without CONFIG_HMEM_REPORTING: empty stubs. */
static void node_init_caches(unsigned int nid)
{
}

static void node_remove_caches(struct node *node)
{
}
e1cf33aa KB |
366 | #endif |
367 | ||
1da177e4 | 368 | #define K(x) ((x) << (PAGE_SHIFT - 10)) |
10fbcf4c KS |
369 | static ssize_t node_read_meminfo(struct device *dev, |
370 | struct device_attribute *attr, char *buf) | |
1da177e4 | 371 | { |
948b3edb | 372 | int len = 0; |
1da177e4 | 373 | int nid = dev->id; |
599d0c95 | 374 | struct pglist_data *pgdat = NODE_DATA(nid); |
1da177e4 | 375 | struct sysinfo i; |
61f94e18 | 376 | unsigned long sreclaimable, sunreclaimable; |
b6038942 | 377 | unsigned long swapcached = 0; |
1da177e4 LT |
378 | |
379 | si_meminfo_node(&i, nid); | |
d42f3245 RG |
380 | sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B); |
381 | sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B); | |
b6038942 SB |
382 | #ifdef CONFIG_SWAP |
383 | swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE); | |
384 | #endif | |
948b3edb JP |
385 | len = sysfs_emit_at(buf, len, |
386 | "Node %d MemTotal: %8lu kB\n" | |
387 | "Node %d MemFree: %8lu kB\n" | |
388 | "Node %d MemUsed: %8lu kB\n" | |
b6038942 | 389 | "Node %d SwapCached: %8lu kB\n" |
948b3edb JP |
390 | "Node %d Active: %8lu kB\n" |
391 | "Node %d Inactive: %8lu kB\n" | |
392 | "Node %d Active(anon): %8lu kB\n" | |
393 | "Node %d Inactive(anon): %8lu kB\n" | |
394 | "Node %d Active(file): %8lu kB\n" | |
395 | "Node %d Inactive(file): %8lu kB\n" | |
396 | "Node %d Unevictable: %8lu kB\n" | |
397 | "Node %d Mlocked: %8lu kB\n", | |
398 | nid, K(i.totalram), | |
399 | nid, K(i.freeram), | |
400 | nid, K(i.totalram - i.freeram), | |
b6038942 | 401 | nid, K(swapcached), |
948b3edb JP |
402 | nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + |
403 | node_page_state(pgdat, NR_ACTIVE_FILE)), | |
404 | nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + | |
405 | node_page_state(pgdat, NR_INACTIVE_FILE)), | |
406 | nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)), | |
407 | nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)), | |
408 | nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)), | |
409 | nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)), | |
410 | nid, K(node_page_state(pgdat, NR_UNEVICTABLE)), | |
411 | nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); | |
7ee92255 | 412 | |
182e8e23 | 413 | #ifdef CONFIG_HIGHMEM |
948b3edb JP |
414 | len += sysfs_emit_at(buf, len, |
415 | "Node %d HighTotal: %8lu kB\n" | |
416 | "Node %d HighFree: %8lu kB\n" | |
417 | "Node %d LowTotal: %8lu kB\n" | |
418 | "Node %d LowFree: %8lu kB\n", | |
419 | nid, K(i.totalhigh), | |
420 | nid, K(i.freehigh), | |
421 | nid, K(i.totalram - i.totalhigh), | |
422 | nid, K(i.freeram - i.freehigh)); | |
182e8e23 | 423 | #endif |
948b3edb JP |
424 | len += sysfs_emit_at(buf, len, |
425 | "Node %d Dirty: %8lu kB\n" | |
426 | "Node %d Writeback: %8lu kB\n" | |
427 | "Node %d FilePages: %8lu kB\n" | |
428 | "Node %d Mapped: %8lu kB\n" | |
429 | "Node %d AnonPages: %8lu kB\n" | |
430 | "Node %d Shmem: %8lu kB\n" | |
431 | "Node %d KernelStack: %8lu kB\n" | |
628d06a4 | 432 | #ifdef CONFIG_SHADOW_CALL_STACK |
948b3edb | 433 | "Node %d ShadowCallStack:%8lu kB\n" |
628d06a4 | 434 | #endif |
948b3edb | 435 | "Node %d PageTables: %8lu kB\n" |
ebc97a52 | 436 | "Node %d SecPageTables: %8lu kB\n" |
948b3edb JP |
437 | "Node %d NFS_Unstable: %8lu kB\n" |
438 | "Node %d Bounce: %8lu kB\n" | |
439 | "Node %d WritebackTmp: %8lu kB\n" | |
440 | "Node %d KReclaimable: %8lu kB\n" | |
441 | "Node %d Slab: %8lu kB\n" | |
442 | "Node %d SReclaimable: %8lu kB\n" | |
443 | "Node %d SUnreclaim: %8lu kB\n" | |
05b258e9 | 444 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
948b3edb JP |
445 | "Node %d AnonHugePages: %8lu kB\n" |
446 | "Node %d ShmemHugePages: %8lu kB\n" | |
447 | "Node %d ShmemPmdMapped: %8lu kB\n" | |
4b5b7850 HD |
448 | "Node %d FileHugePages: %8lu kB\n" |
449 | "Node %d FilePmdMapped: %8lu kB\n" | |
dcdfdd40 KS |
450 | #endif |
451 | #ifdef CONFIG_UNACCEPTED_MEMORY | |
452 | "Node %d Unaccepted: %8lu kB\n" | |
05b258e9 | 453 | #endif |
948b3edb JP |
454 | , |
455 | nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), | |
456 | nid, K(node_page_state(pgdat, NR_WRITEBACK)), | |
457 | nid, K(node_page_state(pgdat, NR_FILE_PAGES)), | |
458 | nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), | |
459 | nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), | |
460 | nid, K(i.sharedram), | |
461 | nid, node_page_state(pgdat, NR_KERNEL_STACK_KB), | |
628d06a4 | 462 | #ifdef CONFIG_SHADOW_CALL_STACK |
948b3edb | 463 | nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), |
628d06a4 | 464 | #endif |
f0c0c115 | 465 | nid, K(node_page_state(pgdat, NR_PAGETABLE)), |
ebc97a52 | 466 | nid, K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), |
948b3edb JP |
467 | nid, 0UL, |
468 | nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), | |
469 | nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), | |
470 | nid, K(sreclaimable + | |
471 | node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), | |
472 | nid, K(sreclaimable + sunreclaimable), | |
473 | nid, K(sreclaimable), | |
474 | nid, K(sunreclaimable) | |
05b258e9 | 475 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
948b3edb | 476 | , |
69473e5d | 477 | nid, K(node_page_state(pgdat, NR_ANON_THPS)), |
57b2847d | 478 | nid, K(node_page_state(pgdat, NR_SHMEM_THPS)), |
a1528e21 | 479 | nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), |
bf9ecead | 480 | nid, K(node_page_state(pgdat, NR_FILE_THPS)), |
380780e7 | 481 | nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED)) |
dcdfdd40 KS |
482 | #endif |
483 | #ifdef CONFIG_UNACCEPTED_MEMORY | |
484 | , | |
485 | nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED)) | |
05b258e9 | 486 | #endif |
948b3edb | 487 | ); |
7981593b | 488 | len += hugetlb_report_node_meminfo(buf, len, nid); |
948b3edb | 489 | return len; |
1da177e4 LT |
490 | } |
491 | ||
492 | #undef K | |
948b3edb | 493 | static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL); |
1da177e4 | 494 | |
10fbcf4c | 495 | static ssize_t node_read_numastat(struct device *dev, |
948b3edb | 496 | struct device_attribute *attr, char *buf) |
1da177e4 | 497 | { |
f19298b9 | 498 | fold_vm_numa_events(); |
aa838896 JP |
499 | return sysfs_emit(buf, |
500 | "numa_hit %lu\n" | |
501 | "numa_miss %lu\n" | |
502 | "numa_foreign %lu\n" | |
503 | "interleave_hit %lu\n" | |
504 | "local_node %lu\n" | |
505 | "other_node %lu\n", | |
f19298b9 MG |
506 | sum_zone_numa_event_state(dev->id, NUMA_HIT), |
507 | sum_zone_numa_event_state(dev->id, NUMA_MISS), | |
508 | sum_zone_numa_event_state(dev->id, NUMA_FOREIGN), | |
509 | sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT), | |
510 | sum_zone_numa_event_state(dev->id, NUMA_LOCAL), | |
511 | sum_zone_numa_event_state(dev->id, NUMA_OTHER)); | |
1da177e4 | 512 | } |
948b3edb | 513 | static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL); |
1da177e4 | 514 | |
10fbcf4c KS |
515 | static ssize_t node_read_vmstat(struct device *dev, |
516 | struct device_attribute *attr, char *buf) | |
2ac39037 MR |
517 | { |
518 | int nid = dev->id; | |
75ef7184 | 519 | struct pglist_data *pgdat = NODE_DATA(nid); |
fa25c503 | 520 | int i; |
948b3edb | 521 | int len = 0; |
fa25c503 KM |
522 | |
523 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
948b3edb JP |
524 | len += sysfs_emit_at(buf, len, "%s %lu\n", |
525 | zone_stat_name(i), | |
526 | sum_zone_node_page_state(nid, i)); | |
75ef7184 | 527 | |
3a321d2a | 528 | #ifdef CONFIG_NUMA |
f19298b9 MG |
529 | fold_vm_numa_events(); |
530 | for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) | |
948b3edb JP |
531 | len += sysfs_emit_at(buf, len, "%s %lu\n", |
532 | numa_stat_name(i), | |
f19298b9 | 533 | sum_zone_numa_event_state(nid, i)); |
3a321d2a | 534 | |
948b3edb | 535 | #endif |
69473e5d MS |
536 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { |
537 | unsigned long pages = node_page_state_pages(pgdat, i); | |
538 | ||
539 | if (vmstat_item_print_in_thp(i)) | |
540 | pages /= HPAGE_PMD_NR; | |
541 | len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i), | |
542 | pages); | |
543 | } | |
fa25c503 | 544 | |
948b3edb | 545 | return len; |
2ac39037 | 546 | } |
948b3edb | 547 | static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL); |
2ac39037 | 548 | |
10fbcf4c | 549 | static ssize_t node_read_distance(struct device *dev, |
948b3edb | 550 | struct device_attribute *attr, char *buf) |
1da177e4 LT |
551 | { |
552 | int nid = dev->id; | |
553 | int len = 0; | |
554 | int i; | |
555 | ||
12ee3c0a DR |
556 | /* |
557 | * buf is currently PAGE_SIZE in length and each node needs 4 chars | |
558 | * at the most (distance + space or newline). | |
559 | */ | |
560 | BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); | |
1da177e4 | 561 | |
948b3edb JP |
562 | for_each_online_node(i) { |
563 | len += sysfs_emit_at(buf, len, "%s%d", | |
564 | i ? " " : "", node_distance(nid, i)); | |
565 | } | |
1da177e4 | 566 | |
948b3edb | 567 | len += sysfs_emit_at(buf, len, "\n"); |
1da177e4 LT |
568 | return len; |
569 | } | |
948b3edb | 570 | static DEVICE_ATTR(distance, 0444, node_read_distance, NULL); |
1da177e4 | 571 | |
3c9b8aaf | 572 | static struct attribute *node_dev_attrs[] = { |
3c9b8aaf TI |
573 | &dev_attr_meminfo.attr, |
574 | &dev_attr_numastat.attr, | |
575 | &dev_attr_distance.attr, | |
576 | &dev_attr_vmstat.attr, | |
577 | NULL | |
578 | }; | |
75bd50fa TT |
579 | |
580 | static struct bin_attribute *node_dev_bin_attrs[] = { | |
581 | &bin_attr_cpumap, | |
582 | &bin_attr_cpulist, | |
583 | NULL | |
584 | }; | |
585 | ||
586 | static const struct attribute_group node_dev_group = { | |
587 | .attrs = node_dev_attrs, | |
588 | .bin_attrs = node_dev_bin_attrs | |
589 | }; | |
590 | ||
591 | static const struct attribute_group *node_dev_groups[] = { | |
592 | &node_dev_group, | |
50468e43 JS |
593 | #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP |
594 | &arch_node_dev_group, | |
44b8f8bf JY |
595 | #endif |
596 | #ifdef CONFIG_MEMORY_FAILURE | |
597 | &memory_failure_attr_group, | |
50468e43 | 598 | #endif |
75bd50fa TT |
599 | NULL |
600 | }; | |
3c9b8aaf | 601 | |
/* Device release callback: free the struct node that embeds @dev. */
static void node_device_release(struct device *dev)
{
	struct node *node = to_node(dev);

	kfree(node);
}
1da177e4 LT |
606 | |
607 | /* | |
405ae7d3 | 608 | * register_node - Setup a sysfs device for a node. |
1da177e4 LT |
609 | * @num - Node number to use when creating the device. |
610 | * | |
611 | * Initialize and register the node device. | |
612 | */ | |
a7be6e5a | 613 | static int register_node(struct node *node, int num) |
1da177e4 LT |
614 | { |
615 | int error; | |
616 | ||
10fbcf4c KS |
617 | node->dev.id = num; |
618 | node->dev.bus = &node_subsys; | |
8c7b5b4e | 619 | node->dev.release = node_device_release; |
7ca7ec40 | 620 | node->dev.groups = node_dev_groups; |
10fbcf4c | 621 | error = device_register(&node->dev); |
1da177e4 | 622 | |
a4a00b45 | 623 | if (error) { |
c1cc0d51 | 624 | put_device(&node->dev); |
a4a00b45 | 625 | } else { |
9a305230 | 626 | hugetlb_register_node(node); |
ed4a6d7f | 627 | compaction_register_node(node); |
1da177e4 | 628 | } |
b958d4d0 | 629 | |
1da177e4 LT |
630 | return error; |
631 | } | |
632 | ||
4b45099b KT |
633 | /** |
634 | * unregister_node - unregister a node device | |
635 | * @node: node going away | |
636 | * | |
637 | * Unregisters a node device @node. All the devices on the node must be | |
638 | * unregistered before calling this function. | |
639 | */ | |
640 | void unregister_node(struct node *node) | |
641 | { | |
a4a00b45 | 642 | hugetlb_unregister_node(node); |
da63dc84 | 643 | compaction_unregister_node(node); |
08d9dbe7 | 644 | node_remove_accesses(node); |
acc02a10 | 645 | node_remove_caches(node); |
10fbcf4c | 646 | device_unregister(&node->dev); |
4b45099b | 647 | } |
1da177e4 | 648 | |
8732794b | 649 | struct node *node_devices[MAX_NUMNODES]; |
0fc44159 | 650 | |
76b67ed9 KH |
651 | /* |
652 | * register cpu under node | |
653 | */ | |
654 | int register_cpu_under_node(unsigned int cpu, unsigned int nid) | |
655 | { | |
1830794a | 656 | int ret; |
8a25a2fd | 657 | struct device *obj; |
76b67ed9 | 658 | |
f8246f31 AC |
659 | if (!node_online(nid)) |
660 | return 0; | |
661 | ||
8a25a2fd | 662 | obj = get_cpu_device(cpu); |
f8246f31 AC |
663 | if (!obj) |
664 | return 0; | |
665 | ||
8732794b | 666 | ret = sysfs_create_link(&node_devices[nid]->dev.kobj, |
f8246f31 AC |
667 | &obj->kobj, |
668 | kobject_name(&obj->kobj)); | |
1830794a AC |
669 | if (ret) |
670 | return ret; | |
671 | ||
672 | return sysfs_create_link(&obj->kobj, | |
8732794b WC |
673 | &node_devices[nid]->dev.kobj, |
674 | kobject_name(&node_devices[nid]->dev.kobj)); | |
76b67ed9 KH |
675 | } |
676 | ||
08d9dbe7 KB |
677 | /** |
678 | * register_memory_node_under_compute_node - link memory node to its compute | |
679 | * node for a given access class. | |
58cb346c MCC |
680 | * @mem_nid: Memory node number |
681 | * @cpu_nid: Cpu node number | |
08d9dbe7 KB |
682 | * @access: Access class to register |
683 | * | |
684 | * Description: | |
685 | * For use with platforms that may have separate memory and compute nodes. | |
686 | * This function will export node relationships linking which memory | |
687 | * initiator nodes can access memory targets at a given ranked access | |
688 | * class. | |
689 | */ | |
690 | int register_memory_node_under_compute_node(unsigned int mem_nid, | |
691 | unsigned int cpu_nid, | |
e7deeb9d | 692 | unsigned int access) |
08d9dbe7 KB |
693 | { |
694 | struct node *init_node, *targ_node; | |
695 | struct node_access_nodes *initiator, *target; | |
696 | int ret; | |
697 | ||
698 | if (!node_online(cpu_nid) || !node_online(mem_nid)) | |
699 | return -ENODEV; | |
700 | ||
701 | init_node = node_devices[cpu_nid]; | |
702 | targ_node = node_devices[mem_nid]; | |
703 | initiator = node_init_node_access(init_node, access); | |
704 | target = node_init_node_access(targ_node, access); | |
705 | if (!initiator || !target) | |
706 | return -ENOMEM; | |
707 | ||
708 | ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets", | |
709 | &targ_node->dev.kobj, | |
710 | dev_name(&targ_node->dev)); | |
711 | if (ret) | |
712 | return ret; | |
713 | ||
714 | ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators", | |
715 | &init_node->dev.kobj, | |
716 | dev_name(&init_node->dev)); | |
717 | if (ret) | |
718 | goto err; | |
719 | ||
720 | return 0; | |
721 | err: | |
722 | sysfs_remove_link_from_group(&initiator->dev.kobj, "targets", | |
723 | dev_name(&targ_node->dev)); | |
724 | return ret; | |
725 | } | |
726 | ||
76b67ed9 KH |
727 | int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) |
728 | { | |
8a25a2fd | 729 | struct device *obj; |
b9d52dad AC |
730 | |
731 | if (!node_online(nid)) | |
732 | return 0; | |
733 | ||
8a25a2fd | 734 | obj = get_cpu_device(cpu); |
b9d52dad AC |
735 | if (!obj) |
736 | return 0; | |
737 | ||
8732794b | 738 | sysfs_remove_link(&node_devices[nid]->dev.kobj, |
b9d52dad | 739 | kobject_name(&obj->kobj)); |
1830794a | 740 | sysfs_remove_link(&obj->kobj, |
8732794b | 741 | kobject_name(&node_devices[nid]->dev.kobj)); |
b9d52dad | 742 | |
76b67ed9 KH |
743 | return 0; |
744 | } | |
745 | ||
50f9481e | 746 | #ifdef CONFIG_MEMORY_HOTPLUG |
bd721ea7 | 747 | static int __ref get_nid_for_pfn(unsigned long pfn) |
c04fc586 | 748 | { |
3a80a7fa | 749 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
8cdde385 | 750 | if (system_state < SYSTEM_RUNNING) |
3a80a7fa MG |
751 | return early_pfn_to_nid(pfn); |
752 | #endif | |
c04fc586 GH |
753 | return pfn_to_nid(pfn); |
754 | } | |
755 | ||
90c7eaeb | 756 | static void do_register_memory_block_under_node(int nid, |
395f6081 DH |
757 | struct memory_block *mem_blk, |
758 | enum meminit_context context) | |
f85086f9 LD |
759 | { |
760 | int ret; | |
761 | ||
395f6081 | 762 | memory_block_add_nid(mem_blk, nid, context); |
f85086f9 LD |
763 | |
764 | ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, | |
765 | &mem_blk->dev.kobj, | |
766 | kobject_name(&mem_blk->dev.kobj)); | |
90c7eaeb LD |
767 | if (ret && ret != -EEXIST) |
768 | dev_err_ratelimited(&node_devices[nid]->dev, | |
769 | "can't create link to %s in sysfs (%d)\n", | |
770 | kobject_name(&mem_blk->dev.kobj), ret); | |
f85086f9 | 771 | |
90c7eaeb | 772 | ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj, |
f85086f9 LD |
773 | &node_devices[nid]->dev.kobj, |
774 | kobject_name(&node_devices[nid]->dev.kobj)); | |
90c7eaeb LD |
775 | if (ret && ret != -EEXIST) |
776 | dev_err_ratelimited(&mem_blk->dev, | |
777 | "can't create link to %s in sysfs (%d)\n", | |
778 | kobject_name(&node_devices[nid]->dev.kobj), | |
779 | ret); | |
f85086f9 LD |
780 | } |
781 | ||
c04fc586 | 782 | /* register memory section under specified node if it spans that node */ |
f85086f9 LD |
783 | static int register_mem_block_under_node_early(struct memory_block *mem_blk, |
784 | void *arg) | |
c04fc586 | 785 | { |
b6c88d3b DH |
786 | unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; |
787 | unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); | |
788 | unsigned long end_pfn = start_pfn + memory_block_pfns - 1; | |
f85086f9 | 789 | int nid = *(int *)arg; |
b6c88d3b | 790 | unsigned long pfn; |
c04fc586 | 791 | |
b6c88d3b | 792 | for (pfn = start_pfn; pfn <= end_pfn; pfn++) { |
c04fc586 GH |
793 | int page_nid; |
794 | ||
04697858 YL |
795 | /* |
796 | * memory block could have several absent sections from start. | |
797 | * skip pfn range from absent section | |
798 | */ | |
e03d1f78 | 799 | if (!pfn_in_present_section(pfn)) { |
04697858 YL |
800 | pfn = round_down(pfn + PAGES_PER_SECTION, |
801 | PAGES_PER_SECTION) - 1; | |
802 | continue; | |
803 | } | |
804 | ||
fc44f7f9 | 805 | /* |
f85086f9 LD |
806 | * We need to check if page belongs to nid only at the boot |
807 | * case because node's ranges can be interleaved. | |
d84f2f5a | 808 | */ |
f85086f9 LD |
809 | page_nid = get_nid_for_pfn(pfn); |
810 | if (page_nid < 0) | |
811 | continue; | |
812 | if (page_nid != nid) | |
813 | continue; | |
dee5d0d5 | 814 | |
395f6081 | 815 | do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY); |
90c7eaeb | 816 | return 0; |
c04fc586 GH |
817 | } |
818 | /* mem section does not span the specified node */ | |
819 | return 0; | |
820 | } | |
821 | ||
f85086f9 LD |
822 | /* |
823 | * During hotplug we know that all pages in the memory block belong to the same | |
824 | * node. | |
825 | */ | |
826 | static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, | |
827 | void *arg) | |
828 | { | |
829 | int nid = *(int *)arg; | |
830 | ||
395f6081 | 831 | do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG); |
90c7eaeb | 832 | return 0; |
f85086f9 LD |
833 | } |
834 | ||
4c4b7f9b | 835 | /* |
d84f2f5a DH |
836 | * Unregister a memory block device under the node it spans. Memory blocks |
837 | * with multiple nodes cannot be offlined and therefore also never be removed. | |
4c4b7f9b | 838 | */ |
a31b264c | 839 | void unregister_memory_block_under_nodes(struct memory_block *mem_blk) |
c04fc586 | 840 | { |
d84f2f5a DH |
841 | if (mem_blk->nid == NUMA_NO_NODE) |
842 | return; | |
c04fc586 | 843 | |
d84f2f5a DH |
844 | sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj, |
845 | kobject_name(&mem_blk->dev.kobj)); | |
846 | sysfs_remove_link(&mem_blk->dev.kobj, | |
847 | kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); | |
c04fc586 GH |
848 | } |
849 | ||
cc651559 DH |
850 | void register_memory_blocks_under_node(int nid, unsigned long start_pfn, |
851 | unsigned long end_pfn, | |
852 | enum meminit_context context) | |
c04fc586 | 853 | { |
f85086f9 LD |
854 | walk_memory_blocks_func_t func; |
855 | ||
856 | if (context == MEMINIT_HOTPLUG) | |
857 | func = register_mem_block_under_node_hotplug; | |
858 | else | |
859 | func = register_mem_block_under_node_early; | |
860 | ||
90c7eaeb LD |
861 | walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), |
862 | (void *)&nid, func); | |
863 | return; | |
c04fc586 | 864 | } |
50f9481e | 865 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
4faf8d95 | 866 | |
9037a993 | 867 | int __register_one_node(int nid) |
0fc44159 | 868 | { |
9037a993 | 869 | int error; |
76b67ed9 | 870 | int cpu; |
48b5928e | 871 | struct node *node; |
0fc44159 | 872 | |
48b5928e GP |
873 | node = kzalloc(sizeof(struct node), GFP_KERNEL); |
874 | if (!node) | |
9037a993 | 875 | return -ENOMEM; |
c04fc586 | 876 | |
48b5928e GP |
877 | INIT_LIST_HEAD(&node->access_list); |
878 | node_devices[nid] = node; | |
879 | ||
a7be6e5a | 880 | error = register_node(node_devices[nid], nid); |
39da08cb | 881 | |
9037a993 MH |
882 | /* link cpu under this node */ |
883 | for_each_present_cpu(cpu) { | |
884 | if (cpu_to_node(cpu) == nid) | |
885 | register_cpu_under_node(cpu, nid); | |
0fc44159 YG |
886 | } |
887 | ||
acc02a10 | 888 | node_init_caches(nid); |
0fc44159 | 889 | |
9037a993 | 890 | return error; |
0fc44159 YG |
891 | } |
892 | ||
893 | void unregister_one_node(int nid) | |
894 | { | |
92d585ef XQ |
895 | if (!node_devices[nid]) |
896 | return; | |
897 | ||
8732794b | 898 | unregister_node(node_devices[nid]); |
8732794b | 899 | node_devices[nid] = NULL; |
0fc44159 YG |
900 | } |
901 | ||
bde631a5 LS |
902 | /* |
903 | * node states attributes | |
904 | */ | |
905 | ||
/*
 * A device attribute bound to one node state.  show_node_state() recovers
 * the enclosing node_attr from the embedded @attr via container_of() and
 * prints the node mask selected by @state.
 */
struct node_attr {
	struct device_attribute attr;	/* embedded sysfs attribute */
	enum node_states state;		/* index into node_states[] */
};
bde631a5 | 910 | |
10fbcf4c KS |
911 | static ssize_t show_node_state(struct device *dev, |
912 | struct device_attribute *attr, char *buf) | |
bde631a5 | 913 | { |
b15f562f | 914 | struct node_attr *na = container_of(attr, struct node_attr, attr); |
948b3edb JP |
915 | |
916 | return sysfs_emit(buf, "%*pbl\n", | |
917 | nodemask_pr_args(&node_states[na->state])); | |
bde631a5 LS |
918 | } |
919 | ||
/*
 * Initializer for one struct node_attr: a read-only (0444) attribute called
 * @attr_name that show_node_state() serves for node state @node_state.
 */
#define _NODE_ATTR(attr_name, node_state)				\
	{								\
		.attr = __ATTR(attr_name, 0444, show_node_state, NULL),	\
		.state = node_state,					\
	}
bde631a5 | 922 | |
/*
 * One attribute per node state, indexed by the state it reports so that an
 * entry can be addressed as node_state_attr[N_xxx].  The N_HIGH_MEMORY entry
 * only exists when CONFIG_HIGHMEM is enabled.  node_dev_init() checks at
 * build time that the array has exactly NR_NODE_STATES entries.
 */
static struct node_attr node_state_attr[] = {
	[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
	[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
#ifdef CONFIG_HIGHMEM
	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
#endif
	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
	[N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
					   N_GENERIC_INITIATOR),
};
935 | ||
/*
 * NULL-terminated list of the node state attributes, referenced through
 * memory_root_attr_group.attrs.  node_dev_init() checks at build time that
 * it holds NR_NODE_STATES entries plus the terminator.
 */
static struct attribute *node_state_attrs[] = {
	&node_state_attr[N_POSSIBLE].attr.attr,
	&node_state_attr[N_ONLINE].attr.attr,
	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
#ifdef CONFIG_HIGHMEM
	&node_state_attr[N_HIGH_MEMORY].attr.attr,
#endif
	&node_state_attr[N_MEMORY].attr.attr,
	&node_state_attr[N_CPU].attr.attr,
	&node_state_attr[N_GENERIC_INITIATOR].attr.attr,
	NULL
};
bde631a5 | 948 | |
/*
 * Attribute group holding the node state attributes.  Despite the "memory"
 * in its name, it is registered with the node subsystem (see
 * cpu_root_attr_groups and node_dev_init()).
 */
static const struct attribute_group memory_root_attr_group = {
	.attrs = node_state_attrs,
};
952 | ||
/*
 * NULL-terminated list of attribute groups handed to
 * subsys_system_register() for node_subsys in node_dev_init().  Despite the
 * "cpu" in its name, it only carries the node state attribute group.
 */
static const struct attribute_group *cpu_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};
957 | ||
2848a28b | 958 | void __init node_dev_init(void) |
1da177e4 | 959 | { |
2848a28b | 960 | int ret, i; |
bde631a5 | 961 | |
3701cde6 AK |
962 | BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); |
963 | BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); | |
964 | ||
10fbcf4c | 965 | ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); |
2848a28b DH |
966 | if (ret) |
967 | panic("%s() failed to register subsystem: %d\n", __func__, ret); | |
968 | ||
bde631a5 | 969 | /* |
2848a28b DH |
970 | * Create all node devices, which will properly link the node |
971 | * to applicable memory block devices and already created cpu devices. | |
bde631a5 | 972 | */ |
2848a28b DH |
973 | for_each_online_node(i) { |
974 | ret = register_one_node(i); | |
975 | if (ret) | |
976 | panic("%s() failed to add node: %d\n", __func__, ret); | |
977 | } | |
1da177e4 | 978 | } |