Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 | 2 | /* |
10fbcf4c | 3 | * Basic Node interface support |
1da177e4 LT |
4 | */ |
5 | ||
1da177e4 LT |
6 | #include <linux/module.h> |
7 | #include <linux/init.h> | |
8 | #include <linux/mm.h> | |
c04fc586 | 9 | #include <linux/memory.h> |
fa25c503 | 10 | #include <linux/vmstat.h> |
6e259e7d | 11 | #include <linux/notifier.h> |
1da177e4 LT |
12 | #include <linux/node.h> |
13 | #include <linux/hugetlb.h> | |
ed4a6d7f | 14 | #include <linux/compaction.h> |
1da177e4 LT |
15 | #include <linux/cpumask.h> |
16 | #include <linux/topology.h> | |
17 | #include <linux/nodemask.h> | |
76b67ed9 | 18 | #include <linux/cpu.h> |
bde631a5 | 19 | #include <linux/device.h> |
08d9dbe7 | 20 | #include <linux/pm_runtime.h> |
af936a16 | 21 | #include <linux/swap.h> |
18e5b539 | 22 | #include <linux/slab.h> |
a4a00b45 | 23 | #include <linux/hugetlb.h> |
1da177e4 | 24 | |
10fbcf4c | 25 | static struct bus_type node_subsys = { |
af5ca3f4 | 26 | .name = "node", |
10fbcf4c | 27 | .dev_name = "node", |
1da177e4 LT |
28 | }; |
29 | ||
75bd50fa TT |
30 | static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, |
31 | struct bin_attribute *attr, char *buf, | |
32 | loff_t off, size_t count) | |
1da177e4 | 33 | { |
75bd50fa | 34 | struct device *dev = kobj_to_dev(kobj); |
1da177e4 | 35 | struct node *node_dev = to_node(dev); |
75bd50fa TT |
36 | cpumask_var_t mask; |
37 | ssize_t n; | |
1da177e4 | 38 | |
064f0e93 ZL |
39 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
40 | return 0; | |
41 | ||
42 | cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); | |
75bd50fa | 43 | n = cpumap_print_bitmask_to_buf(buf, mask, off, count); |
064f0e93 ZL |
44 | free_cpumask_var(mask); |
45 | ||
46 | return n; | |
1da177e4 LT |
47 | } |
48 | ||
7ee951ac | 49 | static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); |
75bd50fa TT |
50 | |
51 | static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, | |
52 | struct bin_attribute *attr, char *buf, | |
53 | loff_t off, size_t count) | |
39106dcf | 54 | { |
75bd50fa TT |
55 | struct device *dev = kobj_to_dev(kobj); |
56 | struct node *node_dev = to_node(dev); | |
57 | cpumask_var_t mask; | |
58 | ssize_t n; | |
59 | ||
60 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | |
61 | return 0; | |
948b3edb | 62 | |
75bd50fa TT |
63 | cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); |
64 | n = cpumap_print_list_to_buf(buf, mask, off, count); | |
65 | free_cpumask_var(mask); | |
948b3edb | 66 | |
75bd50fa | 67 | return n; |
39106dcf MT |
68 | } |
69 | ||
7ee951ac | 70 | static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); |
1da177e4 | 71 | |
08d9dbe7 KB |
72 | /** |
73 | * struct node_access_nodes - Access class device to hold user visible | |
74 | * relationships to other nodes. | |
75 | * @dev: Device for this memory access class | |
76 | * @list_node: List element in the node's access list | |
77 | * @access: The access class rank | |
58cb346c | 78 | * @hmem_attrs: Heterogeneous memory performance attributes |
08d9dbe7 KB |
79 | */ |
80 | struct node_access_nodes { | |
81 | struct device dev; | |
82 | struct list_head list_node; | |
e7deeb9d | 83 | unsigned int access; |
e1cf33aa KB |
84 | #ifdef CONFIG_HMEM_REPORTING |
85 | struct node_hmem_attrs hmem_attrs; | |
86 | #endif | |
08d9dbe7 KB |
87 | }; |
88 | #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) | |
89 | ||
90 | static struct attribute *node_init_access_node_attrs[] = { | |
91 | NULL, | |
92 | }; | |
93 | ||
94 | static struct attribute *node_targ_access_node_attrs[] = { | |
95 | NULL, | |
96 | }; | |
97 | ||
98 | static const struct attribute_group initiators = { | |
99 | .name = "initiators", | |
100 | .attrs = node_init_access_node_attrs, | |
101 | }; | |
102 | ||
103 | static const struct attribute_group targets = { | |
104 | .name = "targets", | |
105 | .attrs = node_targ_access_node_attrs, | |
106 | }; | |
107 | ||
108 | static const struct attribute_group *node_access_node_groups[] = { | |
109 | &initiators, | |
110 | &targets, | |
111 | NULL, | |
112 | }; | |
113 | ||
114 | static void node_remove_accesses(struct node *node) | |
115 | { | |
116 | struct node_access_nodes *c, *cnext; | |
117 | ||
118 | list_for_each_entry_safe(c, cnext, &node->access_list, list_node) { | |
119 | list_del(&c->list_node); | |
120 | device_unregister(&c->dev); | |
121 | } | |
122 | } | |
123 | ||
/* Device release callback: frees the node_access_nodes that embeds @dev. */
static void node_access_release(struct device *dev)
{
	struct node_access_nodes *access_node = to_access_nodes(dev);

	kfree(access_node);
}
128 | ||
129 | static struct node_access_nodes *node_init_node_access(struct node *node, | |
e7deeb9d | 130 | unsigned int access) |
08d9dbe7 KB |
131 | { |
132 | struct node_access_nodes *access_node; | |
133 | struct device *dev; | |
134 | ||
135 | list_for_each_entry(access_node, &node->access_list, list_node) | |
136 | if (access_node->access == access) | |
137 | return access_node; | |
138 | ||
139 | access_node = kzalloc(sizeof(*access_node), GFP_KERNEL); | |
140 | if (!access_node) | |
141 | return NULL; | |
142 | ||
143 | access_node->access = access; | |
144 | dev = &access_node->dev; | |
145 | dev->parent = &node->dev; | |
146 | dev->release = node_access_release; | |
147 | dev->groups = node_access_node_groups; | |
148 | if (dev_set_name(dev, "access%u", access)) | |
149 | goto free; | |
150 | ||
151 | if (device_register(dev)) | |
152 | goto free_name; | |
153 | ||
154 | pm_runtime_no_callbacks(dev); | |
155 | list_add_tail(&access_node->list_node, &node->access_list); | |
156 | return access_node; | |
157 | free_name: | |
158 | kfree_const(dev->kobj.name); | |
159 | free: | |
160 | kfree(access_node); | |
161 | return NULL; | |
162 | } | |
163 | ||
e1cf33aa | 164 | #ifdef CONFIG_HMEM_REPORTING |
7810f4dc DJ |
165 | #define ACCESS_ATTR(property) \ |
166 | static ssize_t property##_show(struct device *dev, \ | |
948b3edb JP |
167 | struct device_attribute *attr, \ |
168 | char *buf) \ | |
169 | { \ | |
170 | return sysfs_emit(buf, "%u\n", \ | |
7810f4dc | 171 | to_access_nodes(dev)->hmem_attrs.property); \ |
948b3edb | 172 | } \ |
7810f4dc | 173 | static DEVICE_ATTR_RO(property) |
e1cf33aa | 174 | |
6284a6e8 JP |
175 | ACCESS_ATTR(read_bandwidth); |
176 | ACCESS_ATTR(read_latency); | |
177 | ACCESS_ATTR(write_bandwidth); | |
178 | ACCESS_ATTR(write_latency); | |
e1cf33aa KB |
179 | |
180 | static struct attribute *access_attrs[] = { | |
181 | &dev_attr_read_bandwidth.attr, | |
182 | &dev_attr_read_latency.attr, | |
183 | &dev_attr_write_bandwidth.attr, | |
184 | &dev_attr_write_latency.attr, | |
185 | NULL, | |
186 | }; | |
187 | ||
188 | /** | |
189 | * node_set_perf_attrs - Set the performance values for given access class | |
190 | * @nid: Node identifier to be set | |
191 | * @hmem_attrs: Heterogeneous memory performance attributes | |
192 | * @access: The access class the for the given attributes | |
193 | */ | |
194 | void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, | |
e7deeb9d | 195 | unsigned int access) |
e1cf33aa KB |
196 | { |
197 | struct node_access_nodes *c; | |
198 | struct node *node; | |
199 | int i; | |
200 | ||
201 | if (WARN_ON_ONCE(!node_online(nid))) | |
202 | return; | |
203 | ||
204 | node = node_devices[nid]; | |
205 | c = node_init_node_access(node, access); | |
206 | if (!c) | |
207 | return; | |
208 | ||
209 | c->hmem_attrs = *hmem_attrs; | |
210 | for (i = 0; access_attrs[i] != NULL; i++) { | |
211 | if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], | |
212 | "initiators")) { | |
213 | pr_info("failed to add performance attribute to node %d\n", | |
214 | nid); | |
215 | break; | |
216 | } | |
217 | } | |
218 | } | |
acc02a10 KB |
219 | |
220 | /** | |
221 | * struct node_cache_info - Internal tracking for memory node caches | |
222 | * @dev: Device represeting the cache level | |
223 | * @node: List element for tracking in the node | |
224 | * @cache_attrs:Attributes for this cache level | |
225 | */ | |
226 | struct node_cache_info { | |
227 | struct device dev; | |
228 | struct list_head node; | |
229 | struct node_cache_attrs cache_attrs; | |
230 | }; | |
231 | #define to_cache_info(device) container_of(device, struct node_cache_info, dev) | |
232 | ||
233 | #define CACHE_ATTR(name, fmt) \ | |
234 | static ssize_t name##_show(struct device *dev, \ | |
235 | struct device_attribute *attr, \ | |
236 | char *buf) \ | |
237 | { \ | |
948b3edb JP |
238 | return sysfs_emit(buf, fmt "\n", \ |
239 | to_cache_info(dev)->cache_attrs.name); \ | |
acc02a10 | 240 | } \ |
fd03c075 | 241 | static DEVICE_ATTR_RO(name); |
acc02a10 KB |
242 | |
243 | CACHE_ATTR(size, "%llu") | |
244 | CACHE_ATTR(line_size, "%u") | |
245 | CACHE_ATTR(indexing, "%u") | |
246 | CACHE_ATTR(write_policy, "%u") | |
247 | ||
248 | static struct attribute *cache_attrs[] = { | |
249 | &dev_attr_indexing.attr, | |
250 | &dev_attr_size.attr, | |
251 | &dev_attr_line_size.attr, | |
252 | &dev_attr_write_policy.attr, | |
253 | NULL, | |
254 | }; | |
255 | ATTRIBUTE_GROUPS(cache); | |
256 | ||
/*
 * Release callback for the stand-alone struct device that
 * node_init_cache_dev() allocates with kzalloc(); frees that allocation.
 */
257 | static void node_cache_release(struct device *dev) | |
258 | { | |
259 | kfree(dev); | |
260 | } | |
261 | ||
/* Release callback for a cache level: frees the node_cache_info embedding @dev. */
static void node_cacheinfo_release(struct device *dev)
{
	kfree(to_cache_info(dev));
}
267 | ||
268 | static void node_init_cache_dev(struct node *node) | |
269 | { | |
270 | struct device *dev; | |
271 | ||
272 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | |
273 | if (!dev) | |
274 | return; | |
275 | ||
4ce535ec | 276 | device_initialize(dev); |
acc02a10 KB |
277 | dev->parent = &node->dev; |
278 | dev->release = node_cache_release; | |
279 | if (dev_set_name(dev, "memory_side_cache")) | |
4ce535ec | 280 | goto put_device; |
acc02a10 | 281 | |
4ce535ec DC |
282 | if (device_add(dev)) |
283 | goto put_device; | |
acc02a10 KB |
284 | |
285 | pm_runtime_no_callbacks(dev); | |
286 | node->cache_dev = dev; | |
287 | return; | |
4ce535ec DC |
288 | put_device: |
289 | put_device(dev); | |
acc02a10 KB |
290 | } |
291 | ||
292 | /** | |
293 | * node_add_cache() - add cache attribute to a memory node | |
294 | * @nid: Node identifier that has new cache attributes | |
295 | * @cache_attrs: Attributes for the cache being added | |
296 | */ | |
297 | void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) | |
298 | { | |
299 | struct node_cache_info *info; | |
300 | struct device *dev; | |
301 | struct node *node; | |
302 | ||
303 | if (!node_online(nid) || !node_devices[nid]) | |
304 | return; | |
305 | ||
306 | node = node_devices[nid]; | |
307 | list_for_each_entry(info, &node->cache_attrs, node) { | |
308 | if (info->cache_attrs.level == cache_attrs->level) { | |
309 | dev_warn(&node->dev, | |
310 | "attempt to add duplicate cache level:%d\n", | |
311 | cache_attrs->level); | |
312 | return; | |
313 | } | |
314 | } | |
315 | ||
316 | if (!node->cache_dev) | |
317 | node_init_cache_dev(node); | |
318 | if (!node->cache_dev) | |
319 | return; | |
320 | ||
321 | info = kzalloc(sizeof(*info), GFP_KERNEL); | |
322 | if (!info) | |
323 | return; | |
324 | ||
325 | dev = &info->dev; | |
4ce535ec | 326 | device_initialize(dev); |
acc02a10 KB |
327 | dev->parent = node->cache_dev; |
328 | dev->release = node_cacheinfo_release; | |
329 | dev->groups = cache_groups; | |
330 | if (dev_set_name(dev, "index%d", cache_attrs->level)) | |
4ce535ec | 331 | goto put_device; |
acc02a10 KB |
332 | |
333 | info->cache_attrs = *cache_attrs; | |
4ce535ec | 334 | if (device_add(dev)) { |
acc02a10 KB |
335 | dev_warn(&node->dev, "failed to add cache level:%d\n", |
336 | cache_attrs->level); | |
4ce535ec | 337 | goto put_device; |
acc02a10 KB |
338 | } |
339 | pm_runtime_no_callbacks(dev); | |
340 | list_add_tail(&info->node, &node->cache_attrs); | |
341 | return; | |
4ce535ec DC |
342 | put_device: |
343 | put_device(dev); | |
acc02a10 KB |
344 | } |
345 | ||
346 | static void node_remove_caches(struct node *node) | |
347 | { | |
348 | struct node_cache_info *info, *next; | |
349 | ||
350 | if (!node->cache_dev) | |
351 | return; | |
352 | ||
353 | list_for_each_entry_safe(info, next, &node->cache_attrs, node) { | |
354 | list_del(&info->node); | |
355 | device_unregister(&info->dev); | |
356 | } | |
357 | device_unregister(node->cache_dev); | |
358 | } | |
359 | ||
360 | static void node_init_caches(unsigned int nid) | |
361 | { | |
362 | INIT_LIST_HEAD(&node_devices[nid]->cache_attrs); | |
363 | } | |
364 | #else | |
365 | static void node_init_caches(unsigned int nid) { } | |
366 | static void node_remove_caches(struct node *node) { } | |
e1cf33aa KB |
367 | #endif |
368 | ||
/*
 * node_read_meminfo - show routine for the per-node "meminfo" attribute.
 *
 * Emits one "Node <nid> <Label>: <value> kB" line per statistic using three
 * consecutive sysfs_emit_at() calls (the middle one only with
 * CONFIG_HIGHMEM), then lets hugetlb_report_node_meminfo() append its own
 * lines at offset len.  Returns the accumulated length.
 *
 * The format strings and the argument lists are paired purely by position,
 * and both are sliced by the same #ifdef conditions; any change must keep
 * the two lists in lock-step.
 *
 * K(x) shifts a value left by (PAGE_SHIFT - 10); it is applied to the
 * page-based counters below before they are printed with the "kB" suffix.
 */
1da177e4 | 369 | #define K(x) ((x) << (PAGE_SHIFT - 10)) |
10fbcf4c KS |
370 | static ssize_t node_read_meminfo(struct device *dev, |
371 | struct device_attribute *attr, char *buf) | |
1da177e4 | 372 | { |
948b3edb | 373 | int len = 0; |
1da177e4 | 374 | int nid = dev->id; |
599d0c95 | 375 | struct pglist_data *pgdat = NODE_DATA(nid); |
1da177e4 | 376 | struct sysinfo i; |
61f94e18 | 377 | unsigned long sreclaimable, sunreclaimable; |
b6038942 | 378 | unsigned long swapcached = 0; |
1da177e4 LT |
379 | |
380 | si_meminfo_node(&i, nid); | |
d42f3245 RG |
381 | sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B); |
382 | sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B); | |
/* swapcached stays 0 when CONFIG_SWAP is not set. */
b6038942 SB |
383 | #ifdef CONFIG_SWAP |
384 | swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE); | |
385 | #endif | |
948b3edb JP |
386 | len = sysfs_emit_at(buf, len, |
387 | "Node %d MemTotal: %8lu kB\n" | |
388 | "Node %d MemFree: %8lu kB\n" | |
389 | "Node %d MemUsed: %8lu kB\n" | |
b6038942 | 390 | "Node %d SwapCached: %8lu kB\n" |
948b3edb JP |
391 | "Node %d Active: %8lu kB\n" |
392 | "Node %d Inactive: %8lu kB\n" | |
393 | "Node %d Active(anon): %8lu kB\n" | |
394 | "Node %d Inactive(anon): %8lu kB\n" | |
395 | "Node %d Active(file): %8lu kB\n" | |
396 | "Node %d Inactive(file): %8lu kB\n" | |
397 | "Node %d Unevictable: %8lu kB\n" | |
398 | "Node %d Mlocked: %8lu kB\n", | |
399 | nid, K(i.totalram), | |
400 | nid, K(i.freeram), | |
401 | nid, K(i.totalram - i.freeram), | |
b6038942 | 402 | nid, K(swapcached), |
948b3edb JP |
403 | nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + |
404 | node_page_state(pgdat, NR_ACTIVE_FILE)), | |
405 | nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + | |
406 | node_page_state(pgdat, NR_INACTIVE_FILE)), | |
407 | nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)), | |
408 | nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)), | |
409 | nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)), | |
410 | nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)), | |
411 | nid, K(node_page_state(pgdat, NR_UNEVICTABLE)), | |
412 | nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); | |
7ee92255 | 413 | |
182e8e23 | 414 | #ifdef CONFIG_HIGHMEM |
948b3edb JP |
415 | len += sysfs_emit_at(buf, len, |
416 | "Node %d HighTotal: %8lu kB\n" | |
417 | "Node %d HighFree: %8lu kB\n" | |
418 | "Node %d LowTotal: %8lu kB\n" | |
419 | "Node %d LowFree: %8lu kB\n", | |
420 | nid, K(i.totalhigh), | |
421 | nid, K(i.freehigh), | |
422 | nid, K(i.totalram - i.totalhigh), | |
423 | nid, K(i.freeram - i.freehigh)); | |
182e8e23 | 424 | #endif |
948b3edb JP |
425 | len += sysfs_emit_at(buf, len, |
426 | "Node %d Dirty: %8lu kB\n" | |
427 | "Node %d Writeback: %8lu kB\n" | |
428 | "Node %d FilePages: %8lu kB\n" | |
429 | "Node %d Mapped: %8lu kB\n" | |
430 | "Node %d AnonPages: %8lu kB\n" | |
431 | "Node %d Shmem: %8lu kB\n" | |
432 | "Node %d KernelStack: %8lu kB\n" | |
628d06a4 | 433 | #ifdef CONFIG_SHADOW_CALL_STACK |
948b3edb | 434 | "Node %d ShadowCallStack:%8lu kB\n" |
628d06a4 | 435 | #endif |
948b3edb | 436 | "Node %d PageTables: %8lu kB\n" |
ebc97a52 | 437 | "Node %d SecPageTables: %8lu kB\n" |
948b3edb JP |
438 | "Node %d NFS_Unstable: %8lu kB\n" |
439 | "Node %d Bounce: %8lu kB\n" | |
440 | "Node %d WritebackTmp: %8lu kB\n" | |
441 | "Node %d KReclaimable: %8lu kB\n" | |
442 | "Node %d Slab: %8lu kB\n" | |
443 | "Node %d SReclaimable: %8lu kB\n" | |
444 | "Node %d SUnreclaim: %8lu kB\n" | |
05b258e9 | 445 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
948b3edb JP |
446 | "Node %d AnonHugePages: %8lu kB\n" |
447 | "Node %d ShmemHugePages: %8lu kB\n" | |
448 | "Node %d ShmemPmdMapped: %8lu kB\n" | |
449 | "Node %d FileHugePages: %8lu kB\n" | |
450 | "Node %d FilePmdMapped: %8lu kB\n" | |
dcdfdd40 KS |
451 | #endif |
452 | #ifdef CONFIG_UNACCEPTED_MEMORY | |
453 | "Node %d Unaccepted: %8lu kB\n" | |
05b258e9 | 454 | #endif |
948b3edb JP |
455 | , |
456 | nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), | |
457 | nid, K(node_page_state(pgdat, NR_WRITEBACK)), | |
458 | nid, K(node_page_state(pgdat, NR_FILE_PAGES)), | |
459 | nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), | |
460 | nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), | |
461 | nid, K(i.sharedram), | |
/*
 * The two *_KB counters below are printed without K().
 * NOTE(review): presumably they are already in kB, as the names suggest.
 */
462 | nid, node_page_state(pgdat, NR_KERNEL_STACK_KB), | |
628d06a4 | 463 | #ifdef CONFIG_SHADOW_CALL_STACK |
948b3edb | 464 | nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), |
628d06a4 | 465 | #endif |
f0c0c115 | 466 | nid, K(node_page_state(pgdat, NR_PAGETABLE)), |
ebc97a52 | 467 | nid, K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), |
/* NFS_Unstable is always reported as the constant 0. */
948b3edb JP |
468 | nid, 0UL, |
469 | nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), | |
470 | nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), | |
471 | nid, K(sreclaimable + | |
472 | node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), | |
473 | nid, K(sreclaimable + sunreclaimable), | |
474 | nid, K(sreclaimable), | |
475 | nid, K(sunreclaimable) | |
05b258e9 | 476 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
948b3edb | 477 | , |
69473e5d | 478 | nid, K(node_page_state(pgdat, NR_ANON_THPS)), |
57b2847d | 479 | nid, K(node_page_state(pgdat, NR_SHMEM_THPS)), |
a1528e21 | 480 | nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), |
bf9ecead | 481 | nid, K(node_page_state(pgdat, NR_FILE_THPS)), |
380780e7 | 482 | nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED)) |
dcdfdd40 KS |
483 | #endif |
484 | #ifdef CONFIG_UNACCEPTED_MEMORY | |
485 | , | |
486 | nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED)) | |
05b258e9 | 487 | #endif |
948b3edb | 488 | ); |
/* hugetlb lines are appended at the current offset; its return value extends len. */
7981593b | 489 | len += hugetlb_report_node_meminfo(buf, len, nid); |
948b3edb | 490 | return len; |
1da177e4 LT |
491 | } |
492 | ||
493 | #undef K | |
948b3edb | 494 | static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL); |
1da177e4 | 495 | |
10fbcf4c | 496 | static ssize_t node_read_numastat(struct device *dev, |
948b3edb | 497 | struct device_attribute *attr, char *buf) |
1da177e4 | 498 | { |
f19298b9 | 499 | fold_vm_numa_events(); |
aa838896 JP |
500 | return sysfs_emit(buf, |
501 | "numa_hit %lu\n" | |
502 | "numa_miss %lu\n" | |
503 | "numa_foreign %lu\n" | |
504 | "interleave_hit %lu\n" | |
505 | "local_node %lu\n" | |
506 | "other_node %lu\n", | |
f19298b9 MG |
507 | sum_zone_numa_event_state(dev->id, NUMA_HIT), |
508 | sum_zone_numa_event_state(dev->id, NUMA_MISS), | |
509 | sum_zone_numa_event_state(dev->id, NUMA_FOREIGN), | |
510 | sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT), | |
511 | sum_zone_numa_event_state(dev->id, NUMA_LOCAL), | |
512 | sum_zone_numa_event_state(dev->id, NUMA_OTHER)); | |
1da177e4 | 513 | } |
948b3edb | 514 | static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL); |
1da177e4 | 515 | |
10fbcf4c KS |
516 | static ssize_t node_read_vmstat(struct device *dev, |
517 | struct device_attribute *attr, char *buf) | |
2ac39037 MR |
518 | { |
519 | int nid = dev->id; | |
75ef7184 | 520 | struct pglist_data *pgdat = NODE_DATA(nid); |
fa25c503 | 521 | int i; |
948b3edb | 522 | int len = 0; |
fa25c503 KM |
523 | |
524 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
948b3edb JP |
525 | len += sysfs_emit_at(buf, len, "%s %lu\n", |
526 | zone_stat_name(i), | |
527 | sum_zone_node_page_state(nid, i)); | |
75ef7184 | 528 | |
3a321d2a | 529 | #ifdef CONFIG_NUMA |
f19298b9 MG |
530 | fold_vm_numa_events(); |
531 | for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) | |
948b3edb JP |
532 | len += sysfs_emit_at(buf, len, "%s %lu\n", |
533 | numa_stat_name(i), | |
f19298b9 | 534 | sum_zone_numa_event_state(nid, i)); |
3a321d2a | 535 | |
948b3edb | 536 | #endif |
69473e5d MS |
537 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { |
538 | unsigned long pages = node_page_state_pages(pgdat, i); | |
539 | ||
540 | if (vmstat_item_print_in_thp(i)) | |
541 | pages /= HPAGE_PMD_NR; | |
542 | len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i), | |
543 | pages); | |
544 | } | |
fa25c503 | 545 | |
948b3edb | 546 | return len; |
2ac39037 | 547 | } |
948b3edb | 548 | static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL); |
2ac39037 | 549 | |
10fbcf4c | 550 | static ssize_t node_read_distance(struct device *dev, |
948b3edb | 551 | struct device_attribute *attr, char *buf) |
1da177e4 LT |
552 | { |
553 | int nid = dev->id; | |
554 | int len = 0; | |
555 | int i; | |
556 | ||
12ee3c0a DR |
557 | /* |
558 | * buf is currently PAGE_SIZE in length and each node needs 4 chars | |
559 | * at the most (distance + space or newline). | |
560 | */ | |
561 | BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); | |
1da177e4 | 562 | |
948b3edb JP |
563 | for_each_online_node(i) { |
564 | len += sysfs_emit_at(buf, len, "%s%d", | |
565 | i ? " " : "", node_distance(nid, i)); | |
566 | } | |
1da177e4 | 567 | |
948b3edb | 568 | len += sysfs_emit_at(buf, len, "\n"); |
1da177e4 LT |
569 | return len; |
570 | } | |
948b3edb | 571 | static DEVICE_ATTR(distance, 0444, node_read_distance, NULL); |
1da177e4 | 572 | |
3c9b8aaf | 573 | static struct attribute *node_dev_attrs[] = { |
3c9b8aaf TI |
574 | &dev_attr_meminfo.attr, |
575 | &dev_attr_numastat.attr, | |
576 | &dev_attr_distance.attr, | |
577 | &dev_attr_vmstat.attr, | |
578 | NULL | |
579 | }; | |
75bd50fa TT |
580 | |
581 | static struct bin_attribute *node_dev_bin_attrs[] = { | |
582 | &bin_attr_cpumap, | |
583 | &bin_attr_cpulist, | |
584 | NULL | |
585 | }; | |
586 | ||
587 | static const struct attribute_group node_dev_group = { | |
588 | .attrs = node_dev_attrs, | |
589 | .bin_attrs = node_dev_bin_attrs | |
590 | }; | |
591 | ||
592 | static const struct attribute_group *node_dev_groups[] = { | |
593 | &node_dev_group, | |
50468e43 JS |
594 | #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP |
595 | &arch_node_dev_group, | |
44b8f8bf JY |
596 | #endif |
597 | #ifdef CONFIG_MEMORY_FAILURE | |
598 | &memory_failure_attr_group, | |
50468e43 | 599 | #endif |
75bd50fa TT |
600 | NULL |
601 | }; | |
3c9b8aaf | 602 | |
/* Release callback of a node device: frees the struct node embedding @dev. */
static void node_device_release(struct device *dev)
{
	struct node *node = to_node(dev);

	kfree(node);
}
1da177e4 LT |
607 | |
608 | /* | |
405ae7d3 | 609 | * register_node - Setup a sysfs device for a node. |
1da177e4 LT |
610 | * @num - Node number to use when creating the device. |
611 | * | |
612 | * Initialize and register the node device. | |
613 | */ | |
a7be6e5a | 614 | static int register_node(struct node *node, int num) |
1da177e4 LT |
615 | { |
616 | int error; | |
617 | ||
10fbcf4c KS |
618 | node->dev.id = num; |
619 | node->dev.bus = &node_subsys; | |
8c7b5b4e | 620 | node->dev.release = node_device_release; |
7ca7ec40 | 621 | node->dev.groups = node_dev_groups; |
10fbcf4c | 622 | error = device_register(&node->dev); |
1da177e4 | 623 | |
a4a00b45 | 624 | if (error) { |
c1cc0d51 | 625 | put_device(&node->dev); |
a4a00b45 | 626 | } else { |
9a305230 | 627 | hugetlb_register_node(node); |
ed4a6d7f | 628 | compaction_register_node(node); |
1da177e4 | 629 | } |
b958d4d0 | 630 | |
1da177e4 LT |
631 | return error; |
632 | } | |
633 | ||
4b45099b KT |
634 | /** |
635 | * unregister_node - unregister a node device | |
636 | * @node: node going away | |
637 | * | |
638 | * Unregisters a node device @node. All the devices on the node must be | |
639 | * unregistered before calling this function. | |
640 | */ | |
641 | void unregister_node(struct node *node) | |
642 | { | |
a4a00b45 | 643 | hugetlb_unregister_node(node); |
da63dc84 | 644 | compaction_unregister_node(node); |
08d9dbe7 | 645 | node_remove_accesses(node); |
acc02a10 | 646 | node_remove_caches(node); |
10fbcf4c | 647 | device_unregister(&node->dev); |
4b45099b | 648 | } |
1da177e4 | 649 | |
/*
 * Node objects indexed by node id.  Slots are filled by
 * __register_one_node() and reset to NULL by unregister_one_node().
 */
8732794b | 650 | struct node *node_devices[MAX_NUMNODES]; |
0fc44159 | 651 | |
76b67ed9 KH |
652 | /* |
653 | * register cpu under node | |
654 | */ | |
655 | int register_cpu_under_node(unsigned int cpu, unsigned int nid) | |
656 | { | |
1830794a | 657 | int ret; |
8a25a2fd | 658 | struct device *obj; |
76b67ed9 | 659 | |
f8246f31 AC |
660 | if (!node_online(nid)) |
661 | return 0; | |
662 | ||
8a25a2fd | 663 | obj = get_cpu_device(cpu); |
f8246f31 AC |
664 | if (!obj) |
665 | return 0; | |
666 | ||
8732794b | 667 | ret = sysfs_create_link(&node_devices[nid]->dev.kobj, |
f8246f31 AC |
668 | &obj->kobj, |
669 | kobject_name(&obj->kobj)); | |
1830794a AC |
670 | if (ret) |
671 | return ret; | |
672 | ||
673 | return sysfs_create_link(&obj->kobj, | |
8732794b WC |
674 | &node_devices[nid]->dev.kobj, |
675 | kobject_name(&node_devices[nid]->dev.kobj)); | |
76b67ed9 KH |
676 | } |
677 | ||
08d9dbe7 KB |
678 | /** |
679 | * register_memory_node_under_compute_node - link memory node to its compute | |
680 | * node for a given access class. | |
58cb346c MCC |
681 | * @mem_nid: Memory node number |
682 | * @cpu_nid: Cpu node number | |
08d9dbe7 KB |
683 | * @access: Access class to register |
684 | * | |
685 | * Description: | |
686 | * For use with platforms that may have separate memory and compute nodes. | |
687 | * This function will export node relationships linking which memory | |
688 | * initiator nodes can access memory targets at a given ranked access | |
689 | * class. | |
690 | */ | |
691 | int register_memory_node_under_compute_node(unsigned int mem_nid, | |
692 | unsigned int cpu_nid, | |
e7deeb9d | 693 | unsigned int access) |
08d9dbe7 KB |
694 | { |
695 | struct node *init_node, *targ_node; | |
696 | struct node_access_nodes *initiator, *target; | |
697 | int ret; | |
698 | ||
699 | if (!node_online(cpu_nid) || !node_online(mem_nid)) | |
700 | return -ENODEV; | |
701 | ||
702 | init_node = node_devices[cpu_nid]; | |
703 | targ_node = node_devices[mem_nid]; | |
704 | initiator = node_init_node_access(init_node, access); | |
705 | target = node_init_node_access(targ_node, access); | |
706 | if (!initiator || !target) | |
707 | return -ENOMEM; | |
708 | ||
709 | ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets", | |
710 | &targ_node->dev.kobj, | |
711 | dev_name(&targ_node->dev)); | |
712 | if (ret) | |
713 | return ret; | |
714 | ||
715 | ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators", | |
716 | &init_node->dev.kobj, | |
717 | dev_name(&init_node->dev)); | |
718 | if (ret) | |
719 | goto err; | |
720 | ||
721 | return 0; | |
722 | err: | |
723 | sysfs_remove_link_from_group(&initiator->dev.kobj, "targets", | |
724 | dev_name(&targ_node->dev)); | |
725 | return ret; | |
726 | } | |
727 | ||
76b67ed9 KH |
728 | int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) |
729 | { | |
8a25a2fd | 730 | struct device *obj; |
b9d52dad AC |
731 | |
732 | if (!node_online(nid)) | |
733 | return 0; | |
734 | ||
8a25a2fd | 735 | obj = get_cpu_device(cpu); |
b9d52dad AC |
736 | if (!obj) |
737 | return 0; | |
738 | ||
8732794b | 739 | sysfs_remove_link(&node_devices[nid]->dev.kobj, |
b9d52dad | 740 | kobject_name(&obj->kobj)); |
1830794a | 741 | sysfs_remove_link(&obj->kobj, |
8732794b | 742 | kobject_name(&node_devices[nid]->dev.kobj)); |
b9d52dad | 743 | |
76b67ed9 KH |
744 | return 0; |
745 | } | |
746 | ||
50f9481e | 747 | #ifdef CONFIG_MEMORY_HOTPLUG |
bd721ea7 | 748 | static int __ref get_nid_for_pfn(unsigned long pfn) |
c04fc586 | 749 | { |
3a80a7fa | 750 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
8cdde385 | 751 | if (system_state < SYSTEM_RUNNING) |
3a80a7fa MG |
752 | return early_pfn_to_nid(pfn); |
753 | #endif | |
c04fc586 GH |
754 | return pfn_to_nid(pfn); |
755 | } | |
756 | ||
90c7eaeb | 757 | static void do_register_memory_block_under_node(int nid, |
395f6081 DH |
758 | struct memory_block *mem_blk, |
759 | enum meminit_context context) | |
f85086f9 LD |
760 | { |
761 | int ret; | |
762 | ||
395f6081 | 763 | memory_block_add_nid(mem_blk, nid, context); |
f85086f9 LD |
764 | |
765 | ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, | |
766 | &mem_blk->dev.kobj, | |
767 | kobject_name(&mem_blk->dev.kobj)); | |
90c7eaeb LD |
768 | if (ret && ret != -EEXIST) |
769 | dev_err_ratelimited(&node_devices[nid]->dev, | |
770 | "can't create link to %s in sysfs (%d)\n", | |
771 | kobject_name(&mem_blk->dev.kobj), ret); | |
f85086f9 | 772 | |
90c7eaeb | 773 | ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj, |
f85086f9 LD |
774 | &node_devices[nid]->dev.kobj, |
775 | kobject_name(&node_devices[nid]->dev.kobj)); | |
90c7eaeb LD |
776 | if (ret && ret != -EEXIST) |
777 | dev_err_ratelimited(&mem_blk->dev, | |
778 | "can't create link to %s in sysfs (%d)\n", | |
779 | kobject_name(&node_devices[nid]->dev.kobj), | |
780 | ret); | |
f85086f9 LD |
781 | } |
782 | ||
c04fc586 | 783 | /* register memory section under specified node if it spans that node */ |
f85086f9 LD |
784 | static int register_mem_block_under_node_early(struct memory_block *mem_blk, |
785 | void *arg) | |
c04fc586 | 786 | { |
b6c88d3b DH |
787 | unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; |
788 | unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); | |
789 | unsigned long end_pfn = start_pfn + memory_block_pfns - 1; | |
f85086f9 | 790 | int nid = *(int *)arg; |
b6c88d3b | 791 | unsigned long pfn; |
c04fc586 | 792 | |
b6c88d3b | 793 | for (pfn = start_pfn; pfn <= end_pfn; pfn++) { |
c04fc586 GH |
794 | int page_nid; |
795 | ||
04697858 YL |
796 | /* |
797 | * memory block could have several absent sections from start. | |
798 | * skip pfn range from absent section | |
799 | */ | |
e03d1f78 | 800 | if (!pfn_in_present_section(pfn)) { |
04697858 YL |
801 | pfn = round_down(pfn + PAGES_PER_SECTION, |
802 | PAGES_PER_SECTION) - 1; | |
803 | continue; | |
804 | } | |
805 | ||
fc44f7f9 | 806 | /* |
f85086f9 LD |
807 | * We need to check if page belongs to nid only at the boot |
808 | * case because node's ranges can be interleaved. | |
d84f2f5a | 809 | */ |
f85086f9 LD |
810 | page_nid = get_nid_for_pfn(pfn); |
811 | if (page_nid < 0) | |
812 | continue; | |
813 | if (page_nid != nid) | |
814 | continue; | |
dee5d0d5 | 815 | |
395f6081 | 816 | do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY); |
90c7eaeb | 817 | return 0; |
c04fc586 GH |
818 | } |
819 | /* mem section does not span the specified node */ | |
820 | return 0; | |
821 | } | |
822 | ||
f85086f9 LD |
823 | /* |
824 | * During hotplug we know that all pages in the memory block belong to the same | |
825 | * node. | |
826 | */ | |
827 | static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, | |
828 | void *arg) | |
829 | { | |
830 | int nid = *(int *)arg; | |
831 | ||
395f6081 | 832 | do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG); |
90c7eaeb | 833 | return 0; |
f85086f9 LD |
834 | } |
835 | ||
4c4b7f9b | 836 | /* |
d84f2f5a DH |
837 | * Unregister a memory block device under the node it spans. Memory blocks |
838 | * with multiple nodes cannot be offlined and therefore also never be removed. | |
4c4b7f9b | 839 | */ |
a31b264c | 840 | void unregister_memory_block_under_nodes(struct memory_block *mem_blk) |
c04fc586 | 841 | { |
d84f2f5a DH |
842 | if (mem_blk->nid == NUMA_NO_NODE) |
843 | return; | |
c04fc586 | 844 | |
d84f2f5a DH |
845 | sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj, |
846 | kobject_name(&mem_blk->dev.kobj)); | |
847 | sysfs_remove_link(&mem_blk->dev.kobj, | |
848 | kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); | |
c04fc586 GH |
849 | } |
850 | ||
cc651559 DH |
851 | void register_memory_blocks_under_node(int nid, unsigned long start_pfn, |
852 | unsigned long end_pfn, | |
853 | enum meminit_context context) | |
c04fc586 | 854 | { |
f85086f9 LD |
855 | walk_memory_blocks_func_t func; |
856 | ||
857 | if (context == MEMINIT_HOTPLUG) | |
858 | func = register_mem_block_under_node_hotplug; | |
859 | else | |
860 | func = register_mem_block_under_node_early; | |
861 | ||
90c7eaeb LD |
862 | walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), |
863 | (void *)&nid, func); | |
864 | return; | |
c04fc586 | 865 | } |
50f9481e | 866 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
4faf8d95 | 867 | |
9037a993 | 868 | int __register_one_node(int nid) |
0fc44159 | 869 | { |
9037a993 | 870 | int error; |
76b67ed9 | 871 | int cpu; |
0fc44159 | 872 | |
9037a993 MH |
873 | node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL); |
874 | if (!node_devices[nid]) | |
875 | return -ENOMEM; | |
c04fc586 | 876 | |
a7be6e5a | 877 | error = register_node(node_devices[nid], nid); |
39da08cb | 878 | |
9037a993 MH |
879 | /* link cpu under this node */ |
880 | for_each_present_cpu(cpu) { | |
881 | if (cpu_to_node(cpu) == nid) | |
882 | register_cpu_under_node(cpu, nid); | |
0fc44159 YG |
883 | } |
884 | ||
08d9dbe7 | 885 | INIT_LIST_HEAD(&node_devices[nid]->access_list); |
acc02a10 | 886 | node_init_caches(nid); |
0fc44159 | 887 | |
9037a993 | 888 | return error; |
0fc44159 YG |
889 | } |
890 | ||
891 | void unregister_one_node(int nid) | |
892 | { | |
92d585ef XQ |
893 | if (!node_devices[nid]) |
894 | return; | |
895 | ||
8732794b | 896 | unregister_node(node_devices[nid]); |
8732794b | 897 | node_devices[nid] = NULL; |
0fc44159 YG |
898 | } |
899 | ||
bde631a5 LS |
900 | /* |
901 | * node states attributes | |
902 | */ | |
903 | ||
b15f562f | 904 | struct node_attr { |
10fbcf4c | 905 | struct device_attribute attr; |
b15f562f AK |
906 | enum node_states state; |
907 | }; | |
bde631a5 | 908 | |
10fbcf4c KS |
909 | static ssize_t show_node_state(struct device *dev, |
910 | struct device_attribute *attr, char *buf) | |
bde631a5 | 911 | { |
b15f562f | 912 | struct node_attr *na = container_of(attr, struct node_attr, attr); |
948b3edb JP |
913 | |
914 | return sysfs_emit(buf, "%*pbl\n", | |
915 | nodemask_pr_args(&node_states[na->state])); | |
bde631a5 LS |
916 | } |
917 | ||
b15f562f | 918 | #define _NODE_ATTR(name, state) \ |
10fbcf4c | 919 | { __ATTR(name, 0444, show_node_state, NULL), state } |
bde631a5 | 920 | |
b15f562f | 921 | static struct node_attr node_state_attr[] = { |
fcf07d22 LJ |
922 | [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE), |
923 | [N_ONLINE] = _NODE_ATTR(online, N_ONLINE), | |
924 | [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY), | |
bde631a5 | 925 | #ifdef CONFIG_HIGHMEM |
fcf07d22 | 926 | [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY), |
20b2f52b | 927 | #endif |
20b2f52b | 928 | [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY), |
fcf07d22 | 929 | [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), |
894c26a1 JC |
930 | [N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator, |
931 | N_GENERIC_INITIATOR), | |
bde631a5 LS |
932 | }; |
933 | ||
10fbcf4c | 934 | static struct attribute *node_state_attrs[] = { |
fcf07d22 LJ |
935 | &node_state_attr[N_POSSIBLE].attr.attr, |
936 | &node_state_attr[N_ONLINE].attr.attr, | |
937 | &node_state_attr[N_NORMAL_MEMORY].attr.attr, | |
3701cde6 | 938 | #ifdef CONFIG_HIGHMEM |
fcf07d22 | 939 | &node_state_attr[N_HIGH_MEMORY].attr.attr, |
20b2f52b | 940 | #endif |
20b2f52b | 941 | &node_state_attr[N_MEMORY].attr.attr, |
fcf07d22 | 942 | &node_state_attr[N_CPU].attr.attr, |
894c26a1 | 943 | &node_state_attr[N_GENERIC_INITIATOR].attr.attr, |
3701cde6 AK |
944 | NULL |
945 | }; | |
bde631a5 | 946 | |
5a576764 | 947 | static const struct attribute_group memory_root_attr_group = { |
10fbcf4c KS |
948 | .attrs = node_state_attrs, |
949 | }; | |
950 | ||
951 | static const struct attribute_group *cpu_root_attr_groups[] = { | |
952 | &memory_root_attr_group, | |
953 | NULL, | |
954 | }; | |
955 | ||
2848a28b | 956 | void __init node_dev_init(void) |
1da177e4 | 957 | { |
2848a28b | 958 | int ret, i; |
bde631a5 | 959 | |
3701cde6 AK |
960 | BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); |
961 | BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); | |
962 | ||
10fbcf4c | 963 | ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); |
2848a28b DH |
964 | if (ret) |
965 | panic("%s() failed to register subsystem: %d\n", __func__, ret); | |
966 | ||
bde631a5 | 967 | /* |
2848a28b DH |
968 | * Create all node devices, which will properly link the node |
969 | * to applicable memory block devices and already created cpu devices. | |
bde631a5 | 970 | */ |
2848a28b DH |
971 | for_each_online_node(i) { |
972 | ret = register_one_node(i); | |
973 | if (ret) | |
974 | panic("%s() failed to add node: %d\n", __func__, ret); | |
975 | } | |
1da177e4 | 976 | } |