// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/lockdep.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/memory.h>
#include <linux/memory-tiers.h>

#include "internal.h"

struct memory_tier {
	/* hierarchy of memory tiers */
	struct list_head list;
	/* list of all memory types part of this tier */
	struct list_head memory_types;
	/*
	 * start value of abstract distance. memory tier maps
	 * an abstract distance range,
	 * adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE
	 */
	int adistance_start;
	struct device dev;
	/* All the nodes that are part of all the lower memory tiers. */
	nodemask_t lower_tier_mask;
};

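/*
 * Per-node set of preferred demotion targets, computed by
 * establish_demotion_targets() and consumed by next_demotion_node().
 */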
struct demotion_nodes {
	nodemask_t preferred;
};

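/*
 * Tracks which memory_dev_type a NUMA node is currently using and how many
 * devices on that node have mapped it; only the first mapping takes a
 * reference on the memtype (see __init_node_memory_type()).
 */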
struct node_memory_type_map {
	struct memory_dev_type *memtype;
	int map_count;
};

static DEFINE_MUTEX(memory_tier_lock);
static LIST_HEAD(memory_tiers);
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
static struct memory_dev_type *default_dram_type;

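/*
 * Each memory tier is exposed as a virtual device on this bus, i.e. under
 * /sys/devices/virtual/memory_tiering/memory_tierN, where N is the tier's
 * abstract-distance chunk index (adistance_start >> MEMTIER_CHUNK_BITS).
 */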
static struct bus_type memory_tier_subsys = {
	.name = "memory_tiering",
	.dev_name = "memory_tier",
};

#ifdef CONFIG_MIGRATION
static int top_tier_adistance;
/*
 * node_demotion[] examples:
 *
 * Example 1:
 *
 * Node 0 & 1 are CPU + DRAM nodes, node 2 & 3 are PMEM nodes.
 *
 * node distances:
 * node   0    1    2    3
 *    0  10   20   30   40
 *    1  20   10   40   30
 *    2  30   40   10   40
 *    3  40   30   40   10
 *
 * memory_tiers0 = 0-1
 * memory_tiers1 = 2-3
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 3
 * node_demotion[2].preferred = <empty>
 * node_demotion[3].preferred = <empty>
 *
 * Example 2:
 *
 * Node 0 & 1 are CPU + DRAM nodes, node 2 is a memory-only DRAM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   30
 *    2  30   30   10
 *
 * memory_tiers0 = 0-2
 *
 * node_demotion[0].preferred = <empty>
 * node_demotion[1].preferred = <empty>
 * node_demotion[2].preferred = <empty>
 *
 * Example 3:
 *
 * Node 0 is a CPU + DRAM node, node 1 is an HBM node, node 2 is a PMEM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   40
 *    2  30   40   10
 *
 * memory_tiers0 = 1
 * memory_tiers1 = 0
 * memory_tiers2 = 2
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 0
 * node_demotion[2].preferred = <empty>
 *
 */
static struct demotion_nodes *node_demotion __read_mostly;
#endif /* CONFIG_MIGRATION */

static inline struct memory_tier *to_memory_tier(struct device *device)
{
	return container_of(device, struct memory_tier, dev);
}

static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
{
	nodemask_t nodes = NODE_MASK_NONE;
	struct memory_dev_type *memtype;

	list_for_each_entry(memtype, &memtier->memory_types, tier_sibiling)
		nodes_or(nodes, nodes, memtype->nodes);

	return nodes;
}

static void memory_tier_device_release(struct device *dev)
{
	struct memory_tier *tier = to_memory_tier(dev);
	/*
	 * synchronize_rcu in clear_node_memory_tier makes sure
	 * we don't have rcu access to this memory tier.
	 */
	kfree(tier);
}

static ssize_t nodelist_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	int ret;
	nodemask_t nmask;

	mutex_lock(&memory_tier_lock);
	nmask = get_memtier_nodemask(to_memory_tier(dev));
	ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&nmask));
	mutex_unlock(&memory_tier_lock);
	return ret;
}
static DEVICE_ATTR_RO(nodelist);

static struct attribute *memtier_dev_attrs[] = {
	&dev_attr_nodelist.attr,
	NULL
};

static const struct attribute_group memtier_dev_group = {
	.attrs = memtier_dev_attrs,
};

static const struct attribute_group *memtier_dev_groups[] = {
	&memtier_dev_group,
	NULL
};

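/*
 * Look up the memory tier that covers memtype->adistance (rounded down to a
 * MEMTIER_CHUNK_SIZE boundary), creating and registering a new tier device
 * if none exists yet. Must be called with memory_tier_lock held.
 */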
static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
{
	int ret;
	bool found_slot = false;
	struct memory_tier *memtier, *new_memtier;
	int adistance = memtype->adistance;
	unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE;

	lockdep_assert_held_once(&memory_tier_lock);

	adistance = round_down(adistance, memtier_adistance_chunk_size);
	/*
	 * If the memtype is already part of a memory tier,
	 * just return that.
	 */
	if (!list_empty(&memtype->tier_sibiling)) {
		list_for_each_entry(memtier, &memory_tiers, list) {
			if (adistance == memtier->adistance_start)
				return memtier;
		}
		WARN_ON(1);
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(memtier, &memory_tiers, list) {
		if (adistance == memtier->adistance_start) {
			goto link_memtype;
		} else if (adistance < memtier->adistance_start) {
			found_slot = true;
			break;
		}
	}

	new_memtier = kzalloc(sizeof(struct memory_tier), GFP_KERNEL);
	if (!new_memtier)
		return ERR_PTR(-ENOMEM);

	new_memtier->adistance_start = adistance;
	INIT_LIST_HEAD(&new_memtier->list);
	INIT_LIST_HEAD(&new_memtier->memory_types);
	if (found_slot)
		list_add_tail(&new_memtier->list, &memtier->list);
	else
		list_add_tail(&new_memtier->list, &memory_tiers);

	new_memtier->dev.id = adistance >> MEMTIER_CHUNK_BITS;
	new_memtier->dev.bus = &memory_tier_subsys;
	new_memtier->dev.release = memory_tier_device_release;
	new_memtier->dev.groups = memtier_dev_groups;

	ret = device_register(&new_memtier->dev);
	if (ret) {
		list_del(&new_memtier->list);
		put_device(&new_memtier->dev);
		return ERR_PTR(ret);
	}
	memtier = new_memtier;

link_memtype:
	list_add(&memtype->tier_sibiling, &memtier->memory_types);
	return memtier;
}

static struct memory_tier *__node_get_memory_tier(int node)
{
	pg_data_t *pgdat;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return NULL;
	/*
	 * Since we hold memory_tier_lock, we can avoid
	 * RCU read locks when accessing the details. No
	 * parallel updates are possible here.
	 */
	return rcu_dereference_check(pgdat->memtier,
				     lockdep_is_held(&memory_tier_lock));
}

#ifdef CONFIG_MIGRATION
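/*
 * A node is "top tier" when its tier's adistance_start does not exceed
 * top_tier_adistance, i.e. it belongs to the slowest CPU-bearing tier or to
 * any faster tier. Nodes with no tier assigned yet are treated as top tier.
 */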
bool node_is_toptier(int node)
{
	bool toptier;
	pg_data_t *pgdat;
	struct memory_tier *memtier;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return false;

	rcu_read_lock();
	memtier = rcu_dereference(pgdat->memtier);
	if (!memtier) {
		toptier = true;
		goto out;
	}
	if (memtier->adistance_start <= top_tier_adistance)
		toptier = true;
	else
		toptier = false;
out:
	rcu_read_unlock();
	return toptier;
}

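/*
 * Fill @targets with every node in the tiers below @pgdat's tier. Callers
 * (e.g. demotion during reclaim) typically use this as the fallback nodemask
 * around the preferred target returned by next_demotion_node().
 */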
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{
	struct memory_tier *memtier;

	/*
	 * pg_data_t.memtier updates include a synchronize_rcu()
	 * which ensures that we either find NULL or a valid memtier
	 * in NODE_DATA. Protect the access via rcu_read_lock().
	 */
	rcu_read_lock();
	memtier = rcu_dereference(pgdat->memtier);
	if (memtier)
		*targets = memtier->lower_tier_mask;
	else
		*targets = NODE_MASK_NONE;
	rcu_read_unlock();
}

/**
 * next_demotion_node() - Get the next node in the demotion path
 * @node: The starting node to look up the next node for
 *
 * Return: node id for next memory node in the demotion path hierarchy
 * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
 * @node online or guarantee that it *continues* to be the next demotion
 * target.
 */
int next_demotion_node(int node)
{
	struct demotion_nodes *nd;
	int target;

	if (!node_demotion)
		return NUMA_NO_NODE;

	nd = &node_demotion[node];

	/*
	 * node_demotion[] is updated without excluding this
	 * function from running.
	 *
	 * Make sure to use RCU over entire code blocks if
	 * node_demotion[] reads need to be consistent.
	 */
	rcu_read_lock();
	/*
	 * If there are multiple target nodes, just select one
	 * target node randomly.
	 *
	 * Round-robin selection would need an extra field in
	 * node_demotion[] to record the last selected target, and
	 * bouncing that field around can cause cache ping-pong;
	 * per-CPU state would avoid that but adds complexity.
	 * Random selection keeps things simple for now.
	 */
	target = node_random(&nd->preferred);
	rcu_read_unlock();

	return target;
}

static void disable_all_demotion_targets(void)
{
	struct memory_tier *memtier;
	int node;

	for_each_node_state(node, N_MEMORY) {
		node_demotion[node].preferred = NODE_MASK_NONE;
		/*
		 * We are holding memory_tier_lock, it is safe
		 * to access pgdat->memtier.
		 */
		memtier = __node_get_memory_tier(node);
		if (memtier)
			memtier->lower_tier_mask = NODE_MASK_NONE;
	}
	/*
	 * Ensure that the "disable" is visible across the system.
	 * Readers will see either a combination of before+disable
	 * state or disable+after. They will never see before and
	 * after state together.
	 */
	synchronize_rcu();
}

/*
 * Find an automatic demotion target for all memory
 * nodes. Failing here is OK. It might just indicate
 * being at the end of a chain.
 */
static void establish_demotion_targets(void)
{
	struct memory_tier *memtier;
	struct demotion_nodes *nd;
	int target = NUMA_NO_NODE, node;
	int distance, best_distance;
	nodemask_t tier_nodes, lower_tier;

	lockdep_assert_held_once(&memory_tier_lock);

	if (!node_demotion || !IS_ENABLED(CONFIG_MIGRATION))
		return;

	disable_all_demotion_targets();

	for_each_node_state(node, N_MEMORY) {
		best_distance = -1;
		nd = &node_demotion[node];

		memtier = __node_get_memory_tier(node);
		if (!memtier || list_is_last(&memtier->list, &memory_tiers))
			continue;
		/*
		 * Get the lower memtier to find the demotion node list.
		 */
		memtier = list_next_entry(memtier, list);
		tier_nodes = get_memtier_nodemask(memtier);
		/*
		 * find_next_best_node() uses the 'used' nodemask as a skip
		 * list. Add all memory nodes except the selected memory
		 * tier's nodelist to the skip list so that we find the best
		 * node within that memtier's nodelist.
		 */
		nodes_andnot(tier_nodes, node_states[N_MEMORY], tier_nodes);

		/*
		 * Find all the nodes in the memory tier node list with the
		 * same best distance and add them to the preferred mask. We
		 * randomly select between nodes in the preferred mask when
		 * allocating pages during demotion.
		 */
		do {
			target = find_next_best_node(node, &tier_nodes);
			if (target == NUMA_NO_NODE)
				break;

			distance = node_distance(node, target);
			if (distance == best_distance || best_distance == -1) {
				best_distance = distance;
				node_set(target, nd->preferred);
			} else {
				break;
			}
		} while (1);
	}
	/*
	 * Promotion is allowed from a memory tier to a higher
	 * memory tier only if the memory tier doesn't include
	 * compute. We want to skip promotion from a memory tier
	 * if any node that is part of the memory tier has CPUs.
	 * Once we detect such a memory tier, we consider that tier
	 * as the top tier from which promotion is not allowed.
	 */
	list_for_each_entry_reverse(memtier, &memory_tiers, list) {
		tier_nodes = get_memtier_nodemask(memtier);
		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
		if (!nodes_empty(tier_nodes)) {
			/*
			 * Any abstract distance up to the max value of this
			 * memtier is considered top tier.
			 */
			top_tier_adistance = memtier->adistance_start +
						MEMTIER_CHUNK_SIZE - 1;
			break;
		}
	}
	/*
	 * Now build the lower_tier mask for each node, collecting the node
	 * mask of all memory tiers below it. This allows us to fall back
	 * demotion page allocation to a set of nodes that is closer to the
	 * above selected preferred node.
	 */
	lower_tier = node_states[N_MEMORY];
	list_for_each_entry(memtier, &memory_tiers, list) {
		/*
		 * Keep removing the current tier from the lower_tier nodes.
		 * This removes all nodes in the current and above memory
		 * tiers from the lower_tier mask.
		 */
		tier_nodes = get_memtier_nodemask(memtier);
		nodes_andnot(lower_tier, lower_tier, tier_nodes);
		memtier->lower_tier_mask = lower_tier;
	}
}

#else
static inline void disable_all_demotion_targets(void) {}
static inline void establish_demotion_targets(void) {}
#endif /* CONFIG_MIGRATION */

static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype)
{
	if (!node_memory_types[node].memtype)
		node_memory_types[node].memtype = memtype;
	/*
	 * For each device getting added on the same NUMA node
	 * with this specific memtype, bump the map count. We
	 * only take the memtype device reference once, so that
	 * changing a node's memtype can be done by dropping the
	 * only reference count taken here.
	 */
	if (node_memory_types[node].memtype == memtype) {
		if (!node_memory_types[node].map_count++)
			kref_get(&memtype->kref);
	}
}

static struct memory_tier *set_node_memory_tier(int node)
{
	struct memory_tier *memtier;
	struct memory_dev_type *memtype;
	pg_data_t *pgdat = NODE_DATA(node);

	lockdep_assert_held_once(&memory_tier_lock);

	if (!node_state(node, N_MEMORY))
		return ERR_PTR(-EINVAL);

	__init_node_memory_type(node, default_dram_type);

	memtype = node_memory_types[node].memtype;
	node_set(node, memtype->nodes);
	memtier = find_create_memory_tier(memtype);
	if (!IS_ERR(memtier))
		rcu_assign_pointer(pgdat->memtier, memtier);
	return memtier;
}


static void destroy_memory_tier(struct memory_tier *memtier)
{
	list_del(&memtier->list);
	device_unregister(&memtier->dev);
}

static bool clear_node_memory_tier(int node)
{
	bool cleared = false;
	pg_data_t *pgdat;
	struct memory_tier *memtier;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return false;

	/*
	 * Make sure that anybody looking at NODE_DATA who finds
	 * a valid memtier finds memory_dev_types with nodes still
	 * linked to the memtier. We achieve this by waiting for
	 * RCU read sections to finish using synchronize_rcu.
	 * This also enables us to free the destroyed memory tier
	 * with kfree instead of kfree_rcu.
	 */
	memtier = __node_get_memory_tier(node);
	if (memtier) {
		struct memory_dev_type *memtype;

		rcu_assign_pointer(pgdat->memtier, NULL);
		synchronize_rcu();
		memtype = node_memory_types[node].memtype;
		node_clear(node, memtype->nodes);
		if (nodes_empty(memtype->nodes)) {
			list_del_init(&memtype->tier_sibiling);
			if (list_empty(&memtier->memory_types))
				destroy_memory_tier(memtier);
		}
		cleared = true;
	}
	return cleared;
}

static void release_memtype(struct kref *kref)
{
	struct memory_dev_type *memtype;

	memtype = container_of(kref, struct memory_dev_type, kref);
	kfree(memtype);
}

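/*
 * Allocate a refcounted memory_dev_type with the given abstract distance.
 * A typical caller (e.g. a driver exposing slower memory, such as dax/kmem)
 * would do roughly:
 *
 *	memtype = alloc_memory_type(adistance);
 *	init_node_memory_type(node, memtype);	/" before onlining the memory "/
 *	...
 *	clear_node_memory_type(node, memtype);
 *	destroy_memory_type(memtype);
 *
 * The sequence above is an illustrative sketch only; see the callers of
 * these helpers for the exact ordering around memory hotplug.
 */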
struct memory_dev_type *alloc_memory_type(int adistance)
{
	struct memory_dev_type *memtype;

	memtype = kmalloc(sizeof(*memtype), GFP_KERNEL);
	if (!memtype)
		return ERR_PTR(-ENOMEM);

	memtype->adistance = adistance;
	INIT_LIST_HEAD(&memtype->tier_sibiling);
	memtype->nodes = NODE_MASK_NONE;
	kref_init(&memtype->kref);
	return memtype;
}
EXPORT_SYMBOL_GPL(alloc_memory_type);

void destroy_memory_type(struct memory_dev_type *memtype)
{
	kref_put(&memtype->kref, release_memtype);
}
EXPORT_SYMBOL_GPL(destroy_memory_type);

void init_node_memory_type(int node, struct memory_dev_type *memtype)
{
	mutex_lock(&memory_tier_lock);
	__init_node_memory_type(node, memtype);
	mutex_unlock(&memory_tier_lock);
}
EXPORT_SYMBOL_GPL(init_node_memory_type);

void clear_node_memory_type(int node, struct memory_dev_type *memtype)
{
	mutex_lock(&memory_tier_lock);
	if (node_memory_types[node].memtype == memtype)
		node_memory_types[node].map_count--;
	/*
	 * If we unmapped all the attached devices to this node,
	 * clear the node memory type.
	 */
	if (!node_memory_types[node].map_count) {
		node_memory_types[node].memtype = NULL;
		kref_put(&memtype->kref, release_memtype);
	}
	mutex_unlock(&memory_tier_lock);
}
EXPORT_SYMBOL_GPL(clear_node_memory_type);

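/*
 * Memory hotplug notifier: keep the node-to-tier assignment and the
 * demotion targets in sync as a node gains its first or loses its last
 * memory.
 */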
static int __meminit memtier_hotplug_callback(struct notifier_block *self,
					      unsigned long action, void *_arg)
{
	struct memory_tier *memtier;
	struct memory_notify *arg = _arg;

	/*
	 * Only update the node migration order when a node is
	 * changing status, like online->offline.
	 */
	if (arg->status_change_nid < 0)
		return notifier_from_errno(0);

	switch (action) {
	case MEM_OFFLINE:
		mutex_lock(&memory_tier_lock);
		if (clear_node_memory_tier(arg->status_change_nid))
			establish_demotion_targets();
		mutex_unlock(&memory_tier_lock);
		break;
	case MEM_ONLINE:
		mutex_lock(&memory_tier_lock);
		memtier = set_node_memory_tier(arg->status_change_nid);
		if (!IS_ERR(memtier))
			establish_demotion_targets();
		mutex_unlock(&memory_tier_lock);
		break;
	}

	return notifier_from_errno(0);
}

static int __init memory_tier_init(void)
{
	int ret, node;
	struct memory_tier *memtier;

	ret = subsys_virtual_register(&memory_tier_subsys, NULL);
	if (ret)
		panic("%s() failed to register memory tier subsystem\n", __func__);

#ifdef CONFIG_MIGRATION
	node_demotion = kcalloc(nr_node_ids, sizeof(struct demotion_nodes),
				GFP_KERNEL);
	WARN_ON(!node_demotion);
#endif
	mutex_lock(&memory_tier_lock);
	/*
	 * For now we can have 4 faster memory tiers with smaller adistance
	 * than the default DRAM tier.
	 */
	default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM);
	if (IS_ERR(default_dram_type))
		panic("%s() failed to allocate default DRAM tier\n", __func__);

	/*
	 * Look at all the existing N_MEMORY nodes and add them to the
	 * default memory tier or to a tier if we already have memory
	 * types assigned.
	 */
	for_each_node_state(node, N_MEMORY) {
		memtier = set_node_memory_tier(node);
		if (IS_ERR(memtier))
			/*
			 * Continue with the memtiers we were able to set up.
			 */
			break;
	}
	establish_demotion_targets();
	mutex_unlock(&memory_tier_lock);

	hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
	return 0;
}
subsys_initcall(memory_tier_init);

bool numa_demotion_enabled = false;

#ifdef CONFIG_MIGRATION
#ifdef CONFIG_SYSFS
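/*
 * The knob below is exposed as /sys/kernel/mm/numa/demotion_enabled and
 * accepts the usual kstrtobool() inputs (e.g. "0"/"1", "y"/"n").
 */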
static ssize_t numa_demotion_enabled_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  numa_demotion_enabled ? "true" : "false");
}

static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	ssize_t ret;

	ret = kstrtobool(buf, &numa_demotion_enabled);
	if (ret)
		return ret;

	return count;
}

static struct kobj_attribute numa_demotion_enabled_attr =
	__ATTR(demotion_enabled, 0644, numa_demotion_enabled_show,
	       numa_demotion_enabled_store);

static struct attribute *numa_attrs[] = {
	&numa_demotion_enabled_attr.attr,
	NULL,
};

static const struct attribute_group numa_attr_group = {
	.attrs = numa_attrs,
};

static int __init numa_init_sysfs(void)
{
	int err;
	struct kobject *numa_kobj;

	numa_kobj = kobject_create_and_add("numa", mm_kobj);
	if (!numa_kobj) {
		pr_err("failed to create numa kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(numa_kobj, &numa_attr_group);
	if (err) {
		pr_err("failed to register numa group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(numa_kobj);
	return err;
}
subsys_initcall(numa_init_sysfs);
#endif /* CONFIG_SYSFS */
#endif