sched/numa: Fix placement of workloads spread across multiple nodes
[linux-2.6-block.git] / mm / mm_init.c
CommitLineData
6b74ab97
MG
1/*
2 * mm_init.c - Memory initialisation verification and debugging
3 *
4 * Copyright 2008 IBM Corporation, 2008
5 * Author Mel Gorman <mel@csn.ul.ie>
6 *
7 */
8#include <linux/kernel.h>
9#include <linux/init.h>
ff7ea79c 10#include <linux/kobject.h>
b95f1b31 11#include <linux/export.h>
917d9290
TC
12#include <linux/memory.h>
13#include <linux/notifier.h>
708614e6 14#include "internal.h"
6b74ab97 15
5e9426ab 16#ifdef CONFIG_DEBUG_MEMORY_INIT
/*
 * Verbosity threshold for the memory-init debugging output in this file.
 * Zero-initialised; overwritten by the "mminit_loglevel" early parameter.
 */
int mminit_loglevel;
6b74ab97 18
5c9ffc9c
AM
/* Provide a zero SECTIONS_SHIFT when no earlier header has defined one. */
#if !defined(SECTIONS_SHIFT)
#define SECTIONS_SHIFT	0
#endif
22
68ad8df4
MG
23/* The zonelists are simply reported, validation is manual. */
24void mminit_verify_zonelist(void)
25{
26 int nid;
27
28 if (mminit_loglevel < MMINIT_VERIFY)
29 return;
30
31 for_each_online_node(nid) {
32 pg_data_t *pgdat = NODE_DATA(nid);
33 struct zone *zone;
34 struct zoneref *z;
35 struct zonelist *zonelist;
36 int i, listid, zoneid;
37
38 BUG_ON(MAX_ZONELISTS > 2);
39 for (i = 0; i < MAX_ZONELISTS * MAX_NR_ZONES; i++) {
40
41 /* Identify the zone and nodelist */
42 zoneid = i % MAX_NR_ZONES;
43 listid = i / MAX_NR_ZONES;
44 zonelist = &pgdat->node_zonelists[listid];
45 zone = &pgdat->node_zones[zoneid];
46 if (!populated_zone(zone))
47 continue;
48
49 /* Print information about the zonelist */
50 printk(KERN_DEBUG "mminit::zonelist %s %d:%s = ",
51 listid > 0 ? "thisnode" : "general", nid,
52 zone->name);
53
54 /* Iterate the zonelist */
55 for_each_zone_zonelist(zone, z, zonelist, zoneid) {
56#ifdef CONFIG_NUMA
57 printk(KERN_CONT "%d:%s ",
58 zone->node, zone->name);
59#else
60 printk(KERN_CONT "0:%s ", zone->name);
61#endif /* CONFIG_NUMA */
62 }
63 printk(KERN_CONT "\n");
64 }
65 }
66}
67
708614e6
MG
68void __init mminit_verify_pageflags_layout(void)
69{
70 int shift, width;
71 unsigned long or_mask, add_mask;
72
73 shift = 8 * sizeof(unsigned long);
b795854b 74 width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_NIDPID_SHIFT;
708614e6 75 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
b795854b 76 "Section %d Node %d Zone %d Lastnidpid %d Flags %d\n",
708614e6
MG
77 SECTIONS_WIDTH,
78 NODES_WIDTH,
79 ZONES_WIDTH,
b795854b 80 LAST_NIDPID_WIDTH,
708614e6
MG
81 NR_PAGEFLAGS);
82 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
b795854b 83 "Section %d Node %d Zone %d Lastnidpid %d\n",
708614e6 84 SECTIONS_SHIFT,
708614e6 85 NODES_SHIFT,
a4e1b4c6 86 ZONES_SHIFT,
b795854b 87 LAST_NIDPID_SHIFT);
a4e1b4c6 88 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts",
b795854b 89 "Section %lu Node %lu Zone %lu Lastnidpid %lu\n",
708614e6
MG
90 (unsigned long)SECTIONS_PGSHIFT,
91 (unsigned long)NODES_PGSHIFT,
a4e1b4c6 92 (unsigned long)ZONES_PGSHIFT,
b795854b 93 (unsigned long)LAST_NIDPID_PGSHIFT);
a4e1b4c6
MG
94 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid",
95 "Node/Zone ID: %lu -> %lu\n",
96 (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT),
97 (unsigned long)ZONEID_PGOFF);
708614e6 98 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_usage",
a4e1b4c6 99 "location: %d -> %d layout %d -> %d unused %d -> %d page-flags\n",
708614e6
MG
100 shift, width, width, NR_PAGEFLAGS, NR_PAGEFLAGS, 0);
101#ifdef NODE_NOT_IN_PAGE_FLAGS
102 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
103 "Node not in page flags");
104#endif
b795854b 105#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
a4e1b4c6 106 mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
b795854b 107 "Last nidpid not in page flags");
a4e1b4c6 108#endif
708614e6
MG
109
110 if (SECTIONS_WIDTH) {
111 shift -= SECTIONS_WIDTH;
112 BUG_ON(shift != SECTIONS_PGSHIFT);
113 }
114 if (NODES_WIDTH) {
115 shift -= NODES_WIDTH;
116 BUG_ON(shift != NODES_PGSHIFT);
117 }
118 if (ZONES_WIDTH) {
119 shift -= ZONES_WIDTH;
120 BUG_ON(shift != ZONES_PGSHIFT);
121 }
122
123 /* Check for bitmask overlaps */
124 or_mask = (ZONES_MASK << ZONES_PGSHIFT) |
125 (NODES_MASK << NODES_PGSHIFT) |
126 (SECTIONS_MASK << SECTIONS_PGSHIFT);
127 add_mask = (ZONES_MASK << ZONES_PGSHIFT) +
128 (NODES_MASK << NODES_PGSHIFT) +
129 (SECTIONS_MASK << SECTIONS_PGSHIFT);
130 BUG_ON(or_mask != add_mask);
131}
132
133void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone,
134 unsigned long nid, unsigned long pfn)
135{
136 BUG_ON(page_to_nid(page) != nid);
137 BUG_ON(page_zonenum(page) != zone);
138 BUG_ON(page_to_pfn(page) != pfn);
139}
140
6b74ab97
MG
141static __init int set_mminit_loglevel(char *str)
142{
143 get_option(&str, &mminit_loglevel);
144 return 0;
145}
146early_param("mminit_loglevel", set_mminit_loglevel);
5e9426ab 147#endif /* CONFIG_DEBUG_MEMORY_INIT */
ff7ea79c
NA
148
/* The "mm" kobject, created by mm_sysfs_init() as a child of kernel_kobj. */
struct kobject *mm_kobj;
EXPORT_SYMBOL_GPL(mm_kobj);
151
917d9290
TC
152#ifdef CONFIG_SMP
153s32 vm_committed_as_batch = 32;
154
155static void __meminit mm_compute_batch(void)
156{
157 u64 memsized_batch;
158 s32 nr = num_present_cpus();
159 s32 batch = max_t(s32, nr*2, 32);
160
161 /* batch size set to 0.4% of (total memory/#cpus), or max int32 */
162 memsized_batch = min_t(u64, (totalram_pages/nr)/256, 0x7fffffff);
163
164 vm_committed_as_batch = max_t(s32, memsized_batch, batch);
165}
166
167static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
168 unsigned long action, void *arg)
169{
170 switch (action) {
171 case MEM_ONLINE:
172 case MEM_OFFLINE:
173 mm_compute_batch();
174 default:
175 break;
176 }
177 return NOTIFY_OK;
178}
179
180static struct notifier_block compute_batch_nb __meminitdata = {
181 .notifier_call = mm_compute_batch_notifier,
182 .priority = IPC_CALLBACK_PRI, /* use lowest priority */
183};
184
185static int __init mm_compute_batch_init(void)
186{
187 mm_compute_batch();
188 register_hotmemory_notifier(&compute_batch_nb);
189
190 return 0;
191}
192
193__initcall(mm_compute_batch_init);
194
195#endif
196
ff7ea79c
NA
197static int __init mm_sysfs_init(void)
198{
199 mm_kobj = kobject_create_and_add("mm", kernel_kobj);
200 if (!mm_kobj)
201 return -ENOMEM;
202
203 return 0;
204}
205
206__initcall(mm_sysfs_init);