/*
 * linux/kernel/power/swsusp.c
 *
 * This file provides code to write suspend image to swap and read it back.
 *
 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
 *
 * This file is released under the GPLv2.
 *
 * I'd like to thank the following people for their work:
 *
 * Pavel Machek <pavel@ucw.cz>:
 * Modifications, defectiveness pointing, being with me at the very beginning,
 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
 *
 * Steve Doddi <dirk@loth.demon.co.uk>:
 * Support the possibility of hardware state restoring.
 *
 * Raph <grey.havens@earthling.net>:
 * Support for preserving states of network devices and virtual console
 * (including X and svgatextmode)
 *
 * Kurt Garloff <garloff@suse.de>:
 * Straightened the critical function in order to prevent compilers from
 * playing tricks with local variables.
 *
 * Andreas Mohr <a.mohr@mailto.de>
 *
 * Alex Badea <vampire@go.ro>:
 * Fixed runaway init
 *
 * Rafael J. Wysocki <rjw@sisk.pl>
 * Reworked the freeing of memory and the handling of swap
 *
 * More state savers are welcome. Especially for the scsi layer...
 *
 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
 */
40 | ||
1da177e4 LT |
41 | #include <linux/mm.h> |
42 | #include <linux/suspend.h> | |
1da177e4 | 43 | #include <linux/spinlock.h> |
1da177e4 LT |
44 | #include <linux/kernel.h> |
45 | #include <linux/major.h> | |
46 | #include <linux/swap.h> | |
47 | #include <linux/pm.h> | |
1da177e4 LT |
48 | #include <linux/swapops.h> |
49 | #include <linux/bootmem.h> | |
50 | #include <linux/syscalls.h> | |
1da177e4 | 51 | #include <linux/highmem.h> |
0d3a9abe | 52 | #include <linux/time.h> |
d1d241cc | 53 | #include <linux/rbtree.h> |
1da177e4 LT |
54 | |
55 | #include "power.h" | |
56 | ||
/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size = 500 * 1024 * 1024;

/* Marked __nosavedata so its value survives restore without being part of
 * the image.  NOTE(review): presumably set nonzero while writing the image
 * and read after the atomic restore to tell suspend from resume — confirm
 * against the callers in the snapshot/swap code. */
int in_suspend __nosavedata = 0;
66 | ||
/**
 * The following functions are used for tracing the allocated
 * swap pages, so that they can be freed in case of an error.
 */

/* One contiguous range [start, end] (inclusive) of allocated swap offsets,
 * stored as a node of the swsusp_extents red-black tree. */
struct swsusp_extent {
	struct rb_node node;
	unsigned long start;	/* first swap offset in the range */
	unsigned long end;	/* last swap offset in the range (inclusive) */
};

/* Root of the extent tree, ordered by swap offset. */
static struct rb_root swsusp_extents = RB_ROOT;
7088a5c0 | 79 | |
d1d241cc | 80 | static int swsusp_extents_insert(unsigned long swap_offset) |
7088a5c0 | 81 | { |
d1d241cc RW |
82 | struct rb_node **new = &(swsusp_extents.rb_node); |
83 | struct rb_node *parent = NULL; | |
84 | struct swsusp_extent *ext; | |
85 | ||
86 | /* Figure out where to put the new node */ | |
87 | while (*new) { | |
88 | ext = container_of(*new, struct swsusp_extent, node); | |
89 | parent = *new; | |
90 | if (swap_offset < ext->start) { | |
91 | /* Try to merge */ | |
92 | if (swap_offset == ext->start - 1) { | |
93 | ext->start--; | |
94 | return 0; | |
95 | } | |
96 | new = &((*new)->rb_left); | |
97 | } else if (swap_offset > ext->end) { | |
98 | /* Try to merge */ | |
99 | if (swap_offset == ext->end + 1) { | |
100 | ext->end++; | |
101 | return 0; | |
102 | } | |
103 | new = &((*new)->rb_right); | |
104 | } else { | |
105 | /* It already is in the tree */ | |
106 | return -EINVAL; | |
7088a5c0 | 107 | } |
1da177e4 | 108 | } |
d1d241cc RW |
109 | /* Add the new node and rebalance the tree. */ |
110 | ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); | |
111 | if (!ext) | |
112 | return -ENOMEM; | |
113 | ||
114 | ext->start = swap_offset; | |
115 | ext->end = swap_offset; | |
116 | rb_link_node(&ext->node, parent, new); | |
117 | rb_insert_color(&ext->node, &swsusp_extents); | |
f577eb30 | 118 | return 0; |
7088a5c0 | 119 | } |
1da177e4 | 120 | |
d1d241cc RW |
121 | /** |
122 | * alloc_swapdev_block - allocate a swap page and register that it has | |
123 | * been allocated, so that it can be freed in case of an error. | |
124 | */ | |
125 | ||
126 | sector_t alloc_swapdev_block(int swap) | |
7088a5c0 | 127 | { |
f577eb30 RW |
128 | unsigned long offset; |
129 | ||
130 | offset = swp_offset(get_swap_page_of_type(swap)); | |
131 | if (offset) { | |
d1d241cc | 132 | if (swsusp_extents_insert(offset)) |
f577eb30 | 133 | swap_free(swp_entry(swap, offset)); |
3aef83e0 RW |
134 | else |
135 | return swapdev_block(swap, offset); | |
7088a5c0 | 136 | } |
3aef83e0 | 137 | return 0; |
7088a5c0 | 138 | } |
1da177e4 | 139 | |
d1d241cc RW |
140 | /** |
141 | * free_all_swap_pages - free swap pages allocated for saving image data. | |
142 | * It also frees the extents used to register which swap entres had been | |
143 | * allocated. | |
144 | */ | |
145 | ||
146 | void free_all_swap_pages(int swap) | |
7088a5c0 | 147 | { |
d1d241cc RW |
148 | struct rb_node *node; |
149 | ||
150 | while ((node = swsusp_extents.rb_node)) { | |
151 | struct swsusp_extent *ext; | |
152 | unsigned long offset; | |
153 | ||
154 | ext = container_of(node, struct swsusp_extent, node); | |
155 | rb_erase(node, &swsusp_extents); | |
156 | for (offset = ext->start; offset <= ext->end; offset++) | |
157 | swap_free(swp_entry(swap, offset)); | |
158 | ||
159 | kfree(ext); | |
1da177e4 | 160 | } |
7088a5c0 RW |
161 | } |
162 | ||
d1d241cc RW |
163 | int swsusp_swap_in_use(void) |
164 | { | |
165 | return (swsusp_extents.rb_node != NULL); | |
166 | } | |
167 | ||
0d3a9abe RW |
168 | /** |
169 | * swsusp_show_speed - print the time elapsed between two events represented by | |
170 | * @start and @stop | |
171 | * | |
172 | * @nr_pages - number of pages processed between @start and @stop | |
173 | * @msg - introductory message to print | |
174 | */ | |
175 | ||
176 | void swsusp_show_speed(struct timeval *start, struct timeval *stop, | |
177 | unsigned nr_pages, char *msg) | |
178 | { | |
179 | s64 elapsed_centisecs64; | |
180 | int centisecs; | |
181 | int k; | |
182 | int kps; | |
183 | ||
184 | elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); | |
185 | do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); | |
186 | centisecs = elapsed_centisecs64; | |
187 | if (centisecs == 0) | |
188 | centisecs = 1; /* avoid div-by-zero */ | |
189 | k = nr_pages * (PAGE_SIZE / 1024); | |
190 | kps = (k * 100) / centisecs; | |
23976728 RW |
191 | printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", |
192 | msg, k, | |
0d3a9abe RW |
193 | centisecs / 100, centisecs % 100, |
194 | kps / 1000, (kps % 1000) / 10); | |
195 | } | |
196 | ||
/**
 * swsusp_shrink_memory - Try to free as much memory as needed
 *
 * ... but do not OOM-kill anyone
 *
 * Notice: all userland should be stopped before it is called, or
 * livelock is possible.
 */

#define SHRINK_BITE 10000

/* Reclaim at most SHRINK_BITE pages per call so the caller can show
 * progress and re-evaluate how much memory is still needed. */
static inline unsigned long __shrink_memory(long tmp)
{
	return shrink_all_memory(tmp > SHRINK_BITE ? SHRINK_BITE : tmp);
}
72a97e08 RW |
213 | |
int swsusp_shrink_memory(void)
{
	long tmp;
	struct zone *zone;
	unsigned long pages = 0;	/* total pages reclaimed so far */
	unsigned int i = 0;
	char *p = "-\\|/";		/* spinner characters for progress */
	struct timeval start, stop;

	printk(KERN_INFO "PM: Shrinking memory... ");
	do_gettimeofday(&start);
	do {
		long size, highmem_size;

		/*
		 * Estimate how many pages the image will need:
		 * 'tmp' accumulates the lowmem requirement, 'size' the
		 * total (lowmem + highmem) data-page count.
		 */
		highmem_size = count_highmem_pages();
		size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
		tmp = size;
		size += highmem_size;
		for_each_zone (zone)
			if (populated_zone(zone)) {
				/* Metadata (pagemaps etc.) needed per zone. */
				tmp += snapshot_additional_pages(zone);
				if (is_highmem(zone)) {
					highmem_size -=
					zone_page_state(zone, NR_FREE_PAGES);
				} else {
					/* Free lowmem reduces the deficit, but
					 * reserved lowmem cannot be used. */
					tmp -= zone_page_state(zone, NR_FREE_PAGES);
					tmp += zone->lowmem_reserve[ZONE_NORMAL];
				}
			}

		if (highmem_size < 0)
			highmem_size = 0;

		tmp += highmem_size;
		if (tmp > 0) {
			/* Not enough free memory yet: reclaim a bite. */
			tmp = __shrink_memory(tmp);
			if (!tmp)
				return -ENOMEM;
			pages += tmp;
		} else if (size > image_size / PAGE_SIZE) {
			/* Enough memory, but the image would exceed the
			 * preferred image_size: keep shrinking. */
			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
			pages += tmp;
		}
		printk("\b%c", p[i++%4]);
	} while (tmp > 0);
	do_gettimeofday(&stop);
	printk("\bdone (%lu pages freed)\n", pages);
	swsusp_show_speed(&start, &stop, pages, "Freed");

	return 0;
}
3f4b0ef7 RW |
265 | |
/*
 * Platforms, like ACPI, may want us to save some memory used by them during
 * hibernation and to restore the contents of this memory during the subsequent
 * resume. The code below implements a mechanism allowing us to do that.
 */

/* One page-sized (or smaller) chunk of a platform NVS region. */
struct nvs_page {
	unsigned long phys_start;	/* physical address of the chunk */
	unsigned int size;		/* bytes to save (at most PAGE_SIZE) */
	void *kaddr;			/* ioremap'ed address used for copying */
	void *data;			/* RAM page holding the saved contents */
	struct list_head node;		/* link in nvs_list */
};

/* All registered NVS chunks, in registration order. */
static LIST_HEAD(nvs_list);
281 | ||
282 | /** | |
283 | * hibernate_nvs_register - register platform NVS memory region to save | |
284 | * @start - physical address of the region | |
285 | * @size - size of the region | |
286 | * | |
287 | * The NVS region need not be page-aligned (both ends) and we arrange | |
288 | * things so that the data from page-aligned addresses in this region will | |
289 | * be copied into separate RAM pages. | |
290 | */ | |
291 | int hibernate_nvs_register(unsigned long start, unsigned long size) | |
292 | { | |
293 | struct nvs_page *entry, *next; | |
294 | ||
295 | while (size > 0) { | |
296 | unsigned int nr_bytes; | |
297 | ||
298 | entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); | |
299 | if (!entry) | |
300 | goto Error; | |
301 | ||
302 | list_add_tail(&entry->node, &nvs_list); | |
303 | entry->phys_start = start; | |
304 | nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); | |
305 | entry->size = (size < nr_bytes) ? size : nr_bytes; | |
306 | ||
307 | start += entry->size; | |
308 | size -= entry->size; | |
309 | } | |
310 | return 0; | |
311 | ||
312 | Error: | |
313 | list_for_each_entry_safe(entry, next, &nvs_list, node) { | |
314 | list_del(&entry->node); | |
315 | kfree(entry); | |
316 | } | |
317 | return -ENOMEM; | |
318 | } | |
319 | ||
320 | /** | |
321 | * hibernate_nvs_free - free data pages allocated for saving NVS regions | |
322 | */ | |
323 | void hibernate_nvs_free(void) | |
324 | { | |
325 | struct nvs_page *entry; | |
326 | ||
327 | list_for_each_entry(entry, &nvs_list, node) | |
328 | if (entry->data) { | |
329 | free_page((unsigned long)entry->data); | |
330 | entry->data = NULL; | |
331 | if (entry->kaddr) { | |
332 | iounmap(entry->kaddr); | |
333 | entry->kaddr = NULL; | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | /** | |
339 | * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions | |
340 | */ | |
341 | int hibernate_nvs_alloc(void) | |
342 | { | |
343 | struct nvs_page *entry; | |
344 | ||
345 | list_for_each_entry(entry, &nvs_list, node) { | |
346 | entry->data = (void *)__get_free_page(GFP_KERNEL); | |
347 | if (!entry->data) { | |
348 | hibernate_nvs_free(); | |
349 | return -ENOMEM; | |
350 | } | |
351 | } | |
352 | return 0; | |
353 | } | |
354 | ||
355 | /** | |
356 | * hibernate_nvs_save - save NVS memory regions | |
357 | */ | |
358 | void hibernate_nvs_save(void) | |
359 | { | |
360 | struct nvs_page *entry; | |
361 | ||
362 | printk(KERN_INFO "PM: Saving platform NVS memory\n"); | |
363 | ||
364 | list_for_each_entry(entry, &nvs_list, node) | |
365 | if (entry->data) { | |
366 | entry->kaddr = ioremap(entry->phys_start, entry->size); | |
367 | memcpy(entry->data, entry->kaddr, entry->size); | |
368 | } | |
369 | } | |
370 | ||
371 | /** | |
372 | * hibernate_nvs_restore - restore NVS memory regions | |
373 | * | |
374 | * This function is going to be called with interrupts disabled, so it | |
375 | * cannot iounmap the virtual addresses used to access the NVS region. | |
376 | */ | |
377 | void hibernate_nvs_restore(void) | |
378 | { | |
379 | struct nvs_page *entry; | |
380 | ||
381 | printk(KERN_INFO "PM: Restoring platform NVS memory\n"); | |
382 | ||
383 | list_for_each_entry(entry, &nvs_list, node) | |
384 | if (entry->data) | |
385 | memcpy(entry->kaddr, entry->data, entry->size); | |
386 | } |