/*
 * linux/kernel/power/swsusp.c
 *
 * This file provides code to write suspend image to swap and read it back.
 *
 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
 *
 * This file is released under the GPLv2.
 *
 * I'd like to thank the following people for their work:
 *
 * Pavel Machek <pavel@ucw.cz>:
 * Modifications, defectiveness pointing, being with me at the very beginning,
 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
 *
 * Steve Doddi <dirk@loth.demon.co.uk>:
 * Support the possibility of hardware state restoring.
 *
 * Raph <grey.havens@earthling.net>:
 * Support for preserving states of network devices and virtual console
 * (including X and svgatextmode)
 *
 * Kurt Garloff <garloff@suse.de>:
 * Straightened the critical function in order to prevent compilers from
 * playing tricks with local variables.
 *
 * Andreas Mohr <a.mohr@mailto.de>
 *
 * Alex Badea <vampire@go.ro>:
 * Fixed runaway init
 *
 * Rafael J. Wysocki <rjw@sisk.pl>
 * Reworked the freeing of memory and the handling of swap
 *
 * More state savers are welcome. Especially for the scsi layer...
 *
 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
 */
40 | ||
1da177e4 LT |
41 | #include <linux/mm.h> |
42 | #include <linux/suspend.h> | |
1da177e4 | 43 | #include <linux/spinlock.h> |
1da177e4 LT |
44 | #include <linux/kernel.h> |
45 | #include <linux/major.h> | |
46 | #include <linux/swap.h> | |
47 | #include <linux/pm.h> | |
1da177e4 LT |
48 | #include <linux/swapops.h> |
49 | #include <linux/bootmem.h> | |
50 | #include <linux/syscalls.h> | |
1da177e4 | 51 | #include <linux/highmem.h> |
0d3a9abe | 52 | #include <linux/time.h> |
d1d241cc | 53 | #include <linux/rbtree.h> |
1da177e4 LT |
54 | |
55 | #include "power.h" | |
56 | ||
/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size = 500 * 1024 * 1024;

/* Set while we are on the way into the image; presumably __nosavedata
 * keeps this flag from being clobbered when the image is restored —
 * TODO(review): confirm against the __nosavedata definition in power.h.
 */
int in_suspend __nosavedata = 0;
66 | ||
3448097f LT |
67 | #ifdef CONFIG_HIGHMEM |
68 | unsigned int count_highmem_pages(void); | |
3448097f LT |
69 | int restore_highmem(void); |
70 | #else | |
3448097f LT |
71 | static inline int restore_highmem(void) { return 0; } |
72 | static inline unsigned int count_highmem_pages(void) { return 0; } | |
73 | #endif | |
74 | ||
/**
 * The following functions are used for tracing the allocated
 * swap pages, so that they can be freed in case of an error.
 */

/* A contiguous range of allocated swap offsets, linked into an rb-tree. */
struct swsusp_extent {
	struct rb_node node;	/* links this extent into swsusp_extents */
	unsigned long start;	/* first swap offset in the range (inclusive) */
	unsigned long end;	/* last swap offset in the range (inclusive) */
};

/* Root of the extent tree; RB_ROOT (empty) when no swap pages are tracked. */
static struct rb_root swsusp_extents = RB_ROOT;
7088a5c0 | 87 | |
d1d241cc | 88 | static int swsusp_extents_insert(unsigned long swap_offset) |
7088a5c0 | 89 | { |
d1d241cc RW |
90 | struct rb_node **new = &(swsusp_extents.rb_node); |
91 | struct rb_node *parent = NULL; | |
92 | struct swsusp_extent *ext; | |
93 | ||
94 | /* Figure out where to put the new node */ | |
95 | while (*new) { | |
96 | ext = container_of(*new, struct swsusp_extent, node); | |
97 | parent = *new; | |
98 | if (swap_offset < ext->start) { | |
99 | /* Try to merge */ | |
100 | if (swap_offset == ext->start - 1) { | |
101 | ext->start--; | |
102 | return 0; | |
103 | } | |
104 | new = &((*new)->rb_left); | |
105 | } else if (swap_offset > ext->end) { | |
106 | /* Try to merge */ | |
107 | if (swap_offset == ext->end + 1) { | |
108 | ext->end++; | |
109 | return 0; | |
110 | } | |
111 | new = &((*new)->rb_right); | |
112 | } else { | |
113 | /* It already is in the tree */ | |
114 | return -EINVAL; | |
7088a5c0 | 115 | } |
1da177e4 | 116 | } |
d1d241cc RW |
117 | /* Add the new node and rebalance the tree. */ |
118 | ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); | |
119 | if (!ext) | |
120 | return -ENOMEM; | |
121 | ||
122 | ext->start = swap_offset; | |
123 | ext->end = swap_offset; | |
124 | rb_link_node(&ext->node, parent, new); | |
125 | rb_insert_color(&ext->node, &swsusp_extents); | |
f577eb30 | 126 | return 0; |
7088a5c0 | 127 | } |
1da177e4 | 128 | |
d1d241cc RW |
129 | /** |
130 | * alloc_swapdev_block - allocate a swap page and register that it has | |
131 | * been allocated, so that it can be freed in case of an error. | |
132 | */ | |
133 | ||
134 | sector_t alloc_swapdev_block(int swap) | |
7088a5c0 | 135 | { |
f577eb30 RW |
136 | unsigned long offset; |
137 | ||
138 | offset = swp_offset(get_swap_page_of_type(swap)); | |
139 | if (offset) { | |
d1d241cc | 140 | if (swsusp_extents_insert(offset)) |
f577eb30 | 141 | swap_free(swp_entry(swap, offset)); |
3aef83e0 RW |
142 | else |
143 | return swapdev_block(swap, offset); | |
7088a5c0 | 144 | } |
3aef83e0 | 145 | return 0; |
7088a5c0 | 146 | } |
1da177e4 | 147 | |
d1d241cc RW |
148 | /** |
149 | * free_all_swap_pages - free swap pages allocated for saving image data. | |
150 | * It also frees the extents used to register which swap entres had been | |
151 | * allocated. | |
152 | */ | |
153 | ||
154 | void free_all_swap_pages(int swap) | |
7088a5c0 | 155 | { |
d1d241cc RW |
156 | struct rb_node *node; |
157 | ||
158 | while ((node = swsusp_extents.rb_node)) { | |
159 | struct swsusp_extent *ext; | |
160 | unsigned long offset; | |
161 | ||
162 | ext = container_of(node, struct swsusp_extent, node); | |
163 | rb_erase(node, &swsusp_extents); | |
164 | for (offset = ext->start; offset <= ext->end; offset++) | |
165 | swap_free(swp_entry(swap, offset)); | |
166 | ||
167 | kfree(ext); | |
1da177e4 | 168 | } |
7088a5c0 RW |
169 | } |
170 | ||
d1d241cc RW |
171 | int swsusp_swap_in_use(void) |
172 | { | |
173 | return (swsusp_extents.rb_node != NULL); | |
174 | } | |
175 | ||
0d3a9abe RW |
176 | /** |
177 | * swsusp_show_speed - print the time elapsed between two events represented by | |
178 | * @start and @stop | |
179 | * | |
180 | * @nr_pages - number of pages processed between @start and @stop | |
181 | * @msg - introductory message to print | |
182 | */ | |
183 | ||
184 | void swsusp_show_speed(struct timeval *start, struct timeval *stop, | |
185 | unsigned nr_pages, char *msg) | |
186 | { | |
187 | s64 elapsed_centisecs64; | |
188 | int centisecs; | |
189 | int k; | |
190 | int kps; | |
191 | ||
192 | elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); | |
193 | do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); | |
194 | centisecs = elapsed_centisecs64; | |
195 | if (centisecs == 0) | |
196 | centisecs = 1; /* avoid div-by-zero */ | |
197 | k = nr_pages * (PAGE_SIZE / 1024); | |
198 | kps = (k * 100) / centisecs; | |
199 | printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, | |
200 | centisecs / 100, centisecs % 100, | |
201 | kps / 1000, (kps % 1000) / 10); | |
202 | } | |
203 | ||
72a97e08 RW |
204 | /** |
205 | * swsusp_shrink_memory - Try to free as much memory as needed | |
206 | * | |
207 | * ... but do not OOM-kill anyone | |
208 | * | |
209 | * Notice: all userland should be stopped before it is called, or | |
210 | * livelock is possible. | |
211 | */ | |
212 | ||
213 | #define SHRINK_BITE 10000 | |
d6277db4 RW |
214 | static inline unsigned long __shrink_memory(long tmp) |
215 | { | |
216 | if (tmp > SHRINK_BITE) | |
217 | tmp = SHRINK_BITE; | |
218 | return shrink_all_memory(tmp); | |
219 | } | |
72a97e08 RW |
220 | |
/* Repeatedly reclaim memory until the projected image (plus per-zone
 * overhead) fits in free memory and does not exceed image_size.
 * Returns 0 on success or -ENOMEM if reclaim makes no progress while
 * more memory is still needed.
 */
int swsusp_shrink_memory(void)
{
	long tmp;
	struct zone *zone;
	unsigned long pages = 0;
	unsigned int i = 0;
	char *p = "-\\|/";	/* spinner characters for console progress */
	struct timeval start, stop;

	printk("Shrinking memory... ");
	do_gettimeofday(&start);
	do {
		long size, highmem_size;

		highmem_size = count_highmem_pages();
		size = count_data_pages() + PAGES_FOR_IO;
		/* tmp tracks the lowmem deficit; size the total image estimate */
		tmp = size;
		size += highmem_size;
		for_each_zone (zone)
			if (populated_zone(zone)) {
				tmp += snapshot_additional_pages(zone);
				if (is_highmem(zone)) {
					/* free highmem reduces the highmem need */
					highmem_size -=
					zone_page_state(zone, NR_FREE_PAGES);
				} else {
					/* free lowmem minus the zone's reserve */
					tmp -= zone_page_state(zone, NR_FREE_PAGES);
					tmp += zone->lowmem_reserve[ZONE_NORMAL];
				}
			}

		if (highmem_size < 0)
			highmem_size = 0;

		/* unsatisfied highmem demand must come out of lowmem too */
		tmp += highmem_size;
		if (tmp > 0) {
			/* Not enough free memory for the image yet. */
			tmp = __shrink_memory(tmp);
			if (!tmp)
				return -ENOMEM;	/* reclaim made no progress */
			pages += tmp;
		} else if (size > image_size / PAGE_SIZE) {
			/* Image fits in memory but exceeds the preferred size. */
			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
			pages += tmp;
		}
		printk("\b%c", p[i++%4]);
	} while (tmp > 0);
	do_gettimeofday(&stop);
	printk("\bdone (%lu pages freed)\n", pages);
	swsusp_show_speed(&start, &stop, pages, "Freed");

	return 0;
}
272 | ||
1da177e4 LT |
/* Power devices down with interrupts disabled, save processor state and
 * call swsusp_arch_suspend() to create the image.  On the resume path
 * control re-appears after swsusp_arch_suspend() and the processor and
 * device state are restored.  Returns 0 on success or a negative error.
 */
int swsusp_suspend(void)
{
	int error;

	if ((error = arch_prepare_suspend()))
		return error;

	local_irq_disable();
	/* At this point, device_suspend() has been called, but *not*
	 * device_power_down(). We *must* device_power_down() now.
	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
	 * become desynchronized with the actual state of the hardware
	 * at resume time, and evil weirdness ensues.
	 */
	if ((error = device_power_down(PMSG_FREEZE))) {
		printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
		goto Enable_irqs;
	}

	save_processor_state();
	if ((error = swsusp_arch_suspend()))
		printk(KERN_ERR "Error %d suspending\n", error);
	/* Restore control flow magically appears here */
	restore_processor_state();
	/* NOTE: device_power_up() is just a resume() for devices
	 * that suspended with irqs off ... no overall powerup.
	 */
	device_power_up();
 Enable_irqs:
	local_irq_enable();
	return error;
}
305 | ||
/* Hand control over to the image loaded earlier: power devices down,
 * restore highmem and jump into the image via swsusp_arch_resume().
 * This function only returns if swsusp_arch_resume() fails, in which
 * case the image memory is freed and the error is propagated.
 */
int swsusp_resume(void)
{
	int error;

	local_irq_disable();
	/* NOTE: device_power_down() is just a suspend() with irqs off;
	 * it has no special "power things down" semantics
	 */
	if (device_power_down(PMSG_PRETHAW))
		printk(KERN_ERR "Some devices failed to power down, very bad\n");
	/* We'll ignore saved state, but this gets preempt count (etc) right */
	save_processor_state();
	error = restore_highmem();
	if (!error) {
		error = swsusp_arch_resume();
		/* The code below is only ever reached in case of a failure.
		 * Otherwise execution continues at place where
		 * swsusp_arch_suspend() was called
		 */
		BUG_ON(!error);
		/* This call to restore_highmem() undoes the previous one */
		restore_highmem();
	}
	/* The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures
	 */
	swsusp_free();
	restore_processor_state();
	touch_softlockup_watchdog();
	device_power_up();
	local_irq_enable();
	return error;
}