Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/kernel/power/swsusp.c | |
3 | * | |
96bc7aec | 4 | * This file provides code to write suspend image to swap and read it back. |
1da177e4 LT |
5 | * |
6 | * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
25761b6e | 7 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz> |
1da177e4 LT |
8 | * |
9 | * This file is released under the GPLv2. | |
10 | * | |
11 | * I'd like to thank the following people for their work: | |
2e4d5822 | 12 | * |
1da177e4 LT |
13 | * Pavel Machek <pavel@ucw.cz>: |
14 | * Modifications, defectiveness pointing, being with me at the very beginning, | |
15 | * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. | |
16 | * | |
2e4d5822 | 17 | * Steve Doddi <dirk@loth.demon.co.uk>: |
1da177e4 LT |
18 | * Support the possibility of hardware state restoring. |
19 | * | |
20 | * Raph <grey.havens@earthling.net>: | |
21 | * Support for preserving states of network devices and virtual console | |
22 | * (including X and svgatextmode) | |
23 | * | |
24 | * Kurt Garloff <garloff@suse.de>: | |
25 | * Straightened the critical function in order to prevent compilers from | |
26 | * playing tricks with local variables. | |
27 | * | |
28 | * Andreas Mohr <a.mohr@mailto.de> | |
29 | * | |
30 | * Alex Badea <vampire@go.ro>: | |
31 | * Fixed runaway init | |
32 | * | |
c2ff18f4 AS |
33 | * Andreas Steinmetz <ast@domdv.de>: |
34 | * Added encrypted suspend option | |
35 | * | |
1da177e4 LT |
36 | * More state savers are welcome. Especially for the scsi layer... |
37 | * | |
38 | * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt | |
39 | */ | |
40 | ||
41 | #include <linux/module.h> | |
42 | #include <linux/mm.h> | |
43 | #include <linux/suspend.h> | |
44 | #include <linux/smp_lock.h> | |
45 | #include <linux/file.h> | |
46 | #include <linux/utsname.h> | |
47 | #include <linux/version.h> | |
48 | #include <linux/delay.h> | |
1da177e4 | 49 | #include <linux/bitops.h> |
1da177e4 LT |
50 | #include <linux/spinlock.h> |
51 | #include <linux/genhd.h> | |
52 | #include <linux/kernel.h> | |
53 | #include <linux/major.h> | |
54 | #include <linux/swap.h> | |
55 | #include <linux/pm.h> | |
56 | #include <linux/device.h> | |
57 | #include <linux/buffer_head.h> | |
58 | #include <linux/swapops.h> | |
59 | #include <linux/bootmem.h> | |
60 | #include <linux/syscalls.h> | |
1da177e4 LT |
61 | #include <linux/highmem.h> |
62 | #include <linux/bio.h> | |
63 | ||
64 | #include <asm/uaccess.h> | |
65 | #include <asm/mmu_context.h> | |
66 | #include <asm/pgtable.h> | |
67 | #include <asm/tlbflush.h> | |
68 | #include <asm/io.h> | |
69 | ||
c2ff18f4 AS |
70 | #include <linux/random.h> |
71 | #include <linux/crypto.h> | |
72 | #include <asm/scatterlist.h> | |
73 | ||
1da177e4 LT |
74 | #include "power.h" |
75 | ||
0fbeb5a4 RW |
/*
 * Highmem save/restore hooks used by swsusp_suspend() and swsusp_resume().
 * With CONFIG_HIGHMEM the real implementations live elsewhere and are only
 * declared here; without it both hooks are no-ops that report success.
 */
#ifdef CONFIG_HIGHMEM
int save_highmem(void);
int restore_highmem(void);
#else
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
#endif
83 | ||
c2ff18f4 AS |
84 | #define CIPHER "aes" |
85 | #define MAXKEY 32 | |
86 | #define MAXIV 32 | |
87 | ||
1da177e4 LT |
88 | extern char resume_file[]; |
89 | ||
90 | /* Local variables that should not be affected by save */ | |
25761b6e | 91 | unsigned int nr_copy_pages __nosavedata = 0; |
1da177e4 LT |
92 | |
93 | /* Suspend pagedir is allocated before final copy, therefore it | |
2e4d5822 | 94 | must be freed after resume |
1da177e4 | 95 | |
1da177e4 LT |
96 | Warning: this is even more evil than it seems. Pagedirs this file |
97 | talks about are completely different from page directories used by | |
98 | MMU hardware. | |
99 | */ | |
100 | suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; | |
1da177e4 LT |
101 | |
102 | #define SWSUSP_SIG "S1SUSPEND" | |
103 | ||
/*
 * Page-sized, page-aligned buffer holding the first page of the resume
 * swap device (it is read from / written to swap offset 0 by
 * mark_swapfiles() and check_sig()).
 *
 * The interesting fields sit at the very end of the page:
 *   key_iv      - copy of the key/IV material used for the encrypted image
 *   swsusp_info - swap entry of the page holding struct swsusp_info
 *   orig_sig    - swap signature that was found before suspend, so that it
 *                 can be put back on resume
 *   sig         - current signature (SWSUSP_SIG while an image is present)
 *
 * "reserved" pads the structure so that the total is PAGE_SIZE; the
 * constant 20 accounts for the two 10-byte signature arrays.
 *
 * When encryption is enabled the same buffer is also used by
 * crypto_write() as the destination page for the cipher output.
 */
static struct swsusp_header {
	char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)];
	u8 key_iv[MAXKEY+MAXIV];
	swp_entry_t swsusp_info;
	char	orig_sig[10];
	char	sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
111 | ||
112 | static struct swsusp_info swsusp_info; | |
113 | ||
1da177e4 LT |
114 | /* |
115 | * Saving part... | |
116 | */ | |
117 | ||
118 | /* We memorize in swapfile_used what swap devices are used for suspension */ | |
119 | #define SWAPFILE_UNUSED 0 | |
120 | #define SWAPFILE_SUSPEND 1 /* This is the suspending device */ | |
121 | #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ | |
122 | ||
123 | static unsigned short swapfile_used[MAX_SWAPFILES]; | |
124 | static unsigned short root_swap; | |
125 | ||
dc19d507 PM |
126 | static int write_page(unsigned long addr, swp_entry_t *loc); |
127 | static int bio_read_page(pgoff_t page_off, void *page); | |
c2ff18f4 AS |
128 | |
129 | static u8 key_iv[MAXKEY+MAXIV]; | |
130 | ||
131 | #ifdef CONFIG_SWSUSP_ENCRYPT | |
132 | ||
133 | static int crypto_init(int mode, void **mem) | |
134 | { | |
135 | int error = 0; | |
136 | int len; | |
137 | char *modemsg; | |
138 | struct crypto_tfm *tfm; | |
139 | ||
140 | modemsg = mode ? "suspend not possible" : "resume not possible"; | |
141 | ||
142 | tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC); | |
143 | if(!tfm) { | |
144 | printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg); | |
145 | error = -EINVAL; | |
146 | goto out; | |
147 | } | |
148 | ||
149 | if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) { | |
150 | printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg); | |
151 | error = -ENOKEY; | |
152 | goto fail; | |
153 | } | |
154 | ||
155 | if (mode) | |
156 | get_random_bytes(key_iv, MAXKEY+MAXIV); | |
157 | ||
158 | len = crypto_tfm_alg_max_keysize(tfm); | |
159 | if (len > MAXKEY) | |
160 | len = MAXKEY; | |
161 | ||
162 | if (crypto_cipher_setkey(tfm, key_iv, len)) { | |
163 | printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg); | |
164 | error = -EKEYREJECTED; | |
165 | goto fail; | |
166 | } | |
167 | ||
168 | len = crypto_tfm_alg_ivsize(tfm); | |
169 | ||
170 | if (MAXIV < len) { | |
171 | printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg); | |
172 | error = -EOVERFLOW; | |
173 | goto fail; | |
174 | } | |
175 | ||
176 | crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len); | |
177 | ||
178 | *mem=(void *)tfm; | |
179 | ||
180 | goto out; | |
181 | ||
182 | fail: crypto_free_tfm(tfm); | |
183 | out: return error; | |
184 | } | |
185 | ||
/* Release the transform handed out by crypto_init(). */
static __inline__ void crypto_exit(void *mem)
{
	struct crypto_tfm *cipher = (struct crypto_tfm *)mem;

	crypto_free_tfm(cipher);
}
190 | ||
191 | static __inline__ int crypto_write(struct pbe *p, void *mem) | |
192 | { | |
193 | int error = 0; | |
194 | struct scatterlist src, dst; | |
195 | ||
196 | src.page = virt_to_page(p->address); | |
197 | src.offset = 0; | |
198 | src.length = PAGE_SIZE; | |
199 | dst.page = virt_to_page((void *)&swsusp_header); | |
200 | dst.offset = 0; | |
201 | dst.length = PAGE_SIZE; | |
202 | ||
203 | error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src, | |
204 | PAGE_SIZE); | |
205 | ||
206 | if (!error) | |
207 | error = write_page((unsigned long)&swsusp_header, | |
208 | &(p->swap_address)); | |
209 | return error; | |
210 | } | |
211 | ||
212 | static __inline__ int crypto_read(struct pbe *p, void *mem) | |
213 | { | |
214 | int error = 0; | |
215 | struct scatterlist src, dst; | |
216 | ||
217 | error = bio_read_page(swp_offset(p->swap_address), (void *)p->address); | |
218 | if (!error) { | |
219 | src.offset = 0; | |
220 | src.length = PAGE_SIZE; | |
221 | dst.offset = 0; | |
222 | dst.length = PAGE_SIZE; | |
223 | src.page = dst.page = virt_to_page((void *)p->address); | |
224 | ||
225 | error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst, | |
226 | &src, PAGE_SIZE); | |
227 | } | |
228 | return error; | |
229 | } | |
230 | #else | |
231 | static __inline__ int crypto_init(int mode, void *mem) | |
232 | { | |
233 | return 0; | |
234 | } | |
235 | ||
/* Nothing to release when image encryption is compiled out. */
static __inline__ void crypto_exit(void *mem)
{
}
239 | ||
240 | static __inline__ int crypto_write(struct pbe *p, void *mem) | |
241 | { | |
242 | return write_page(p->address, &(p->swap_address)); | |
243 | } | |
244 | ||
245 | static __inline__ int crypto_read(struct pbe *p, void *mem) | |
246 | { | |
247 | return bio_read_page(swp_offset(p->swap_address), (void *)p->address); | |
248 | } | |
249 | #endif | |
250 | ||
1da177e4 LT |
251 | static int mark_swapfiles(swp_entry_t prev) |
252 | { | |
253 | int error; | |
254 | ||
2e4d5822 | 255 | rw_swap_page_sync(READ, |
1da177e4 LT |
256 | swp_entry(root_swap, 0), |
257 | virt_to_page((unsigned long)&swsusp_header)); | |
258 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || | |
259 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | |
260 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | |
261 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | |
c2ff18f4 | 262 | memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV); |
1da177e4 | 263 | swsusp_header.swsusp_info = prev; |
2e4d5822 | 264 | error = rw_swap_page_sync(WRITE, |
1da177e4 LT |
265 | swp_entry(root_swap, 0), |
266 | virt_to_page((unsigned long) | |
267 | &swsusp_header)); | |
268 | } else { | |
269 | pr_debug("swsusp: Partition is not swap space.\n"); | |
270 | error = -ENODEV; | |
271 | } | |
272 | return error; | |
273 | } | |
274 | ||
275 | /* | |
276 | * Check whether the swap device is the specified resume | |
277 | * device, irrespective of whether they are specified by | |
278 | * identical names. | |
279 | * | |
280 | * (Thus, device inode aliasing is allowed. You can say /dev/hda4 | |
281 | * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs] | |
282 | * and they'll be considered the same device. This is *necessary* for | |
283 | * devfs, since the resume code can only recognize the form /dev/hda4, | |
284 | * but the suspend code would see the long name.) | |
285 | */ | |
286 | static int is_resume_device(const struct swap_info_struct *swap_info) | |
287 | { | |
288 | struct file *file = swap_info->swap_file; | |
289 | struct inode *inode = file->f_dentry->d_inode; | |
290 | ||
291 | return S_ISBLK(inode->i_mode) && | |
292 | swsusp_resume_device == MKDEV(imajor(inode), iminor(inode)); | |
293 | } | |
294 | ||
295 | static int swsusp_swap_check(void) /* This is called before saving image */ | |
296 | { | |
297 | int i, len; | |
2e4d5822 | 298 | |
1da177e4 LT |
299 | len=strlen(resume_file); |
300 | root_swap = 0xFFFF; | |
2e4d5822 | 301 | |
dae06ac4 | 302 | spin_lock(&swap_lock); |
2e4d5822 | 303 | for (i=0; i<MAX_SWAPFILES; i++) { |
dae06ac4 | 304 | if (!(swap_info[i].flags & SWP_WRITEOK)) { |
1da177e4 LT |
305 | swapfile_used[i]=SWAPFILE_UNUSED; |
306 | } else { | |
2e4d5822 | 307 | if (!len) { |
1da177e4 | 308 | printk(KERN_WARNING "resume= option should be used to set suspend device" ); |
2e4d5822 | 309 | if (root_swap == 0xFFFF) { |
1da177e4 LT |
310 | swapfile_used[i] = SWAPFILE_SUSPEND; |
311 | root_swap = i; | |
312 | } else | |
2e4d5822 | 313 | swapfile_used[i] = SWAPFILE_IGNORED; |
1da177e4 LT |
314 | } else { |
315 | /* we ignore all swap devices that are not the resume_file */ | |
316 | if (is_resume_device(&swap_info[i])) { | |
317 | swapfile_used[i] = SWAPFILE_SUSPEND; | |
318 | root_swap = i; | |
319 | } else { | |
320 | swapfile_used[i] = SWAPFILE_IGNORED; | |
321 | } | |
322 | } | |
323 | } | |
324 | } | |
dae06ac4 | 325 | spin_unlock(&swap_lock); |
1da177e4 LT |
326 | return (root_swap != 0xffff) ? 0 : -ENODEV; |
327 | } | |
328 | ||
329 | /** | |
330 | * This is called after saving image so modification | |
331 | * will be lost after resume... and that's what we want. | |
332 | * we make the device unusable. A new call to | |
2e4d5822 | 333 | * lock_swapdevices can unlock the devices. |
1da177e4 LT |
334 | */ |
335 | static void lock_swapdevices(void) | |
336 | { | |
337 | int i; | |
338 | ||
dae06ac4 | 339 | spin_lock(&swap_lock); |
2e4d5822 PM |
340 | for (i = 0; i< MAX_SWAPFILES; i++) |
341 | if (swapfile_used[i] == SWAPFILE_IGNORED) { | |
dae06ac4 | 342 | swap_info[i].flags ^= SWP_WRITEOK; |
1da177e4 | 343 | } |
dae06ac4 | 344 | spin_unlock(&swap_lock); |
1da177e4 LT |
345 | } |
346 | ||
347 | /** | |
8686bcd0 | 348 | * write_page - Write one page to a fresh swap location. |
1da177e4 LT |
349 | * @addr: Address we're writing. |
350 | * @loc: Place to store the entry we used. | |
351 | * | |
352 | * Allocate a new swap entry and 'sync' it. Note we discard -EIO | |
2e4d5822 | 353 | * errors. That is an artifact left over from swsusp. It did not |
1da177e4 LT |
354 | * check the return of rw_swap_page_sync() at all, since most pages |
355 | * written back to swap would return -EIO. | |
356 | * This is a partial improvement, since we will at least return other | |
357 | * errors, though we need to eventually fix the damn code. | |
358 | */ | |
dc19d507 | 359 | static int write_page(unsigned long addr, swp_entry_t *loc) |
1da177e4 LT |
360 | { |
361 | swp_entry_t entry; | |
362 | int error = 0; | |
363 | ||
364 | entry = get_swap_page(); | |
2e4d5822 | 365 | if (swp_offset(entry) && |
1da177e4 LT |
366 | swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { |
367 | error = rw_swap_page_sync(WRITE, entry, | |
368 | virt_to_page(addr)); | |
369 | if (error == -EIO) | |
370 | error = 0; | |
371 | if (!error) | |
372 | *loc = entry; | |
373 | } else | |
374 | error = -ENOSPC; | |
375 | return error; | |
376 | } | |
377 | ||
378 | /** | |
379 | * data_free - Free the swap entries used by the saved image. | |
380 | * | |
2e4d5822 | 381 | * Walk the list of used swap entries and free each one. |
1da177e4 LT |
382 | * This is only used for cleanup when suspend fails. |
383 | */ | |
384 | static void data_free(void) | |
385 | { | |
386 | swp_entry_t entry; | |
dc19d507 | 387 | struct pbe *p; |
1da177e4 | 388 | |
47b90ffe | 389 | for_each_pbe (p, pagedir_nosave) { |
254b5477 | 390 | entry = p->swap_address; |
1da177e4 LT |
391 | if (entry.val) |
392 | swap_free(entry); | |
393 | else | |
394 | break; | |
1da177e4 LT |
395 | } |
396 | } | |
397 | ||
398 | /** | |
399 | * data_write - Write saved image to swap. | |
400 | * | |
401 | * Walk the list of pages in the image and sync each one to swap. | |
402 | */ | |
403 | static int data_write(void) | |
404 | { | |
405 | int error = 0, i = 0; | |
406 | unsigned int mod = nr_copy_pages / 100; | |
407 | struct pbe *p; | |
c2ff18f4 AS |
408 | void *tfm; |
409 | ||
410 | if ((error = crypto_init(1, &tfm))) | |
411 | return error; | |
1da177e4 LT |
412 | |
413 | if (!mod) | |
414 | mod = 1; | |
415 | ||
416 | printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); | |
2e4d5822 | 417 | for_each_pbe (p, pagedir_nosave) { |
1da177e4 LT |
418 | if (!(i%mod)) |
419 | printk( "\b\b\b\b%3d%%", i / mod ); | |
c2ff18f4 AS |
420 | if ((error = crypto_write(p, tfm))) { |
421 | crypto_exit(tfm); | |
1da177e4 | 422 | return error; |
c2ff18f4 | 423 | } |
1da177e4 LT |
424 | i++; |
425 | } | |
426 | printk("\b\b\b\bdone\n"); | |
c2ff18f4 | 427 | crypto_exit(tfm); |
1da177e4 LT |
428 | return error; |
429 | } | |
430 | ||
431 | static void dump_info(void) | |
432 | { | |
433 | pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code); | |
434 | pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages); | |
435 | pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname); | |
436 | pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename); | |
437 | pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release); | |
438 | pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version); | |
439 | pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine); | |
440 | pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); | |
441 | pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); | |
442 | pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages); | |
443 | pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); | |
444 | } | |
445 | ||
446 | static void init_header(void) | |
447 | { | |
448 | memset(&swsusp_info, 0, sizeof(swsusp_info)); | |
449 | swsusp_info.version_code = LINUX_VERSION_CODE; | |
450 | swsusp_info.num_physpages = num_physpages; | |
451 | memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); | |
452 | ||
453 | swsusp_info.suspend_pagedir = pagedir_nosave; | |
454 | swsusp_info.cpus = num_online_cpus(); | |
455 | swsusp_info.image_pages = nr_copy_pages; | |
456 | } | |
457 | ||
458 | static int close_swap(void) | |
459 | { | |
460 | swp_entry_t entry; | |
461 | int error; | |
462 | ||
463 | dump_info(); | |
464 | error = write_page((unsigned long)&swsusp_info, &entry); | |
2e4d5822 | 465 | if (!error) { |
1da177e4 LT |
466 | printk( "S" ); |
467 | error = mark_swapfiles(entry); | |
468 | printk( "|\n" ); | |
469 | } | |
470 | return error; | |
471 | } | |
472 | ||
473 | /** | |
474 | * free_pagedir_entries - Free pages used by the page directory. | |
475 | * | |
476 | * This is used during suspend for error recovery. | |
477 | */ | |
478 | ||
479 | static void free_pagedir_entries(void) | |
480 | { | |
481 | int i; | |
482 | ||
483 | for (i = 0; i < swsusp_info.pagedir_pages; i++) | |
484 | swap_free(swsusp_info.pagedir[i]); | |
485 | } | |
486 | ||
487 | ||
488 | /** | |
489 | * write_pagedir - Write the array of pages holding the page directory. | |
490 | * @last: Last swap entry we write (needed for header). | |
491 | */ | |
492 | ||
493 | static int write_pagedir(void) | |
494 | { | |
495 | int error = 0; | |
dc19d507 PM |
496 | unsigned int n = 0; |
497 | struct pbe *pbe; | |
1da177e4 LT |
498 | |
499 | printk( "Writing pagedir..."); | |
2e4d5822 | 500 | for_each_pb_page (pbe, pagedir_nosave) { |
1da177e4 LT |
501 | if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) |
502 | return error; | |
503 | } | |
504 | ||
505 | swsusp_info.pagedir_pages = n; | |
506 | printk("done (%u pages)\n", n); | |
507 | return error; | |
508 | } | |
509 | ||
0fbeb5a4 RW |
510 | /** |
511 | * enough_swap - Make sure we have enough swap to save the image. | |
512 | * | |
513 | * Returns TRUE or FALSE after checking the total amount of swap | |
514 | * space avaiable. | |
515 | * | |
516 | * FIXME: si_swapinfo(&i) returns all swap devices information. | |
517 | * We should only consider resume_device. | |
518 | */ | |
519 | ||
520 | static int enough_swap(unsigned int nr_pages) | |
521 | { | |
522 | struct sysinfo i; | |
523 | ||
524 | si_swapinfo(&i); | |
525 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | |
526 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | |
527 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | |
528 | } | |
529 | ||
1da177e4 LT |
530 | /** |
531 | * write_suspend_image - Write entire image and metadata. | |
532 | * | |
533 | */ | |
1da177e4 LT |
534 | static int write_suspend_image(void) |
535 | { | |
536 | int error; | |
537 | ||
0fbeb5a4 RW |
538 | if (!enough_swap(nr_copy_pages)) { |
539 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | |
540 | return -ENOSPC; | |
541 | } | |
542 | ||
1da177e4 LT |
543 | init_header(); |
544 | if ((error = data_write())) | |
545 | goto FreeData; | |
546 | ||
547 | if ((error = write_pagedir())) | |
548 | goto FreePagedir; | |
549 | ||
550 | if ((error = close_swap())) | |
551 | goto FreePagedir; | |
552 | Done: | |
c2ff18f4 | 553 | memset(key_iv, 0, MAXKEY+MAXIV); |
1da177e4 LT |
554 | return error; |
555 | FreePagedir: | |
556 | free_pagedir_entries(); | |
557 | FreeData: | |
558 | data_free(); | |
559 | goto Done; | |
560 | } | |
561 | ||
1da177e4 LT |
562 | /* It is important _NOT_ to umount filesystems at this point. We want |
563 | * them synced (in case something goes wrong) but we DO not want to mark | |
564 | * filesystem clean: it is not. (And it does not matter, if we resume | |
565 | * correctly, we'll mark system clean, anyway.) | |
566 | */ | |
567 | int swsusp_write(void) | |
568 | { | |
569 | int error; | |
0245b3e7 | 570 | |
0fbeb5a4 RW |
571 | if ((error = swsusp_swap_check())) { |
572 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | |
573 | return error; | |
574 | } | |
1da177e4 LT |
575 | lock_swapdevices(); |
576 | error = write_suspend_image(); | |
577 | /* This will unlock ignored swap devices since writing is finished */ | |
578 | lock_swapdevices(); | |
579 | return error; | |
1da177e4 LT |
580 | } |
581 | ||
582 | ||
1da177e4 LT |
583 | |
/*
 * swsusp_suspend - create the memory snapshot.
 *
 * Runs with interrupts disabled: powers devices down, saves highmem and
 * the processor state, and calls swsusp_arch_suspend().  The tail of the
 * function (from restore_processor_state() on) undoes those steps in
 * reverse order and is also where execution continues after a successful
 * resume.
 *
 * Returns 0 on success or the first error encountered.
 */
int swsusp_suspend(void)
{
	int error;

	if ((error = arch_prepare_suspend()))
		return error;
	local_irq_disable();
	/* At this point, device_suspend() has been called, but *not*
	 * device_power_down(). We *must* device_power_down() now.
	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
	 * become desynchronized with the actual state of the hardware
	 * at resume time, and evil weirdness ensues.
	 */
	if ((error = device_power_down(PMSG_FREEZE))) {
		printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
		goto Enable_irqs;
	}

	/* Even on failure restore_highmem() is called via Restore_highmem */
	if ((error = save_highmem())) {
		printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
		goto Restore_highmem;
	}

	save_processor_state();
	if ((error = swsusp_arch_suspend()))
		printk(KERN_ERR "Error %d suspending\n", error);
	/* Restore control flow magically appears here */
	restore_processor_state();
Restore_highmem:
	restore_highmem();
	device_power_up();
Enable_irqs:
	local_irq_enable();
	return error;
}
619 | ||
/*
 * swsusp_resume - switch over to the loaded image.
 *
 * Powers devices down with interrupts disabled and calls
 * swsusp_arch_resume().  Everything after that call is the failure
 * path: it frees the image memory, undoes the preparation steps and
 * returns the (non-zero) error.
 */
int swsusp_resume(void)
{
	int error;
	local_irq_disable();
	/* A power-down failure is reported but does not abort the resume */
	if (device_power_down(PMSG_FREEZE))
		printk(KERN_ERR "Some devices failed to power down, very bad\n");
	/* We'll ignore saved state, but this gets preempt count (etc) right */
	save_processor_state();
	error = swsusp_arch_resume();
	/* Code below is only ever reached in case of failure. Otherwise
	 * execution continues at place where swsusp_arch_suspend was called
         */
	BUG_ON(!error);
	/* The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures
	 */
	swsusp_free();
	restore_processor_state();
	restore_highmem();
	touch_softlockup_watchdog();
	device_power_up();
	local_irq_enable();
	return error;
}
645 | ||
1da177e4 | 646 | /** |
ed14b527 RW |
647 | * mark_unsafe_pages - mark the pages that cannot be used for storing |
648 | * the image during resume, because they conflict with the pages that | |
649 | * had been used before suspend | |
1da177e4 LT |
650 | */ |
651 | ||
ed14b527 | 652 | static void mark_unsafe_pages(struct pbe *pblist) |
1da177e4 LT |
653 | { |
654 | struct zone *zone; | |
655 | unsigned long zone_pfn; | |
ed14b527 | 656 | struct pbe *p; |
1da177e4 LT |
657 | |
658 | if (!pblist) /* a sanity check */ | |
ed14b527 | 659 | return; |
1da177e4 | 660 | |
2c1b4a5c | 661 | /* Clear page flags */ |
2e4d5822 | 662 | for_each_zone (zone) { |
ed14b527 RW |
663 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
664 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | |
665 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | |
1da177e4 LT |
666 | zone->zone_start_pfn)); |
667 | } | |
668 | ||
2c1b4a5c | 669 | /* Mark orig addresses */ |
1da177e4 | 670 | for_each_pbe (p, pblist) |
2c1b4a5c | 671 | SetPageNosaveFree(virt_to_page(p->orig_address)); |
1da177e4 | 672 | |
ed14b527 | 673 | } |
1da177e4 | 674 | |
ed14b527 RW |
675 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) |
676 | { | |
677 | /* We assume both lists contain the same number of elements */ | |
678 | while (src) { | |
679 | dst->orig_address = src->orig_address; | |
680 | dst->swap_address = src->swap_address; | |
681 | dst = dst->next; | |
682 | src = src->next; | |
2c1b4a5c | 683 | } |
1da177e4 LT |
684 | } |
685 | ||
4dc3b16b | 686 | /* |
1da177e4 LT |
687 | * Using bio to read from swap. |
688 | * This code requires a bit more work than just using buffer heads | |
689 | * but, it is the recommended way for 2.5/2.6. | |
690 | * The following are to signal the beginning and end of I/O. Bios | |
691 | * finish asynchronously, while we want them to happen synchronously. | |
692 | * A simple atomic_t, and a wait loop take care of this problem. | |
693 | */ | |
694 | ||
695 | static atomic_t io_done = ATOMIC_INIT(0); | |
696 | ||
/*
 * Completion callback for the bios issued by submit().
 *
 * Panics if the bio did not complete successfully (for writes as well as
 * reads, although the message only mentions reading).  Otherwise clears
 * io_done, which ends the wait loop in submit().  Always returns 0.
 */
static int end_io(struct bio *bio, unsigned int num, int err)
{
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		panic("I/O error reading memory image");
	atomic_set(&io_done, 0);
	return 0;
}
704 | ||
dc19d507 | 705 | static struct block_device *resume_bdev; |
1da177e4 LT |
706 | |
/**
 *	submit - submit BIO request.
 *	@rw:	READ or WRITE.
 *	@page_off:	offset of the page on resume_bdev, in pages.
 *	@page:	kernel virtual address of the page we're reading or writing.
 *
 *	Straight from the textbook - allocate and initialize the bio.
 *	If we're writing, make sure the page is marked as dirty.
 *	Then submit it and wait until end_io() clears io_done.
 *
 *	Returns 0 on success, -ENOMEM if no bio could be allocated and
 *	-EFAULT if the page could not be added to the bio.  I/O errors
 *	do not return: end_io() panics on them.
 */

static int submit(int rw, pgoff_t page_off, void *page)
{
	int error = 0;
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;
	/* Convert the page offset into 512-byte sectors */
	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
	/*
	 * NOTE(review): an extra reference is taken here, but only one
	 * bio_put() is visible in this function - confirm that the other
	 * reference is dropped on the completion path.
	 */
	bio_get(bio);
	bio->bi_bdev = resume_bdev;
	bio->bi_end_io = end_io;

	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
		printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
		error = -EFAULT;
		goto Done;
	}

	if (rw == WRITE)
		bio_set_pages_dirty(bio);

	/* io_done stays 1 until end_io() resets it on completion */
	atomic_set(&io_done, 1);
	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
	while (atomic_read(&io_done))
		yield();

 Done:
	bio_put(bio);
	return error;
}
749 | ||
/*
 * Synchronously read the page at page offset @page_off of resume_bdev
 * into the buffer at @page.  Returns the result of submit().
 */
static int bio_read_page(pgoff_t page_off, void *page)
{
	return submit(READ, page_off, page);
}
754 | ||
/*
 * Synchronously write the buffer at @page to page offset @page_off of
 * resume_bdev.  Returns the result of submit().
 */
static int bio_write_page(pgoff_t page_off, void *page)
{
	return submit(WRITE, page_off, page);
}
759 | ||
760 | /* | |
761 | * Sanity check if this image makes sense with this kernel/swap context | |
762 | * I really don't think that it's foolproof but more than nothing.. | |
763 | */ | |
764 | ||
dc19d507 | 765 | static const char *sanity_check(void) |
1da177e4 LT |
766 | { |
767 | dump_info(); | |
47b724f3 | 768 | if (swsusp_info.version_code != LINUX_VERSION_CODE) |
1da177e4 | 769 | return "kernel version"; |
47b724f3 | 770 | if (swsusp_info.num_physpages != num_physpages) |
1da177e4 LT |
771 | return "memory size"; |
772 | if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) | |
773 | return "system type"; | |
774 | if (strcmp(swsusp_info.uts.release,system_utsname.release)) | |
775 | return "kernel release"; | |
776 | if (strcmp(swsusp_info.uts.version,system_utsname.version)) | |
777 | return "version"; | |
778 | if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) | |
779 | return "machine"; | |
5a72e04d | 780 | #if 0 |
99dc7d63 PM |
781 | /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */ |
782 | if (swsusp_info.cpus != num_possible_cpus()) | |
1da177e4 | 783 | return "number of cpus"; |
5a72e04d | 784 | #endif |
1da177e4 LT |
785 | return NULL; |
786 | } | |
787 | ||
788 | ||
789 | static int check_header(void) | |
790 | { | |
dc19d507 | 791 | const char *reason = NULL; |
1da177e4 LT |
792 | int error; |
793 | ||
794 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) | |
795 | return error; | |
796 | ||
797 | /* Is this same machine? */ | |
798 | if ((reason = sanity_check())) { | |
799 | printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); | |
800 | return -EPERM; | |
801 | } | |
802 | nr_copy_pages = swsusp_info.image_pages; | |
803 | return error; | |
804 | } | |
805 | ||
806 | static int check_sig(void) | |
807 | { | |
808 | int error; | |
809 | ||
810 | memset(&swsusp_header, 0, sizeof(swsusp_header)); | |
811 | if ((error = bio_read_page(0, &swsusp_header))) | |
812 | return error; | |
813 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | |
814 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | |
c2ff18f4 AS |
815 | memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV); |
816 | memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV); | |
1da177e4 LT |
817 | |
818 | /* | |
819 | * Reset swap signature now. | |
820 | */ | |
821 | error = bio_write_page(0, &swsusp_header); | |
0fbeb5a4 | 822 | } else { |
1da177e4 LT |
823 | return -EINVAL; |
824 | } | |
825 | if (!error) | |
826 | pr_debug("swsusp: Signature found, resuming\n"); | |
827 | return error; | |
828 | } | |
829 | ||
830 | /** | |
831 | * data_read - Read image pages from swap. | |
832 | * | |
833 | * You do not need to check for overlaps, check_pagedir() | |
834 | * already did that. | |
835 | */ | |
836 | ||
837 | static int data_read(struct pbe *pblist) | |
838 | { | |
dc19d507 | 839 | struct pbe *p; |
1da177e4 LT |
840 | int error = 0; |
841 | int i = 0; | |
842 | int mod = swsusp_info.image_pages / 100; | |
c2ff18f4 AS |
843 | void *tfm; |
844 | ||
845 | if ((error = crypto_init(0, &tfm))) | |
846 | return error; | |
1da177e4 LT |
847 | |
848 | if (!mod) | |
849 | mod = 1; | |
850 | ||
851 | printk("swsusp: Reading image data (%lu pages): ", | |
852 | swsusp_info.image_pages); | |
853 | ||
854 | for_each_pbe (p, pblist) { | |
855 | if (!(i % mod)) | |
856 | printk("\b\b\b\b%3d%%", i / mod); | |
857 | ||
c2ff18f4 AS |
858 | if ((error = crypto_read(p, tfm))) { |
859 | crypto_exit(tfm); | |
1da177e4 | 860 | return error; |
c2ff18f4 | 861 | } |
1da177e4 LT |
862 | |
863 | i++; | |
864 | } | |
865 | printk("\b\b\b\bdone\n"); | |
c2ff18f4 | 866 | crypto_exit(tfm); |
1da177e4 LT |
867 | return error; |
868 | } | |
869 | ||
1da177e4 LT |
/**
 *	read_pagedir - Read page backup list pages from swap
 *	@pblist: already allocated chain of page-directory pages to fill.
 *
 *	Each page of @pblist is filled from the swap location recorded
 *	in swsusp_info.pagedir[].  Returns 0 on success, -EFAULT if
 *	@pblist is NULL or a recorded swap offset is zero, or the error
 *	from bio_read_page().
 */

static int read_pagedir(struct pbe *pblist)
{
	struct pbe *pbpage, *p;
	unsigned int i = 0;
	int error;

	if (!pblist)
		return -EFAULT;

	printk("swsusp: Reading pagedir (%lu pages)\n",
			swsusp_info.pagedir_pages);

	for_each_pb_page (pbpage, pblist) {
		unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);

		error = -EFAULT;
		if (offset) {
			/*
			 * The read overwrites the whole page, including the
			 * pointer linking it to the next page of the chain,
			 * so save that pointer and put it back afterwards.
			 */
			p = (pbpage + PB_PAGE_SKIP)->next;
			error = bio_read_page(offset, (void *)pbpage);
			(pbpage + PB_PAGE_SKIP)->next = p;
		}
		if (error)
			break;
	}

	/* The chain must be exactly as long as the image header says */
	if (!error)
		BUG_ON(i != swsusp_info.pagedir_pages);

	return error;
}
904 | ||
905 | ||
/*
 * Verify that the resume device holds a usable image: first the
 * signature, then the image header.  Returns 0 or the first error.
 */
static int check_suspend_image(void)
{
	int error = check_sig();

	if (!error)
		error = check_header();

	return error;
}
918 | ||
/*
 * read_suspend_image - load the page directory and the image data.
 *
 * The page directory is first read into a temporary list, which is used
 * to mark the page frames the image originally occupied.  A second list
 * is then allocated as pagedir_nosave, the addresses are copied over
 * and the temporary list is freed.  Finally the data pages are
 * allocated and read from swap.
 *
 * NOTE(review): the last argument of alloc_pagedir()/alloc_data_pages()
 * presumably requests pages that do not collide with the marked frames
 * - confirm against their definitions.
 *
 * Returns 0 on success, -ENOMEM if a list cannot be allocated, or the
 * error from reading.  This function does not free the lists it
 * allocated when it fails after the first allocation.
 */
static int read_suspend_image(void)
{
	int error = 0;
	struct pbe *p;

	if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0)))
		return -ENOMEM;

	if ((error = read_pagedir(p)))
		return error;
	create_pbe_list(p, nr_copy_pages);
	mark_unsafe_pages(p);
	pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
	if (pagedir_nosave) {
		create_pbe_list(pagedir_nosave, nr_copy_pages);
		copy_page_backup_list(pagedir_nosave, p);
	}
	/* The temporary list is no longer needed either way */
	free_pagedir(p);
	if (!pagedir_nosave)
		return -ENOMEM;

	/* Allocate memory for the image and read the data from swap */

	error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1);

	if (!error)
		error = data_read(pagedir_nosave);

	return error;
}
949 | ||
950 | /** | |
951 | * swsusp_check - Check for saved image in swap | |
952 | */ | |
953 | ||
954 | int swsusp_check(void) | |
955 | { | |
956 | int error; | |
957 | ||
1da177e4 LT |
958 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); |
959 | if (!IS_ERR(resume_bdev)) { | |
960 | set_blocksize(resume_bdev, PAGE_SIZE); | |
961 | error = check_suspend_image(); | |
962 | if (error) | |
963 | blkdev_put(resume_bdev); | |
964 | } else | |
965 | error = PTR_ERR(resume_bdev); | |
966 | ||
967 | if (!error) | |
968 | pr_debug("swsusp: resume file found\n"); | |
969 | else | |
970 | pr_debug("swsusp: Error %d check for resume file\n", error); | |
971 | return error; | |
972 | } | |
973 | ||
974 | /** | |
975 | * swsusp_read - Read saved image from swap. | |
976 | */ | |
977 | ||
978 | int swsusp_read(void) | |
979 | { | |
980 | int error; | |
981 | ||
982 | if (IS_ERR(resume_bdev)) { | |
983 | pr_debug("swsusp: block device not initialised\n"); | |
984 | return PTR_ERR(resume_bdev); | |
985 | } | |
986 | ||
987 | error = read_suspend_image(); | |
988 | blkdev_put(resume_bdev); | |
c2ff18f4 | 989 | memset(key_iv, 0, MAXKEY+MAXIV); |
1da177e4 LT |
990 | |
991 | if (!error) | |
992 | pr_debug("swsusp: Reading resume file was successful\n"); | |
993 | else | |
994 | pr_debug("swsusp: Error %d resuming\n", error); | |
995 | return error; | |
996 | } | |
997 | ||
998 | /** | |
999 | * swsusp_close - close swap device. | |
1000 | */ | |
1001 | ||
1002 | void swsusp_close(void) | |
1003 | { | |
1004 | if (IS_ERR(resume_bdev)) { | |
1005 | pr_debug("swsusp: block device not initialised\n"); | |
1006 | return; | |
1007 | } | |
1008 | ||
1009 | blkdev_put(resume_bdev); | |
1010 | } |