Commit | Line | Data |
---|---|---|
25761b6e | 1 | /* |
96bc7aec | 2 | * linux/kernel/power/snapshot.c |
25761b6e | 3 | * |
96bc7aec | 4 | * This file provides system snapshot/restore functionality. |
25761b6e RW |
5 | * |
6 | * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> | |
7 | * | |
8 | * This file is released under the GPLv2, and is based on swsusp.c. | |
9 | * | |
10 | */ | |
11 | ||
12 | ||
f577eb30 | 13 | #include <linux/version.h> |
25761b6e RW |
14 | #include <linux/module.h> |
15 | #include <linux/mm.h> | |
16 | #include <linux/suspend.h> | |
17 | #include <linux/smp_lock.h> | |
25761b6e | 18 | #include <linux/delay.h> |
25761b6e | 19 | #include <linux/bitops.h> |
25761b6e | 20 | #include <linux/spinlock.h> |
25761b6e | 21 | #include <linux/kernel.h> |
25761b6e RW |
22 | #include <linux/pm.h> |
23 | #include <linux/device.h> | |
25761b6e RW |
24 | #include <linux/bootmem.h> |
25 | #include <linux/syscalls.h> | |
26 | #include <linux/console.h> | |
27 | #include <linux/highmem.h> | |
25761b6e RW |
28 | |
29 | #include <asm/uaccess.h> | |
30 | #include <asm/mmu_context.h> | |
31 | #include <asm/pgtable.h> | |
32 | #include <asm/tlbflush.h> | |
33 | #include <asm/io.h> | |
34 | ||
25761b6e RW |
35 | #include "power.h" |
36 | ||
/* Head of the list of page backup entries (PBEs) describing the image */
struct pbe *pagedir_nosave;
/* Number of data pages in the image */
static unsigned int nr_copy_pages;
/* Number of pages used to store the packed .orig_address values */
static unsigned int nr_meta_pages;
/* One-page scratch buffer, allocated on demand by snapshot_{read,write}_next() */
static unsigned long *buffer;
7088a5c0 | 41 | |
ce4ab001 SL |
/*
 * One list entry per piece of architecture-specific memory to be saved.
 * swsusp_add_arch_pages() splits ranges so that [start, end) never
 * crosses a page boundary.
 */
struct arch_saveable_page {
	unsigned long start;			/* first address of the piece */
	unsigned long end;			/* one past the last address */
	char *data;				/* page holding the saved copy, or NULL */
	struct arch_saveable_page *next;	/* next entry on the list */
};

/* All registered pieces; new entries are added at the head */
static struct arch_saveable_page *arch_pages;
49 | ||
50 | int swsusp_add_arch_pages(unsigned long start, unsigned long end) | |
51 | { | |
52 | struct arch_saveable_page *tmp; | |
53 | ||
54 | while (start < end) { | |
55 | tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL); | |
56 | if (!tmp) | |
57 | return -ENOMEM; | |
58 | tmp->start = start; | |
59 | tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT; | |
60 | if (tmp->end > end) | |
61 | tmp->end = end; | |
62 | tmp->next = arch_pages; | |
63 | start = tmp->end; | |
64 | arch_pages = tmp; | |
65 | } | |
66 | return 0; | |
67 | } | |
68 | ||
69 | static unsigned int count_arch_pages(void) | |
70 | { | |
71 | unsigned int count = 0; | |
72 | struct arch_saveable_page *tmp = arch_pages; | |
73 | while (tmp) { | |
74 | count++; | |
75 | tmp = tmp->next; | |
76 | } | |
77 | return count; | |
78 | } | |
79 | ||
80 | static int save_arch_mem(void) | |
81 | { | |
82 | char *kaddr; | |
83 | struct arch_saveable_page *tmp = arch_pages; | |
84 | int offset; | |
85 | ||
86 | pr_debug("swsusp: Saving arch specific memory"); | |
87 | while (tmp) { | |
88 | tmp->data = (char *)__get_free_page(GFP_ATOMIC); | |
89 | if (!tmp->data) | |
90 | return -ENOMEM; | |
91 | offset = tmp->start - (tmp->start & PAGE_MASK); | |
92 | /* arch pages might haven't a 'struct page' */ | |
93 | kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0); | |
94 | memcpy(tmp->data + offset, kaddr + offset, | |
95 | tmp->end - tmp->start); | |
96 | kunmap_atomic(kaddr, KM_USER0); | |
97 | ||
98 | tmp = tmp->next; | |
99 | } | |
100 | return 0; | |
101 | } | |
102 | ||
103 | static int restore_arch_mem(void) | |
104 | { | |
105 | char *kaddr; | |
106 | struct arch_saveable_page *tmp = arch_pages; | |
107 | int offset; | |
108 | ||
109 | while (tmp) { | |
110 | if (!tmp->data) | |
111 | continue; | |
112 | offset = tmp->start - (tmp->start & PAGE_MASK); | |
113 | kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0); | |
114 | memcpy(kaddr + offset, tmp->data + offset, | |
115 | tmp->end - tmp->start); | |
116 | kunmap_atomic(kaddr, KM_USER0); | |
117 | free_page((long)tmp->data); | |
118 | tmp->data = NULL; | |
119 | tmp = tmp->next; | |
120 | } | |
121 | return 0; | |
122 | } | |
123 | ||
25761b6e | 124 | #ifdef CONFIG_HIGHMEM |
7bff24e2 | 125 | static unsigned int count_highmem_pages(void) |
72a97e08 RW |
126 | { |
127 | struct zone *zone; | |
128 | unsigned long zone_pfn; | |
129 | unsigned int n = 0; | |
130 | ||
131 | for_each_zone (zone) | |
132 | if (is_highmem(zone)) { | |
133 | mark_free_pages(zone); | |
134 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) { | |
135 | struct page *page; | |
136 | unsigned long pfn = zone_pfn + zone->zone_start_pfn; | |
137 | if (!pfn_valid(pfn)) | |
138 | continue; | |
139 | page = pfn_to_page(pfn); | |
140 | if (PageReserved(page)) | |
141 | continue; | |
142 | if (PageNosaveFree(page)) | |
143 | continue; | |
144 | n++; | |
145 | } | |
146 | } | |
147 | return n; | |
148 | } | |
149 | ||
25761b6e RW |
/* Backup copy of a single highmem page */
struct highmem_page {
	char *data;			/* page from get_zeroed_page() holding the copy */
	struct page *page;		/* the highmem page that was copied */
	struct highmem_page *next;	/* next backup on the list */
};

/* List of highmem backups; save_highmem_zone() adds entries at the head */
static struct highmem_page *highmem_copy;
157 | ||
158 | static int save_highmem_zone(struct zone *zone) | |
159 | { | |
160 | unsigned long zone_pfn; | |
161 | mark_free_pages(zone); | |
162 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { | |
163 | struct page *page; | |
164 | struct highmem_page *save; | |
165 | void *kaddr; | |
166 | unsigned long pfn = zone_pfn + zone->zone_start_pfn; | |
167 | ||
ce6ed29f | 168 | if (!(pfn%10000)) |
25761b6e RW |
169 | printk("."); |
170 | if (!pfn_valid(pfn)) | |
171 | continue; | |
172 | page = pfn_to_page(pfn); | |
173 | /* | |
174 | * This condition results from rvmalloc() sans vmalloc_32() | |
175 | * and architectural memory reservations. This should be | |
176 | * corrected eventually when the cases giving rise to this | |
177 | * are better understood. | |
178 | */ | |
c8adb494 | 179 | if (PageReserved(page)) |
25761b6e | 180 | continue; |
25761b6e RW |
181 | BUG_ON(PageNosave(page)); |
182 | if (PageNosaveFree(page)) | |
183 | continue; | |
184 | save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); | |
185 | if (!save) | |
186 | return -ENOMEM; | |
187 | save->next = highmem_copy; | |
188 | save->page = page; | |
189 | save->data = (void *) get_zeroed_page(GFP_ATOMIC); | |
190 | if (!save->data) { | |
191 | kfree(save); | |
192 | return -ENOMEM; | |
193 | } | |
194 | kaddr = kmap_atomic(page, KM_USER0); | |
195 | memcpy(save->data, kaddr, PAGE_SIZE); | |
196 | kunmap_atomic(kaddr, KM_USER0); | |
197 | highmem_copy = save; | |
198 | } | |
199 | return 0; | |
200 | } | |
25761b6e | 201 | |
7bff24e2 | 202 | static int save_highmem(void) |
25761b6e | 203 | { |
25761b6e RW |
204 | struct zone *zone; |
205 | int res = 0; | |
206 | ||
ce6ed29f | 207 | pr_debug("swsusp: Saving Highmem"); |
e4e4d665 | 208 | drain_local_pages(); |
25761b6e RW |
209 | for_each_zone (zone) { |
210 | if (is_highmem(zone)) | |
211 | res = save_highmem_zone(zone); | |
212 | if (res) | |
213 | return res; | |
214 | } | |
ce6ed29f | 215 | printk("\n"); |
25761b6e RW |
216 | return 0; |
217 | } | |
218 | ||
7bff24e2 | 219 | static int restore_highmem(void) |
25761b6e | 220 | { |
25761b6e RW |
221 | printk("swsusp: Restoring Highmem\n"); |
222 | while (highmem_copy) { | |
223 | struct highmem_page *save = highmem_copy; | |
224 | void *kaddr; | |
225 | highmem_copy = save->next; | |
226 | ||
227 | kaddr = kmap_atomic(save->page, KM_USER0); | |
228 | memcpy(kaddr, save->data, PAGE_SIZE); | |
229 | kunmap_atomic(kaddr, KM_USER0); | |
230 | free_page((long) save->data); | |
231 | kfree(save); | |
232 | } | |
25761b6e RW |
233 | return 0; |
234 | } | |
ce4ab001 | 235 | #else |
7bff24e2 AB |
/* Without CONFIG_HIGHMEM there is nothing to count, save or restore */
static inline unsigned int count_highmem_pages(void)
{
	return 0;
}

static inline int save_highmem(void)
{
	return 0;
}

static inline int restore_highmem(void)
{
	return 0;
}
0fbeb5a4 | 239 | #endif |
25761b6e | 240 | |
ce4ab001 SL |
/* Return the number of arch-specific entries plus saveable highmem pages */
unsigned int count_special_pages(void)
{
	unsigned int nr = count_arch_pages();

	nr += count_highmem_pages();
	return nr;
}
245 | ||
/*
 * Save arch-specific memory first, then highmem.  Highmem is not
 * touched if saving the arch-specific memory fails.
 *
 * Returns 0 on success or the first error encountered.
 */
int save_special_mem(void)
{
	int ret = save_arch_mem();

	if (ret)
		return ret;
	return save_highmem();
}
254 | ||
/*
 * Restore arch-specific memory first, then highmem.  Highmem is not
 * restored if restoring the arch-specific memory fails.
 *
 * Returns 0 on success or the first error encountered.
 */
int restore_special_mem(void)
{
	int ret = restore_arch_mem();

	if (ret)
		return ret;
	return restore_highmem();
}
263 | ||
25761b6e RW |
264 | static int pfn_is_nosave(unsigned long pfn) |
265 | { | |
266 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | |
267 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; | |
268 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | |
269 | } | |
270 | ||
271 | /** | |
272 | * saveable - Determine whether a page should be cloned or not. | |
273 | * @pfn: The page | |
274 | * | |
275 | * We save a page if it's Reserved, and not in the range of pages | |
276 | * statically defined as 'unsaveable', or if it isn't reserved, and | |
277 | * isn't part of a free chunk of pages. | |
278 | */ | |
279 | ||
de491861 | 280 | static int saveable(struct zone *zone, unsigned long *zone_pfn) |
25761b6e RW |
281 | { |
282 | unsigned long pfn = *zone_pfn + zone->zone_start_pfn; | |
de491861 | 283 | struct page *page; |
25761b6e RW |
284 | |
285 | if (!pfn_valid(pfn)) | |
286 | return 0; | |
287 | ||
288 | page = pfn_to_page(pfn); | |
25761b6e RW |
289 | if (PageNosave(page)) |
290 | return 0; | |
72a97e08 | 291 | if (PageReserved(page) && pfn_is_nosave(pfn)) |
25761b6e | 292 | return 0; |
25761b6e RW |
293 | if (PageNosaveFree(page)) |
294 | return 0; | |
295 | ||
296 | return 1; | |
297 | } | |
298 | ||
72a97e08 | 299 | unsigned int count_data_pages(void) |
25761b6e RW |
300 | { |
301 | struct zone *zone; | |
302 | unsigned long zone_pfn; | |
dc19d507 | 303 | unsigned int n = 0; |
25761b6e | 304 | |
25761b6e RW |
305 | for_each_zone (zone) { |
306 | if (is_highmem(zone)) | |
307 | continue; | |
308 | mark_free_pages(zone); | |
309 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | |
a0f49651 | 310 | n += saveable(zone, &zone_pfn); |
25761b6e | 311 | } |
a0f49651 | 312 | return n; |
25761b6e RW |
313 | } |
314 | ||
a0f49651 | 315 | static void copy_data_pages(struct pbe *pblist) |
25761b6e RW |
316 | { |
317 | struct zone *zone; | |
318 | unsigned long zone_pfn; | |
a0f49651 | 319 | struct pbe *pbe, *p; |
25761b6e | 320 | |
a0f49651 | 321 | pbe = pblist; |
25761b6e RW |
322 | for_each_zone (zone) { |
323 | if (is_highmem(zone)) | |
324 | continue; | |
325 | mark_free_pages(zone); | |
326 | /* This is necessary for swsusp_free() */ | |
a0f49651 | 327 | for_each_pb_page (p, pblist) |
25761b6e | 328 | SetPageNosaveFree(virt_to_page(p)); |
a0f49651 | 329 | for_each_pbe (p, pblist) |
25761b6e RW |
330 | SetPageNosaveFree(virt_to_page(p->address)); |
331 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { | |
332 | if (saveable(zone, &zone_pfn)) { | |
de491861 | 333 | struct page *page; |
25761b6e RW |
334 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); |
335 | BUG_ON(!pbe); | |
336 | pbe->orig_address = (unsigned long)page_address(page); | |
337 | /* copy_page is not usable for copying task structs. */ | |
338 | memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE); | |
339 | pbe = pbe->next; | |
340 | } | |
341 | } | |
342 | } | |
343 | BUG_ON(pbe); | |
344 | } | |
345 | ||
346 | ||
347 | /** | |
348 | * free_pagedir - free pages allocated with alloc_pagedir() | |
349 | */ | |
350 | ||
4a3b98a4 | 351 | static void free_pagedir(struct pbe *pblist, int clear_nosave_free) |
25761b6e RW |
352 | { |
353 | struct pbe *pbe; | |
354 | ||
355 | while (pblist) { | |
356 | pbe = (pblist + PB_PAGE_SKIP)->next; | |
357 | ClearPageNosave(virt_to_page(pblist)); | |
4a3b98a4 RW |
358 | if (clear_nosave_free) |
359 | ClearPageNosaveFree(virt_to_page(pblist)); | |
25761b6e RW |
360 | free_page((unsigned long)pblist); |
361 | pblist = pbe; | |
362 | } | |
363 | } | |
364 | ||
365 | /** | |
366 | * fill_pb_page - Create a list of PBEs on a given memory page | |
367 | */ | |
368 | ||
369 | static inline void fill_pb_page(struct pbe *pbpage) | |
370 | { | |
371 | struct pbe *p; | |
372 | ||
373 | p = pbpage; | |
374 | pbpage += PB_PAGE_SKIP; | |
375 | do | |
376 | p->next = p + 1; | |
377 | while (++p < pbpage); | |
378 | } | |
379 | ||
380 | /** | |
381 | * create_pbe_list - Create a list of PBEs on top of a given chain | |
382 | * of memory pages allocated with alloc_pagedir() | |
383 | */ | |
384 | ||
7088a5c0 | 385 | static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) |
25761b6e RW |
386 | { |
387 | struct pbe *pbpage, *p; | |
dc19d507 | 388 | unsigned int num = PBES_PER_PAGE; |
25761b6e RW |
389 | |
390 | for_each_pb_page (pbpage, pblist) { | |
391 | if (num >= nr_pages) | |
392 | break; | |
393 | ||
394 | fill_pb_page(pbpage); | |
395 | num += PBES_PER_PAGE; | |
396 | } | |
397 | if (pbpage) { | |
398 | for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) | |
399 | p->next = p + 1; | |
400 | p->next = NULL; | |
401 | } | |
25761b6e RW |
402 | } |
403 | ||
72a97e08 RW |
404 | /** |
405 | * On resume it is necessary to trace and eventually free the unsafe | |
406 | * pages that have been allocated, because they are needed for I/O | |
407 | * (on x86-64 we likely will "eat" these pages once again while | |
408 | * creating the temporary page translation tables) | |
409 | */ | |
410 | ||
411 | struct eaten_page { | |
412 | struct eaten_page *next; | |
413 | char padding[PAGE_SIZE - sizeof(void *)]; | |
414 | }; | |
415 | ||
416 | static struct eaten_page *eaten_pages = NULL; | |
417 | ||
f577eb30 | 418 | static void release_eaten_pages(void) |
72a97e08 RW |
419 | { |
420 | struct eaten_page *p, *q; | |
421 | ||
422 | p = eaten_pages; | |
423 | while (p) { | |
424 | q = p->next; | |
425 | /* We don't want swsusp_free() to free this page again */ | |
426 | ClearPageNosave(virt_to_page(p)); | |
427 | free_page((unsigned long)p); | |
428 | p = q; | |
429 | } | |
430 | eaten_pages = NULL; | |
431 | } | |
432 | ||
054bd4c1 RW |
433 | /** |
434 | * @safe_needed - on resume, for storing the PBE list and the image, | |
435 | * we can only use memory pages that do not conflict with the pages | |
436 | * which had been used before suspend. | |
437 | * | |
438 | * The unsafe pages are marked with the PG_nosave_free flag | |
439 | * | |
440 | * Allocated but unusable (ie eaten) memory pages should be marked | |
441 | * so that swsusp_free() can release them | |
442 | */ | |
443 | ||
444 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | |
25761b6e | 445 | { |
054bd4c1 RW |
446 | void *res; |
447 | ||
448 | if (safe_needed) | |
449 | do { | |
450 | res = (void *)get_zeroed_page(gfp_mask); | |
72a97e08 | 451 | if (res && PageNosaveFree(virt_to_page(res))) { |
054bd4c1 RW |
452 | /* This is for swsusp_free() */ |
453 | SetPageNosave(virt_to_page(res)); | |
72a97e08 RW |
454 | ((struct eaten_page *)res)->next = eaten_pages; |
455 | eaten_pages = res; | |
456 | } | |
054bd4c1 RW |
457 | } while (res && PageNosaveFree(virt_to_page(res))); |
458 | else | |
459 | res = (void *)get_zeroed_page(gfp_mask); | |
25761b6e RW |
460 | if (res) { |
461 | SetPageNosave(virt_to_page(res)); | |
462 | SetPageNosaveFree(virt_to_page(res)); | |
463 | } | |
464 | return res; | |
465 | } | |
466 | ||
054bd4c1 RW |
467 | unsigned long get_safe_page(gfp_t gfp_mask) |
468 | { | |
469 | return (unsigned long)alloc_image_page(gfp_mask, 1); | |
470 | } | |
471 | ||
25761b6e RW |
472 | /** |
473 | * alloc_pagedir - Allocate the page directory. | |
474 | * | |
475 | * First, determine exactly how many pages we need and | |
476 | * allocate them. | |
477 | * | |
478 | * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE | |
479 | * struct pbe elements (pbes) and the last element in the page points | |
480 | * to the next page. | |
481 | * | |
482 | * On each page we set up a list of struct_pbe elements. | |
483 | */ | |
484 | ||
7bff24e2 AB |
485 | static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, |
486 | int safe_needed) | |
25761b6e | 487 | { |
dc19d507 | 488 | unsigned int num; |
25761b6e RW |
489 | struct pbe *pblist, *pbe; |
490 | ||
491 | if (!nr_pages) | |
492 | return NULL; | |
493 | ||
054bd4c1 | 494 | pblist = alloc_image_page(gfp_mask, safe_needed); |
25761b6e RW |
495 | /* FIXME: rewrite this ugly loop */ |
496 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; | |
497 | pbe = pbe->next, num += PBES_PER_PAGE) { | |
498 | pbe += PB_PAGE_SKIP; | |
054bd4c1 | 499 | pbe->next = alloc_image_page(gfp_mask, safe_needed); |
25761b6e RW |
500 | } |
501 | if (!pbe) { /* get_zeroed_page() failed */ | |
4a3b98a4 | 502 | free_pagedir(pblist, 1); |
25761b6e | 503 | pblist = NULL; |
7088a5c0 | 504 | } else |
6e1819d6 | 505 | create_pbe_list(pblist, nr_pages); |
25761b6e RW |
506 | return pblist; |
507 | } | |
508 | ||
509 | /** | |
510 | * Free pages we allocated for suspend. Suspend pages are alocated | |
511 | * before atomic copy, so we need to free them after resume. | |
512 | */ | |
513 | ||
514 | void swsusp_free(void) | |
515 | { | |
516 | struct zone *zone; | |
517 | unsigned long zone_pfn; | |
518 | ||
519 | for_each_zone(zone) { | |
520 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | |
521 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { | |
dc19d507 | 522 | struct page *page; |
25761b6e RW |
523 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); |
524 | if (PageNosave(page) && PageNosaveFree(page)) { | |
525 | ClearPageNosave(page); | |
526 | ClearPageNosaveFree(page); | |
527 | free_page((long) page_address(page)); | |
528 | } | |
529 | } | |
530 | } | |
f577eb30 RW |
531 | nr_copy_pages = 0; |
532 | nr_meta_pages = 0; | |
533 | pagedir_nosave = NULL; | |
6e1819d6 | 534 | buffer = NULL; |
25761b6e RW |
535 | } |
536 | ||
537 | ||
538 | /** | |
539 | * enough_free_mem - Make sure we enough free memory to snapshot. | |
540 | * | |
541 | * Returns TRUE or FALSE after checking the number of available | |
542 | * free pages. | |
543 | */ | |
544 | ||
dc19d507 | 545 | static int enough_free_mem(unsigned int nr_pages) |
25761b6e | 546 | { |
e5e2fa78 RW |
547 | struct zone *zone; |
548 | unsigned int n = 0; | |
549 | ||
550 | for_each_zone (zone) | |
551 | if (!is_highmem(zone)) | |
552 | n += zone->free_pages; | |
553 | pr_debug("swsusp: available memory: %u pages\n", n); | |
554 | return n > (nr_pages + PAGES_FOR_IO + | |
a0f49651 | 555 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); |
25761b6e RW |
556 | } |
557 | ||
f577eb30 | 558 | static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) |
054bd4c1 RW |
559 | { |
560 | struct pbe *p; | |
561 | ||
562 | for_each_pbe (p, pblist) { | |
563 | p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); | |
564 | if (!p->address) | |
565 | return -ENOMEM; | |
566 | } | |
567 | return 0; | |
568 | } | |
25761b6e | 569 | |
dc19d507 | 570 | static struct pbe *swsusp_alloc(unsigned int nr_pages) |
25761b6e | 571 | { |
054bd4c1 | 572 | struct pbe *pblist; |
25761b6e | 573 | |
054bd4c1 | 574 | if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { |
25761b6e | 575 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); |
a0f49651 | 576 | return NULL; |
25761b6e | 577 | } |
25761b6e | 578 | |
054bd4c1 RW |
579 | if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { |
580 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); | |
581 | swsusp_free(); | |
582 | return NULL; | |
25761b6e RW |
583 | } |
584 | ||
a0f49651 | 585 | return pblist; |
25761b6e RW |
586 | } |
587 | ||
2e32a43e | 588 | asmlinkage int swsusp_save(void) |
25761b6e | 589 | { |
dc19d507 | 590 | unsigned int nr_pages; |
25761b6e RW |
591 | |
592 | pr_debug("swsusp: critical section: \n"); | |
25761b6e RW |
593 | |
594 | drain_local_pages(); | |
a0f49651 RW |
595 | nr_pages = count_data_pages(); |
596 | printk("swsusp: Need to copy %u pages\n", nr_pages); | |
25761b6e RW |
597 | |
598 | pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n", | |
a0f49651 RW |
599 | nr_pages, |
600 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, | |
25761b6e RW |
601 | PAGES_FOR_IO, nr_free_pages()); |
602 | ||
a0f49651 | 603 | if (!enough_free_mem(nr_pages)) { |
25761b6e RW |
604 | printk(KERN_ERR "swsusp: Not enough free memory\n"); |
605 | return -ENOMEM; | |
606 | } | |
607 | ||
a0f49651 RW |
608 | pagedir_nosave = swsusp_alloc(nr_pages); |
609 | if (!pagedir_nosave) | |
610 | return -ENOMEM; | |
25761b6e RW |
611 | |
612 | /* During allocating of suspend pagedir, new cold pages may appear. | |
613 | * Kill them. | |
614 | */ | |
615 | drain_local_pages(); | |
a0f49651 | 616 | copy_data_pages(pagedir_nosave); |
25761b6e RW |
617 | |
618 | /* | |
619 | * End of critical section. From now on, we can write to memory, | |
620 | * but we should not touch disk. This specially means we must _not_ | |
621 | * touch swap space! Except we must write out our image of course. | |
622 | */ | |
623 | ||
a0f49651 | 624 | nr_copy_pages = nr_pages; |
f577eb30 | 625 | nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT; |
a0f49651 RW |
626 | |
627 | printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); | |
25761b6e RW |
628 | return 0; |
629 | } | |
f577eb30 RW |
630 | |
631 | static void init_header(struct swsusp_info *info) | |
632 | { | |
633 | memset(info, 0, sizeof(struct swsusp_info)); | |
634 | info->version_code = LINUX_VERSION_CODE; | |
635 | info->num_physpages = num_physpages; | |
636 | memcpy(&info->uts, &system_utsname, sizeof(system_utsname)); | |
637 | info->cpus = num_online_cpus(); | |
638 | info->image_pages = nr_copy_pages; | |
639 | info->pages = nr_copy_pages + nr_meta_pages + 1; | |
6e1819d6 RW |
640 | info->size = info->pages; |
641 | info->size <<= PAGE_SHIFT; | |
f577eb30 RW |
642 | } |
643 | ||
644 | /** | |
645 | * pack_orig_addresses - the .orig_address fields of the PBEs from the | |
646 | * list starting at @pbe are stored in the array @buf[] (1 page) | |
647 | */ | |
648 | ||
649 | static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe) | |
650 | { | |
651 | int j; | |
652 | ||
653 | for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { | |
654 | buf[j] = pbe->orig_address; | |
655 | pbe = pbe->next; | |
656 | } | |
657 | if (!pbe) | |
658 | for (; j < PAGE_SIZE / sizeof(long); j++) | |
659 | buf[j] = 0; | |
660 | return pbe; | |
661 | } | |
662 | ||
663 | /** | |
664 | * snapshot_read_next - used for reading the system memory snapshot. | |
665 | * | |
666 | * On the first call to it @handle should point to a zeroed | |
667 | * snapshot_handle structure. The structure gets updated and a pointer | |
668 | * to it should be passed to this function every next time. | |
669 | * | |
670 | * The @count parameter should contain the number of bytes the caller | |
671 | * wants to read from the snapshot. It must not be zero. | |
672 | * | |
673 | * On success the function returns a positive number. Then, the caller | |
674 | * is allowed to read up to the returned number of bytes from the memory | |
675 | * location computed by the data_of() macro. The number returned | |
676 | * may be smaller than @count, but this only happens if the read would | |
677 | * cross a page boundary otherwise. | |
678 | * | |
679 | * The function returns 0 to indicate the end of data stream condition, | |
680 | * and a negative number is returned on error. In such cases the | |
681 | * structure pointed to by @handle is not updated and should not be used | |
682 | * any more. | |
683 | */ | |
684 | ||
685 | int snapshot_read_next(struct snapshot_handle *handle, size_t count) | |
686 | { | |
f577eb30 RW |
687 | if (handle->page > nr_meta_pages + nr_copy_pages) |
688 | return 0; | |
689 | if (!buffer) { | |
690 | /* This makes the buffer be freed by swsusp_free() */ | |
691 | buffer = alloc_image_page(GFP_ATOMIC, 0); | |
692 | if (!buffer) | |
693 | return -ENOMEM; | |
694 | } | |
695 | if (!handle->offset) { | |
696 | init_header((struct swsusp_info *)buffer); | |
697 | handle->buffer = buffer; | |
698 | handle->pbe = pagedir_nosave; | |
699 | } | |
700 | if (handle->prev < handle->page) { | |
701 | if (handle->page <= nr_meta_pages) { | |
702 | handle->pbe = pack_orig_addresses(buffer, handle->pbe); | |
703 | if (!handle->pbe) | |
704 | handle->pbe = pagedir_nosave; | |
705 | } else { | |
706 | handle->buffer = (void *)handle->pbe->address; | |
707 | handle->pbe = handle->pbe->next; | |
708 | } | |
709 | handle->prev = handle->page; | |
710 | } | |
711 | handle->buf_offset = handle->page_offset; | |
712 | if (handle->page_offset + count >= PAGE_SIZE) { | |
713 | count = PAGE_SIZE - handle->page_offset; | |
714 | handle->page_offset = 0; | |
715 | handle->page++; | |
716 | } else { | |
717 | handle->page_offset += count; | |
718 | } | |
719 | handle->offset += count; | |
720 | return count; | |
721 | } | |
722 | ||
723 | /** | |
724 | * mark_unsafe_pages - mark the pages that cannot be used for storing | |
725 | * the image during resume, because they conflict with the pages that | |
726 | * had been used before suspend | |
727 | */ | |
728 | ||
729 | static int mark_unsafe_pages(struct pbe *pblist) | |
730 | { | |
731 | struct zone *zone; | |
732 | unsigned long zone_pfn; | |
733 | struct pbe *p; | |
734 | ||
735 | if (!pblist) /* a sanity check */ | |
736 | return -EINVAL; | |
737 | ||
738 | /* Clear page flags */ | |
739 | for_each_zone (zone) { | |
740 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | |
741 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | |
742 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | |
743 | zone->zone_start_pfn)); | |
744 | } | |
745 | ||
746 | /* Mark orig addresses */ | |
747 | for_each_pbe (p, pblist) { | |
748 | if (virt_addr_valid(p->orig_address)) | |
749 | SetPageNosaveFree(virt_to_page(p->orig_address)); | |
750 | else | |
751 | return -EFAULT; | |
752 | } | |
753 | ||
754 | return 0; | |
755 | } | |
756 | ||
757 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) | |
758 | { | |
759 | /* We assume both lists contain the same number of elements */ | |
760 | while (src) { | |
761 | dst->orig_address = src->orig_address; | |
762 | dst = dst->next; | |
763 | src = src->next; | |
764 | } | |
765 | } | |
766 | ||
767 | static int check_header(struct swsusp_info *info) | |
768 | { | |
769 | char *reason = NULL; | |
770 | ||
771 | if (info->version_code != LINUX_VERSION_CODE) | |
772 | reason = "kernel version"; | |
773 | if (info->num_physpages != num_physpages) | |
774 | reason = "memory size"; | |
775 | if (strcmp(info->uts.sysname,system_utsname.sysname)) | |
776 | reason = "system type"; | |
777 | if (strcmp(info->uts.release,system_utsname.release)) | |
778 | reason = "kernel release"; | |
779 | if (strcmp(info->uts.version,system_utsname.version)) | |
780 | reason = "version"; | |
781 | if (strcmp(info->uts.machine,system_utsname.machine)) | |
782 | reason = "machine"; | |
783 | if (reason) { | |
784 | printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason); | |
785 | return -EPERM; | |
786 | } | |
787 | return 0; | |
788 | } | |
789 | ||
790 | /** | |
791 | * load header - check the image header and copy data from it | |
792 | */ | |
793 | ||
794 | static int load_header(struct snapshot_handle *handle, | |
795 | struct swsusp_info *info) | |
796 | { | |
797 | int error; | |
798 | struct pbe *pblist; | |
799 | ||
800 | error = check_header(info); | |
801 | if (!error) { | |
802 | pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0); | |
803 | if (!pblist) | |
804 | return -ENOMEM; | |
805 | pagedir_nosave = pblist; | |
806 | handle->pbe = pblist; | |
807 | nr_copy_pages = info->image_pages; | |
808 | nr_meta_pages = info->pages - info->image_pages - 1; | |
809 | } | |
810 | return error; | |
811 | } | |
812 | ||
813 | /** | |
814 | * unpack_orig_addresses - copy the elements of @buf[] (1 page) to | |
815 | * the PBEs in the list starting at @pbe | |
816 | */ | |
817 | ||
818 | static inline struct pbe *unpack_orig_addresses(unsigned long *buf, | |
819 | struct pbe *pbe) | |
820 | { | |
821 | int j; | |
822 | ||
823 | for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { | |
824 | pbe->orig_address = buf[j]; | |
825 | pbe = pbe->next; | |
826 | } | |
827 | return pbe; | |
828 | } | |
829 | ||
830 | /** | |
831 | * create_image - use metadata contained in the PBE list | |
832 | * pointed to by pagedir_nosave to mark the pages that will | |
833 | * be overwritten in the process of restoring the system | |
834 | * memory state from the image and allocate memory for | |
835 | * the image avoiding these pages | |
836 | */ | |
837 | ||
838 | static int create_image(struct snapshot_handle *handle) | |
839 | { | |
840 | int error = 0; | |
841 | struct pbe *p, *pblist; | |
842 | ||
843 | p = pagedir_nosave; | |
844 | error = mark_unsafe_pages(p); | |
845 | if (!error) { | |
846 | pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); | |
847 | if (pblist) | |
848 | copy_page_backup_list(pblist, p); | |
4a3b98a4 | 849 | free_pagedir(p, 0); |
f577eb30 RW |
850 | if (!pblist) |
851 | error = -ENOMEM; | |
852 | } | |
853 | if (!error) | |
854 | error = alloc_data_pages(pblist, GFP_ATOMIC, 1); | |
855 | if (!error) { | |
856 | release_eaten_pages(); | |
857 | pagedir_nosave = pblist; | |
858 | } else { | |
859 | pagedir_nosave = NULL; | |
860 | handle->pbe = NULL; | |
861 | nr_copy_pages = 0; | |
862 | nr_meta_pages = 0; | |
863 | } | |
864 | return error; | |
865 | } | |
866 | ||
867 | /** | |
868 | * snapshot_write_next - used for writing the system memory snapshot. | |
869 | * | |
870 | * On the first call to it @handle should point to a zeroed | |
871 | * snapshot_handle structure. The structure gets updated and a pointer | |
872 | * to it should be passed to this function every next time. | |
873 | * | |
874 | * The @count parameter should contain the number of bytes the caller | |
875 | * wants to write to the image. It must not be zero. | |
876 | * | |
877 | * On success the function returns a positive number. Then, the caller | |
878 | * is allowed to write up to the returned number of bytes to the memory | |
879 | * location computed by the data_of() macro. The number returned | |
880 | * may be smaller than @count, but this only happens if the write would | |
881 | * cross a page boundary otherwise. | |
882 | * | |
883 | * The function returns 0 to indicate the "end of file" condition, | |
884 | * and a negative number is returned on error. In such cases the | |
885 | * structure pointed to by @handle is not updated and should not be used | |
886 | * any more. | |
887 | */ | |
888 | ||
889 | int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |
890 | { | |
f577eb30 RW |
891 | int error = 0; |
892 | ||
893 | if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages) | |
894 | return 0; | |
895 | if (!buffer) { | |
896 | /* This makes the buffer be freed by swsusp_free() */ | |
897 | buffer = alloc_image_page(GFP_ATOMIC, 0); | |
898 | if (!buffer) | |
899 | return -ENOMEM; | |
900 | } | |
901 | if (!handle->offset) | |
902 | handle->buffer = buffer; | |
903 | if (handle->prev < handle->page) { | |
904 | if (!handle->prev) { | |
905 | error = load_header(handle, (struct swsusp_info *)buffer); | |
906 | if (error) | |
907 | return error; | |
908 | } else if (handle->prev <= nr_meta_pages) { | |
909 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); | |
910 | if (!handle->pbe) { | |
911 | error = create_image(handle); | |
912 | if (error) | |
913 | return error; | |
914 | handle->pbe = pagedir_nosave; | |
915 | handle->buffer = (void *)handle->pbe->address; | |
916 | } | |
917 | } else { | |
918 | handle->pbe = handle->pbe->next; | |
919 | handle->buffer = (void *)handle->pbe->address; | |
920 | } | |
921 | handle->prev = handle->page; | |
922 | } | |
923 | handle->buf_offset = handle->page_offset; | |
924 | if (handle->page_offset + count >= PAGE_SIZE) { | |
925 | count = PAGE_SIZE - handle->page_offset; | |
926 | handle->page_offset = 0; | |
927 | handle->page++; | |
928 | } else { | |
929 | handle->page_offset += count; | |
930 | } | |
931 | handle->offset += count; | |
932 | return count; | |
933 | } | |
934 | ||
935 | int snapshot_image_loaded(struct snapshot_handle *handle) | |
936 | { | |
937 | return !(!handle->pbe || handle->pbe->next || !nr_copy_pages || | |
938 | handle->page <= nr_meta_pages + nr_copy_pages); | |
939 | } |