Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[linux-2.6-block.git] / arch / x86 / power / hibernate_64.c
CommitLineData
ef8b03fa
RW
1/*
2 * Hibernation support for x86-64
3 *
4 * Distribute under GPLv2
5 *
6 * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
a2531293 7 * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
ef8b03fa
RW
8 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
9 */
10
5a0e3ad6 11#include <linux/gfp.h>
ef8b03fa
RW
12#include <linux/smp.h>
13#include <linux/suspend.h>
62a03def
CY
14#include <linux/scatterlist.h>
15#include <linux/kdebug.h>
16
17#include <crypto/hash.h>
8b78c21d 18
5520b7e7 19#include <asm/e820/api.h>
8b78c21d 20#include <asm/init.h>
ef8b03fa
RW
21#include <asm/proto.h>
22#include <asm/page.h>
23#include <asm/pgtable.h>
24#include <asm/mtrr.h>
7f8998c7 25#include <asm/sections.h>
a8af7898 26#include <asm/suspend.h>
65c0554b 27#include <asm/tlbflush.h>
ef8b03fa 28
261f0ce5 29/* Defined in hibernate_asm_64.S */
2605fc21 30extern asmlinkage __visible int restore_image(void);
ef8b03fa
RW
31
32/*
33 * Address to jump to in the last phase of restore in order to get to the image
34 * kernel's text (this value is passed in the image header).
35 */
d6efc2f7 36unsigned long restore_jump_address __visible;
65c0554b 37unsigned long jump_address_phys;
ef8b03fa
RW
38
39/*
40 * Value of the cr3 register from before the hibernation (this value is passed
41 * in the image header).
42 */
d6efc2f7 43unsigned long restore_cr3 __visible;
ef8b03fa 44
c226fab4 45unsigned long temp_level4_pgt __visible;
ef8b03fa 46
65c0554b
RW
47unsigned long relocated_restore_code __visible;
48
c226fab4 49static int set_up_temporary_text_mapping(pgd_t *pgd)
65c0554b
RW
50{
51 pmd_t *pmd;
52 pud_t *pud;
91f606a8 53 p4d_t *p4d = NULL;
fb43d6cb
DH
54 pgprot_t pgtable_prot = __pgprot(_KERNPG_TABLE);
55 pgprot_t pmd_text_prot = __pgprot(__PAGE_KERNEL_LARGE_EXEC);
56
57 /* Filter out unsupported __PAGE_KERNEL* bits: */
58 pgprot_val(pmd_text_prot) &= __default_kernel_pte_mask;
59 pgprot_val(pgtable_prot) &= __default_kernel_pte_mask;
65c0554b
RW
60
61 /*
62 * The new mapping only has to cover the page containing the image
63 * kernel's entry point (jump_address_phys), because the switch over to
64 * it is carried out by relocated code running from a page allocated
65 * specifically for this purpose and covered by the identity mapping, so
66 * the temporary kernel text mapping is only needed for the final jump.
67 * Moreover, in that mapping the virtual address of the image kernel's
68 * entry point must be the same as its virtual address in the image
69 * kernel (restore_jump_address), so the image kernel's
70 * restore_registers() code doesn't find itself in a different area of
71 * the virtual address space after switching over to the original page
72 * tables used by the image kernel.
73 */
06c830a4 74
91f606a8 75 if (pgtable_l5_enabled) {
06c830a4
KS
76 p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
77 if (!p4d)
78 return -ENOMEM;
79 }
80
65c0554b
RW
81 pud = (pud_t *)get_safe_page(GFP_ATOMIC);
82 if (!pud)
83 return -ENOMEM;
84
85 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
86 if (!pmd)
87 return -ENOMEM;
88
89 set_pmd(pmd + pmd_index(restore_jump_address),
fb43d6cb 90 __pmd((jump_address_phys & PMD_MASK) | pgprot_val(pmd_text_prot)));
65c0554b 91 set_pud(pud + pud_index(restore_jump_address),
fb43d6cb 92 __pud(__pa(pmd) | pgprot_val(pgtable_prot)));
91f606a8 93 if (p4d) {
fb43d6cb
DH
94 p4d_t new_p4d = __p4d(__pa(pud) | pgprot_val(pgtable_prot));
95 pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
96
97 set_p4d(p4d + p4d_index(restore_jump_address), new_p4d);
98 set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
06c830a4
KS
99 } else {
100 /* No p4d for 4-level paging: point the pgd to the pud page table */
05189820 101 pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));
fb43d6cb 102 set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
06c830a4 103 }
65c0554b
RW
104
105 return 0;
106}
ef8b03fa 107
8b78c21d 108static void *alloc_pgt_page(void *context)
ef8b03fa 109{
8b78c21d 110 return (void *)get_safe_page(GFP_ATOMIC);
ef8b03fa
RW
111}
112
113static int set_up_temporary_mappings(void)
114{
8b78c21d
YL
115 struct x86_mapping_info info = {
116 .alloc_pgt_page = alloc_pgt_page,
66aad4fd 117 .page_flag = __PAGE_KERNEL_LARGE_EXEC,
e4630fdd 118 .offset = __PAGE_OFFSET,
8b78c21d
YL
119 };
120 unsigned long mstart, mend;
c226fab4 121 pgd_t *pgd;
8b78c21d
YL
122 int result;
123 int i;
ef8b03fa 124
c226fab4
RW
125 pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
126 if (!pgd)
ef8b03fa
RW
127 return -ENOMEM;
128
65c0554b 129 /* Prepare a temporary mapping for the kernel text */
c226fab4 130 result = set_up_temporary_text_mapping(pgd);
65c0554b
RW
131 if (result)
132 return result;
ef8b03fa
RW
133
134 /* Set up the direct mapping from scratch */
8b78c21d
YL
135 for (i = 0; i < nr_pfn_mapped; i++) {
136 mstart = pfn_mapped[i].start << PAGE_SHIFT;
137 mend = pfn_mapped[i].end << PAGE_SHIFT;
138
c226fab4 139 result = kernel_ident_mapping_init(&info, pgd, mstart, mend);
8b78c21d
YL
140 if (result)
141 return result;
ef8b03fa 142 }
8b78c21d 143
5d87f493 144 temp_level4_pgt = __pa(pgd);
ef8b03fa
RW
145 return 0;
146}
147
65c0554b
RW
148static int relocate_restore_code(void)
149{
150 pgd_t *pgd;
06c830a4 151 p4d_t *p4d;
65c0554b 152 pud_t *pud;
06c830a4
KS
153 pmd_t *pmd;
154 pte_t *pte;
65c0554b
RW
155
156 relocated_restore_code = get_safe_page(GFP_ATOMIC);
157 if (!relocated_restore_code)
158 return -ENOMEM;
159
c0944883 160 memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
65c0554b
RW
161
162 /* Make the page containing the relocated code executable */
6c690ee1
AL
163 pgd = (pgd_t *)__va(read_cr3_pa()) +
164 pgd_index(relocated_restore_code);
06c830a4
KS
165 p4d = p4d_offset(pgd, relocated_restore_code);
166 if (p4d_large(*p4d)) {
167 set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
168 goto out;
169 }
170 pud = pud_offset(p4d, relocated_restore_code);
65c0554b
RW
171 if (pud_large(*pud)) {
172 set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
06c830a4
KS
173 goto out;
174 }
175 pmd = pmd_offset(pud, relocated_restore_code);
176 if (pmd_large(*pmd)) {
177 set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
178 goto out;
65c0554b 179 }
06c830a4
KS
180 pte = pte_offset_kernel(pmd, relocated_restore_code);
181 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
182out:
65c0554b 183 __flush_tlb_all();
65c0554b
RW
184 return 0;
185}
186
328008a7 187asmlinkage int swsusp_arch_resume(void)
ef8b03fa
RW
188{
189 int error;
190
191 /* We have got enough memory and from now on we cannot recover */
65c0554b
RW
192 error = set_up_temporary_mappings();
193 if (error)
ef8b03fa
RW
194 return error;
195
65c0554b
RW
196 error = relocate_restore_code();
197 if (error)
198 return error;
ef8b03fa
RW
199
200 restore_image();
201 return 0;
202}
203
204/*
205 * pfn_is_nosave - check if given pfn is in the 'nosave' section
206 */
207
208int pfn_is_nosave(unsigned long pfn)
209{
210 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
211 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
212 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
213}
214
62a03def
CY
215#define MD5_DIGEST_SIZE 16
216
ef8b03fa
RW
217struct restore_data_record {
218 unsigned long jump_address;
65c0554b 219 unsigned long jump_address_phys;
ef8b03fa
RW
220 unsigned long cr3;
221 unsigned long magic;
62a03def 222 u8 e820_digest[MD5_DIGEST_SIZE];
ef8b03fa
RW
223};
224
62a03def
CY
225#define RESTORE_MAGIC 0x23456789ABCDEF01UL
226
227#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
228/**
bf495573 229 * get_e820_md5 - calculate md5 according to given e820 table
62a03def 230 *
bf495573 231 * @table: the e820 table to be calculated
62a03def
CY
232 * @buf: the md5 result to be stored to
233 */
bf495573 234static int get_e820_md5(struct e820_table *table, void *buf)
62a03def
CY
235{
236 struct scatterlist sg;
237 struct crypto_ahash *tfm;
238 int size;
239 int ret = 0;
240
241 tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
242 if (IS_ERR(tfm))
243 return -ENOMEM;
244
245 {
246 AHASH_REQUEST_ON_STACK(req, tfm);
bf495573 247 size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * table->nr_entries;
62a03def 248 ahash_request_set_tfm(req, tfm);
bf495573 249 sg_init_one(&sg, (u8 *)table, size);
62a03def
CY
250 ahash_request_set_callback(req, 0, NULL, NULL);
251 ahash_request_set_crypt(req, &sg, buf, size);
252
253 if (crypto_ahash_digest(req))
254 ret = -EINVAL;
255 ahash_request_zero(req);
256 }
257 crypto_free_ahash(tfm);
258
259 return ret;
260}
261
262static void hibernation_e820_save(void *buf)
263{
544a0f47 264 get_e820_md5(e820_table_firmware, buf);
62a03def
CY
265}
266
267static bool hibernation_e820_mismatch(void *buf)
268{
269 int ret;
270 u8 result[MD5_DIGEST_SIZE];
271
272 memset(result, 0, MD5_DIGEST_SIZE);
273 /* If there is no digest in suspend kernel, let it go. */
274 if (!memcmp(result, buf, MD5_DIGEST_SIZE))
275 return false;
276
544a0f47 277 ret = get_e820_md5(e820_table_firmware, result);
62a03def
CY
278 if (ret)
279 return true;
280
281 return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
282}
283#else
/* MD5 is not built in: no e820 digest is recorded and @buf is left untouched. */
static void hibernation_e820_save(void *buf)
{
}
287
/* Without built-in MD5 the digest in @buf cannot be checked: never a mismatch. */
static bool hibernation_e820_mismatch(void *buf)
{
	/* If md5 is not builtin for restore kernel, let it go. */
	return false;
}
293#endif
ef8b03fa
RW
294
295/**
296 * arch_hibernation_header_save - populate the architecture specific part
297 * of a hibernation image header
298 * @addr: address to save the data at
299 */
300int arch_hibernation_header_save(void *addr, unsigned int max_size)
301{
302 struct restore_data_record *rdr = addr;
303
304 if (max_size < sizeof(struct restore_data_record))
305 return -EOVERFLOW;
c0944883
KC
306 rdr->jump_address = (unsigned long)restore_registers;
307 rdr->jump_address_phys = __pa_symbol(restore_registers);
f34902c5
AL
308
309 /*
310 * The restore code fixes up CR3 and CR4 in the following sequence:
311 *
312 * [in hibernation asm]
313 * 1. CR3 <= temporary page tables
314 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
315 * 3. CR3 <= rdr->cr3
316 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
317 * [in restore_processor_state()]
318 * 5. CR4 <= saved CR4
319 * 6. CR3 <= saved CR3
320 *
321 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
322 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
323 * rdr->cr3 needs to point to valid page tables but must not
324 * have any of the PCID bits set.
325 */
326 rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
327
ef8b03fa 328 rdr->magic = RESTORE_MAGIC;
62a03def
CY
329
330 hibernation_e820_save(rdr->e820_digest);
331
ef8b03fa
RW
332 return 0;
333}
334
335/**
336 * arch_hibernation_header_restore - read the architecture specific data
337 * from the hibernation image header
338 * @addr: address to read the data from
339 */
340int arch_hibernation_header_restore(void *addr)
341{
342 struct restore_data_record *rdr = addr;
343
344 restore_jump_address = rdr->jump_address;
65c0554b 345 jump_address_phys = rdr->jump_address_phys;
ef8b03fa 346 restore_cr3 = rdr->cr3;
62a03def
CY
347
348 if (rdr->magic != RESTORE_MAGIC) {
349 pr_crit("Unrecognized hibernate image header format!\n");
350 return -EINVAL;
351 }
352
353 if (hibernation_e820_mismatch(rdr->e820_digest)) {
354 pr_crit("Hibernate inconsistent memory map detected!\n");
355 return -ENODEV;
356 }
357
358 return 0;
ef8b03fa 359}