include/asm-generic/pgtable.h

#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>

#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \
	CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE.  However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif
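
/*
 * Illustrative only (not part of this header): a typical caller passes this
 * constant as the upper bound when a whole address space is torn down,
 * roughly as the exit_mmap() path does:
 *
 *	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 */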

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);
#else
/*
 * Despite being relevant to THP only, this API is called from generic rmap
 * code under PageTransHuge(), hence it needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(pmdp);
	return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp,
					    int full)
{
	return pmdp_huge_get_and_clear(mm, address, pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or while the address space is being torn down.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif
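
/*
 * Illustrative only (not part of this header): the *_full variants let an
 * architecture take shortcuts when the whole mm is going away.  A zap loop
 * in the style of mm/memory.c looks roughly like:
 *
 *	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 *	...
 *	// and for swap/non-present entries:
 *	pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 */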

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmdp)
{
	BUILD_BUG();
	return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			    pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused.  This allows the
 * host to transparently reclaim unused pages.  This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
	return 0;
}
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})
#endif

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}
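
/*
 * Illustrative only (not part of this header): the addr_end macros and the
 * p?d_none_or_clear_bad() helpers combine into the usual range-walking
 * idiom, roughly as mm/memory.c and mm/pagewalk.c do at each level:
 *
 *	pgd_t *pgd = pgd_offset(mm, addr);
 *	do {
 *		next = pgd_addr_end(addr, end);
 *		if (pgd_none_or_clear_bad(pgd))
 *			continue;
 *		// descend to the pud level for [addr, next) ...
 *	} while (pgd++, addr = next, addr != end);
 */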

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
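
/*
 * Illustrative only (not part of this header): a protection change under the
 * pte lock is expected to look roughly like the change_pte_range() loop in
 * mm/mprotect.c:
 *
 *	ptent = ptep_modify_prot_start(mm, addr, pte);
 *	ptent = pte_modify(ptent, newprot);
 *	ptep_modify_prot_commit(mm, addr, pte, ptent);
 */
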
#endif /* CONFIG_MMU */

/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified.  In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif
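
/*
 * Illustrative only (not part of this header): callers bracket a batch of
 * PTE updates with enter/leave while holding the relevant page table lock,
 * roughly as the copy/zap loops in mm/memory.c do:
 *
 *	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	arch_enter_lazy_mmu_mode();
 *	// ...set_pte_at()/ptep_* updates for the batch...
 *	arch_leave_lazy_mmu_mode();
 *	pte_unmap_unlock(start_pte, ptl);
 */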

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
				  unsigned long pfn, unsigned long addr,
				  unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
				   unsigned long pfn)
{
	return 0;
}

/*
 * track_pfn_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.  The untrack
 * can be done for a specific region indicated by pfn and size, or for the
 * entire vma (in which case pfn and size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
			       unsigned long pfn, unsigned long size)
{
}

/*
 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 */
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
			   unsigned long pfn, unsigned long addr,
			   unsigned long size);
extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			    unsigned long pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
			unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
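
/*
 * Illustrative only (not part of this header): the mapping and unmapping
 * sides are expected to stay paired, roughly as the remap_pfn_range()
 * implementation in mm/memory.c does it:
 *
 *	err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
 *	if (err)
 *		return -EINVAL;
 *	// ...establish the mapping page table level by level...
 *	if (err)
 *		untrack_pfn(vma, pfn, PAGE_ALIGN(size));
 */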

#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read.  NOTE: this is
	 * only going to work if pmdval_t isn't larger than an
	 * unsigned long.
	 */
	return *pmdp;
}
#endif

#ifndef pmd_move_must_withdraw
static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
					 spinlock_t *old_pmd_ptl)
{
	/*
	 * With split pmd lock we also need to move the preallocated
	 * PTE page table if new_pmd is on a different PMD page table.
	 */
	return new_pmd_ptl != old_pmd_ptl;
}
#endif

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults.  MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails).  While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd.  When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd was none is safe (because it
 * can return none anyway).  The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a not atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd).  The below checks will only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
	 * exception of pmd_none().  So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * be also null or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel.  Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and they want to
 * walk ptes while holding the mmap_sem in read mode (write mode doesn't
 * need this).  If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run a pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run when the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}
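
/*
 * Illustrative only (not part of this header): a pte-level walker that runs
 * with mmap_sem held for read typically guards its pte_offset_map_lock()
 * with one of the checks above, roughly:
 *
 *	if (pmd_trans_unstable(pmd))
 *		return 0;	// pmd vanished or became a THP; treat as empty
 *	orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	// ...walk the ptes for [addr, end)...
 *	pte_unmap_unlock(orig_pte, ptl);
 */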

#ifndef CONFIG_NUMA_BALANCING
/*
 * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
 * the only case the kernel cares about is NUMA balancing, and the bit is only
 * ever set when the VMA is accessible.  For PROT_NONE VMAs, the PTEs are not
 * marked _PAGE_PROTNONE, so by default implement the helper as "always no".
 * It is the responsibility of the caller to distinguish between PROT_NONE
 * protections and NUMA hinting fault protections.
 */
static inline int pte_protnone(pte_t pte)
{
	return 0;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_MMU */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pud_clear_huge(pud_t *pud)
{
	return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
	return 0;
}
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
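
/*
 * Illustrative only (not part of this header): the huge-vmap hooks are used
 * by the ioremap path, which opportunistically maps a whole PMD when the
 * remaining range and the physical address are suitably sized and aligned,
 * roughly:
 *
 *	if ((next - addr) == PMD_SIZE &&
 *	    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
 *	    pmd_set_huge(pmd, phys_addr + addr, prot))
 *		continue;	// mapped a huge pmd; skip the pte level
 */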

#endif /* !__ASSEMBLY__ */

#ifndef io_remap_pfn_range
#define io_remap_pfn_range remap_pfn_range
#endif

#endif /* _ASM_GENERIC_PGTABLE_H */