Commit | Line | Data |
---|---|---|
a520110e CH |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _LINUX_PAGEWALK_H | |
3 | #define _LINUX_PAGEWALK_H | |
4 | ||
5 | #include <linux/mm.h> | |
6 | ||
7b86ac33 CH |
7 | struct mm_walk; |
8 | ||
49b06385 SB |
/**
 * enum page_walk_lock - locking requirement during a page walk
 * @PGWALK_RDLOCK:	  mmap_lock should be locked for read to stabilize
 *			  the vma tree
 * @PGWALK_WRLOCK:	  vma will be write-locked during the walk
 * @PGWALK_WRLOCK_VERIFY: vma is expected to be already write-locked during
 *			  the walk
 */
enum page_walk_lock {
	PGWALK_RDLOCK = 0,
	PGWALK_WRLOCK = 1,
	PGWALK_WRLOCK_VERIFY = 2,
};
18 | ||
a520110e | 19 | /** |
91ab1a41 | 20 | * struct mm_walk_ops - callbacks for walk_page_range |
3afc4236 SP |
21 | * @pgd_entry: if set, called for each non-empty PGD (top-level) entry |
22 | * @p4d_entry: if set, called for each non-empty P4D entry | |
23 | * @pud_entry: if set, called for each non-empty PUD entry | |
24 | * @pmd_entry: if set, called for each non-empty PMD entry | |
7b86ac33 CH |
25 | * this handler is required to be able to handle |
26 | * pmd_trans_huge() pmds. They may simply choose to | |
27 | * split_huge_page() instead of handling it explicitly. | |
e2f8f44b REB |
28 | * @pte_entry: if set, called for each PTE (lowest-level) entry, |
29 | * including empty ones | |
b7a16c7a | 30 | * @pte_hole: if set, called for each hole at all levels, |
e2f8f44b REB |
31 | * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. |
32 | * Any folded depths (where PTRS_PER_P?D is equal to 1) | |
33 | * are skipped. | |
dd361e50 PX |
34 | * @hugetlb_entry: if set, called for each hugetlb entry. This hook |
35 | * function is called with the vma lock held, in order to | |
36 | * protect against a concurrent freeing of the pte_t* or | |
37 | * the ptl. In some cases, the hook function needs to drop | |
38 | * and retake the vma lock in order to avoid deadlocks | |
39 | * while calling other functions. In such cases the hook | |
40 | * function must either refrain from accessing the pte or | |
41 | * ptl after dropping the vma lock, or else revalidate | |
42 | * those items after re-acquiring the vma lock and before | |
43 | * accessing them. | |
7b86ac33 CH |
44 | * @test_walk: caller specific callback function to determine whether |
45 | * we walk over the current vma or not. Returning 0 means | |
46 | * "do page table walk over the current vma", returning | |
47 | * a negative value means "abort current page table walk | |
48 | * right now" and returning 1 means "skip the current vma" | |
c31783ee DH |
49 | * Note that this callback is not called when the caller |
50 | * passes in a single VMA as for walk_page_vma(). | |
ecaad8ac TH |
51 | * @pre_vma: if set, called before starting walk on a non-null vma. |
52 | * @post_vma: if set, called after a walk on a non-null vma, provided | |
53 | * that @pre_vma and the vma walk succeeded. | |
3afc4236 SP |
54 | * |
55 | * p?d_entry callbacks are called even if those levels are folded on a | |
56 | * particular architecture/configuration. | |
a520110e | 57 | */ |
7b86ac33 | 58 | struct mm_walk_ops { |
3afc4236 SP |
59 | int (*pgd_entry)(pgd_t *pgd, unsigned long addr, |
60 | unsigned long next, struct mm_walk *walk); | |
61 | int (*p4d_entry)(p4d_t *p4d, unsigned long addr, | |
62 | unsigned long next, struct mm_walk *walk); | |
a520110e CH |
63 | int (*pud_entry)(pud_t *pud, unsigned long addr, |
64 | unsigned long next, struct mm_walk *walk); | |
65 | int (*pmd_entry)(pmd_t *pmd, unsigned long addr, | |
66 | unsigned long next, struct mm_walk *walk); | |
67 | int (*pte_entry)(pte_t *pte, unsigned long addr, | |
68 | unsigned long next, struct mm_walk *walk); | |
69 | int (*pte_hole)(unsigned long addr, unsigned long next, | |
b7a16c7a | 70 | int depth, struct mm_walk *walk); |
a520110e CH |
71 | int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, |
72 | unsigned long addr, unsigned long next, | |
73 | struct mm_walk *walk); | |
74 | int (*test_walk)(unsigned long addr, unsigned long next, | |
75 | struct mm_walk *walk); | |
ecaad8ac TH |
76 | int (*pre_vma)(unsigned long start, unsigned long end, |
77 | struct mm_walk *walk); | |
78 | void (*post_vma)(struct mm_walk *walk); | |
49b06385 | 79 | enum page_walk_lock walk_lock; |
7b86ac33 CH |
80 | }; |
81 | ||
3afc4236 SP |
/**
 * enum page_walk_action - action for pud_entry / pmd_entry callbacks
 * @ACTION_SUBTREE:  descend to next level, splitting huge pages if needed
 *		     and possible; this is the default
 * @ACTION_CONTINUE: continue to next entry at this level (ignoring any
 *		     subtree)
 * @ACTION_AGAIN:    call again for this entry
 */
enum page_walk_action {
	ACTION_SUBTREE = 0,
	ACTION_CONTINUE = 1,
	ACTION_AGAIN = 2,
};
94 | ||
7b86ac33 | 95 | /** |
91ab1a41 | 96 | * struct mm_walk - walk_page_range data |
7b86ac33 CH |
97 | * @ops: operation to call during the walk |
98 | * @mm: mm_struct representing the target process of page table walk | |
e47690d7 | 99 | * @pgd: pointer to PGD; only valid with no_vma (otherwise set to NULL) |
7b86ac33 | 100 | * @vma: vma currently walked (NULL if walking outside vmas) |
3afc4236 | 101 | * @action: next action to perform (see enum page_walk_action) |
488ae6a2 | 102 | * @no_vma: walk ignoring vmas (vma will always be NULL) |
7b86ac33 CH |
103 | * @private: private data for callbacks' usage |
104 | * | |
105 | * (see the comment on walk_page_range() for more details) | |
106 | */ | |
107 | struct mm_walk { | |
108 | const struct mm_walk_ops *ops; | |
a520110e | 109 | struct mm_struct *mm; |
e47690d7 | 110 | pgd_t *pgd; |
a520110e | 111 | struct vm_area_struct *vma; |
3afc4236 | 112 | enum page_walk_action action; |
488ae6a2 | 113 | bool no_vma; |
a520110e CH |
114 | void *private; |
115 | }; | |
116 | ||
7b86ac33 CH |
117 | int walk_page_range(struct mm_struct *mm, unsigned long start, |
118 | unsigned long end, const struct mm_walk_ops *ops, | |
119 | void *private); | |
488ae6a2 SP |
120 | int walk_page_range_novma(struct mm_struct *mm, unsigned long start, |
121 | unsigned long end, const struct mm_walk_ops *ops, | |
e47690d7 | 122 | pgd_t *pgd, |
488ae6a2 | 123 | void *private); |
e07cda5f DH |
124 | int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, |
125 | unsigned long end, const struct mm_walk_ops *ops, | |
126 | void *private); | |
7b86ac33 CH |
127 | int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, |
128 | void *private); | |
ecaad8ac TH |
129 | int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, |
130 | pgoff_t nr, const struct mm_walk_ops *ops, | |
131 | void *private); | |
a520110e | 132 | |
aa39ca69 DH |
133 | typedef int __bitwise folio_walk_flags_t; |
134 | ||
135 | /* | |
136 | * Walk migration entries as well. Careful: a large folio might get split | |
137 | * concurrently. | |
138 | */ | |
139 | #define FW_MIGRATION ((__force folio_walk_flags_t)BIT(0)) | |
140 | ||
141 | /* Walk shared zeropages (small + huge) as well. */ | |
142 | #define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(1)) | |
143 | ||
/*
 * Page table level of the entry a struct folio_walk refers to; selects
 * which member of its pointer/value unions is in use.
 */
enum folio_walk_level {
	FW_LEVEL_PTE = 0,
	FW_LEVEL_PMD = 1,
	FW_LEVEL_PUD = 2,
};
149 | ||
150 | /** | |
151 | * struct folio_walk - folio_walk_start() / folio_walk_end() data | |
152 | * @page: exact folio page referenced (if applicable) | |
153 | * @level: page table level identifying the entry type | |
154 | * @pte: pointer to the page table entry (FW_LEVEL_PTE). | |
155 | * @pmd: pointer to the page table entry (FW_LEVEL_PMD). | |
156 | * @pud: pointer to the page table entry (FW_LEVEL_PUD). | |
157 | * @ptl: pointer to the page table lock. | |
158 | * | |
159 | * (see folio_walk_start() documentation for more details) | |
160 | */ | |
161 | struct folio_walk { | |
162 | /* public */ | |
163 | struct page *page; | |
164 | enum folio_walk_level level; | |
165 | union { | |
166 | pte_t *ptep; | |
167 | pud_t *pudp; | |
168 | pmd_t *pmdp; | |
169 | }; | |
170 | union { | |
171 | pte_t pte; | |
172 | pud_t pud; | |
173 | pmd_t pmd; | |
174 | }; | |
175 | /* private */ | |
176 | struct vm_area_struct *vma; | |
177 | spinlock_t *ptl; | |
178 | }; | |
179 | ||
180 | struct folio *folio_walk_start(struct folio_walk *fw, | |
181 | struct vm_area_struct *vma, unsigned long addr, | |
182 | folio_walk_flags_t flags); | |
183 | ||
/*
 * folio_walk_end - finish a walk described by a struct folio_walk
 * @__fw:  pointer to the struct folio_walk of the walk being finished
 * @__vma: vma handed on to vma_pgtable_walk_end()
 *
 * Releases the page table lock recorded in @__fw, unmaps the PTE when the
 * walk ended at FW_LEVEL_PTE, and then calls vma_pgtable_walk_end().
 *
 * @__fw is captured in a local so that it is evaluated exactly once, even
 * when the caller passes an expression with side effects.
 */
#define folio_walk_end(__fw, __vma) do { \
	const struct folio_walk *__fw_once = (__fw); \
	spin_unlock(__fw_once->ptl); \
	if (likely(__fw_once->level == FW_LEVEL_PTE)) \
		pte_unmap(__fw_once->ptep); \
	vma_pgtable_walk_end(__vma); \
} while (0)
190 | ||
a520110e | 191 | #endif /* _LINUX_PAGEWALK_H */ |