/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MM_TYPES_H
#define _LINUX_MM_TYPES_H

#include <linux/mm_types_task.h>

#include <linux/auxvec.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
#include <linux/rcupdate.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>

#include <asm/mmu.h>

#ifndef AT_VECTOR_SIZE_ARCH
#define AT_VECTOR_SIZE_ARCH 0
#endif
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))

#define INIT_PASID	0

struct address_space;
struct mem_cgroup;

/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page, though if it is a pagecache page, rmap structures can tell us
 * who is mapping it.
 *
 * If you allocate the page using alloc_pages(), you can use some of the
 * space in struct page for your own purposes.  The five words in the main
 * union are available, except for bit 0 of the first word which must be
 * kept clear.  Many users use this word to store a pointer to an object
 * which is guaranteed to be aligned.  If you use the same storage as
 * page->mapping, you must restore it to NULL before freeing the page.
 *
 * If your page will not be mapped to userspace, you can also use the four
 * bytes in the mapcount union, but you must call page_mapcount_reset()
 * before freeing it.
 *
 * If you want to use the refcount field, it must be used in such a way
 * that other CPUs temporarily incrementing and then decrementing the
 * refcount does not cause problems.  On receiving the page from
 * alloc_pages(), the refcount will be positive.
 *
 * If you allocate pages of order > 0, you can use some of the fields
 * in each subpage, but you may need to restore some of their values
 * afterwards.
 *
 * SLUB uses cmpxchg_double() to atomically update its freelist and counters.
 * That requires that freelist & counters in struct slab be adjacent and
 * double-word aligned. Because struct slab currently just reinterprets the
 * bits of struct page, we align all struct pages to double-word boundaries,
 * and ensure that 'freelist' is aligned within struct slab.
 */
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment	__aligned(2 * sizeof(unsigned long))
#else
#define _struct_page_alignment
#endif

struct page {
	unsigned long flags;		/* Atomic flags, some possibly
					 * updated asynchronously */
	/*
	 * Five words (20/40 bytes) are available in this union.
	 * WARNING: bit 0 of the first word is used for PageTail(). That
	 * means the other users of this union MUST NOT use the bit to
	 * avoid collision and false-positive PageTail().
	 */
	union {
		struct {	/* Page cache and anonymous pages */
			/**
			 * @lru: Pageout list, eg. active_list protected by
			 * lruvec->lru_lock.  Sometimes used as a generic list
			 * by the page owner.
			 */
			union {
				struct list_head lru;

				/* Or, for the Unevictable "LRU list" slot */
				struct {
					/* Always even, to negate PageTail */
					void *__filler;
					/* Count page's or folio's mlocks */
					unsigned int mlock_count;
				};

				/* Or, free page */
				struct list_head buddy_list;
				struct list_head pcp_list;
			};
			/* See page-flags.h for PAGE_MAPPING_FLAGS */
			struct address_space *mapping;
			pgoff_t index;		/* Our offset within mapping. */
			/**
			 * @private: Mapping-private opaque data.
			 * Usually used for buffer_heads if PagePrivate.
			 * Used for swp_entry_t if PageSwapCache.
			 * Indicates order in the buddy system if PageBuddy.
			 */
			unsigned long private;
		};
		struct {	/* page_pool used by netstack */
			/**
			 * @pp_magic: magic value to avoid recycling non
			 * page_pool allocated pages.
			 */
			unsigned long pp_magic;
			struct page_pool *pp;
			unsigned long _pp_mapping_pad;
			unsigned long dma_addr;
			union {
				/**
				 * dma_addr_upper: might require a 64-bit
				 * value on 32-bit architectures.
				 */
				unsigned long dma_addr_upper;
				/**
				 * For frag page support, not supported in
				 * 32-bit architectures with 64-bit DMA.
				 */
				atomic_long_t pp_frag_count;
			};
		};
		struct {	/* Tail pages of compound page */
			unsigned long compound_head;	/* Bit zero is set */

			/* First tail page only */
			unsigned char compound_dtor;
			unsigned char compound_order;
			atomic_t compound_mapcount;
			atomic_t compound_pincount;
#ifdef CONFIG_64BIT
			unsigned int compound_nr; /* 1 << compound_order */
#endif
		};
		struct {	/* Second tail page of compound page */
			unsigned long _compound_pad_1;	/* compound_head */
			unsigned long _compound_pad_2;
			/* For both global and memcg */
			struct list_head deferred_list;
		};
		struct {	/* Page table pages */
			unsigned long _pt_pad_1;	/* compound_head */
			pgtable_t pmd_huge_pte; /* protected by page->ptl */
			unsigned long _pt_pad_2;	/* mapping */
			union {
				struct mm_struct *pt_mm; /* x86 pgds only */
				atomic_t pt_frag_refcount; /* powerpc */
			};
#if ALLOC_SPLIT_PTLOCKS
			spinlock_t *ptl;
#else
			spinlock_t ptl;
#endif
		};
		struct {	/* ZONE_DEVICE pages */
			/** @pgmap: Points to the hosting device page map. */
			struct dev_pagemap *pgmap;
			void *zone_device_data;
			/*
			 * ZONE_DEVICE private pages are counted as being
			 * mapped so the next 3 words hold the mapping, index,
			 * and private fields from the source anonymous or
			 * page cache page while the page is migrated to device
			 * private memory.
			 * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
			 * use the mapping, index, and private fields when
			 * pmem backed DAX files are mapped.
			 */
		};

		/** @rcu_head: You can use this to free a page by RCU. */
		struct rcu_head rcu_head;
	};

	union {		/* This union is 4 bytes in size. */
		/*
		 * If the page can be mapped to userspace, encodes the number
		 * of times this page is referenced by a page table.
		 */
		atomic_t _mapcount;

		/*
		 * If the page is neither PageSlab nor mappable to userspace,
		 * the value stored here may help determine what this page
		 * is used for.  See page-flags.h for a list of page types
		 * which are currently stored here.
		 */
		unsigned int page_type;
	};

	/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
	atomic_t _refcount;

#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
#endif

	/*
	 * On machines where all RAM is mapped into kernel address space,
	 * we can simply calculate the virtual address. On machines with
	 * highmem some memory is mapped into kernel virtual memory
	 * dynamically, so we need a place to store that address.
	 * Note that this field could be 16 bits on x86 ... ;)
	 *
	 * Architectures with slow multiplication can define
	 * WANT_PAGE_VIRTUAL in asm/page.h
	 */
#if defined(WANT_PAGE_VIRTUAL)
	void *virtual;			/* Kernel virtual address (NULL if
					   not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
	int _last_cpupid;
#endif
} _struct_page_alignment;

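/*
 * Illustrative sketch only (not a kernel API contract): the comment above
 * struct page describes how a page's owner may reuse the main union.  A
 * hypothetical driver-owned "cookie" could be stashed like this, provided
 * bit 0 of the word stays clear and any reused field is restored before the
 * page is freed:
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, 0);
 *	page->private = (unsigned long)cookie;	(cookie is a made-up object)
 *	...
 *	page->private = 0;
 *	page->mapping = NULL;			(only if mapping was reused)
 *	__free_pages(page, 0);
 */
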
/**
 * struct folio - Represents a contiguous set of bytes.
 * @flags: Identical to the page flags.
 * @lru: Least Recently Used list; tracks how recently this folio was used.
 * @mlock_count: Number of times this folio has been pinned by mlock().
 * @mapping: The file this page belongs to, or refers to the anon_vma for
 *    anonymous memory.
 * @index: Offset within the file, in units of pages.  For anonymous memory,
 *    this is the index from the beginning of the mmap.
 * @private: Filesystem per-folio data (see folio_attach_private()).
 *    Used for swp_entry_t if folio_test_swapcache().
 * @_mapcount: Do not access this member directly.  Use folio_mapcount() to
 *    find out how many times this folio is mapped by userspace.
 * @_refcount: Do not access this member directly.  Use folio_ref_count()
 *    to find how many references there are to this folio.
 * @memcg_data: Memory Control Group data.
 * @_flags_1: For large folios, additional page flags.
 * @__head: Points to the folio.  Do not use.
 * @_folio_dtor: Which destructor to use for this folio.
 * @_folio_order: Do not use directly, call folio_order().
 * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
 * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
 * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
 *
 * A folio is a physically, virtually and logically contiguous set
 * of bytes.  It is a power-of-two in size, and it is aligned to that
 * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
 * in the page cache, it is at a file offset which is a multiple of that
 * power-of-two.  It may be mapped into userspace at an address which is
 * at an arbitrary page offset, but its kernel virtual address is aligned
 * to its size.
 */
struct folio {
	/* private: don't document the anon union */
	union {
		struct {
	/* public: */
			unsigned long flags;
			union {
				struct list_head lru;
	/* private: avoid cluttering the output */
				struct {
					void *__filler;
	/* public: */
					unsigned int mlock_count;
	/* private: */
				};
	/* public: */
			};
			struct address_space *mapping;
			pgoff_t index;
			void *private;
			atomic_t _mapcount;
			atomic_t _refcount;
#ifdef CONFIG_MEMCG
			unsigned long memcg_data;
#endif
	/* private: the union with struct page is transitional */
		};
		struct page page;
	};
	unsigned long _flags_1;
	unsigned long __head;
	unsigned char _folio_dtor;
	unsigned char _folio_order;
	atomic_t _total_mapcount;
	atomic_t _pincount;
#ifdef CONFIG_64BIT
	unsigned int _folio_nr_pages;
#endif
};

#define FOLIO_MATCH(pg, fl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
FOLIO_MATCH(flags, flags);
FOLIO_MATCH(lru, lru);
FOLIO_MATCH(mapping, mapping);
FOLIO_MATCH(compound_head, lru);
FOLIO_MATCH(index, index);
FOLIO_MATCH(private, private);
FOLIO_MATCH(_mapcount, _mapcount);
FOLIO_MATCH(_refcount, _refcount);
#ifdef CONFIG_MEMCG
FOLIO_MATCH(memcg_data, memcg_data);
#endif
#undef FOLIO_MATCH
#define FOLIO_MATCH(pg, fl)						\
	static_assert(offsetof(struct folio, fl) ==			\
			offsetof(struct page, pg) + sizeof(struct page))
FOLIO_MATCH(flags, _flags_1);
FOLIO_MATCH(compound_head, __head);
FOLIO_MATCH(compound_dtor, _folio_dtor);
FOLIO_MATCH(compound_order, _folio_order);
FOLIO_MATCH(compound_mapcount, _total_mapcount);
FOLIO_MATCH(compound_pincount, _pincount);
#ifdef CONFIG_64BIT
FOLIO_MATCH(compound_nr, _folio_nr_pages);
#endif
#undef FOLIO_MATCH

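/*
 * The FOLIO_MATCH assertions above are what make it safe to reinterpret the
 * head page of a compound page as a folio and vice versa.  A rough sketch of
 * the conversions this enables (page_folio() and folio_page() live in other
 * headers; shown here only for orientation):
 *
 *	struct folio *folio = page_folio(page);	(head page -> its folio)
 *	struct page *head = &folio->page;	(folio -> its head page)
 */
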
static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
{
	struct page *tail = &folio->page + 1;
	return &tail->compound_mapcount;
}

static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
	return &page[1].compound_mapcount;
}

static inline atomic_t *compound_pincount_ptr(struct page *page)
{
	return &page[1].compound_pincount;
}

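/*
 * These helpers return pointers into the first tail page, where the
 * compound-wide counters live.  Callers elsewhere in the kernel read them
 * with atomic_read(); note that, like _mapcount, the compound mapcount is
 * generally stored with a -1 bias (an unmapped compound page reads -1),
 * whereas the pincount starts at zero, so direct reads are an
 * implementation detail rather than a public interface.
 */
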
/*
 * Used for sizing the vmemmap region on some architectures
 */
#define STRUCT_PAGE_MAX_SHIFT	(order_base_2(sizeof(struct page)))

#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)

/*
 * page_private can be used on tail pages.  However, PagePrivate is only
 * checked by the VM on the head page.  So page_private on the tail pages
 * should be used for data that's ancillary to the head page (eg attaching
 * buffer heads to tail pages after attaching buffer heads to the head page)
 */
#define page_private(page)		((page)->private)

static inline void set_page_private(struct page *page, unsigned long private)
{
	page->private = private;
}

static inline void *folio_get_private(struct folio *folio)
{
	return folio->private;
}

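/*
 * Typical usage sketch (hedged, not the only pattern): a filesystem that
 * wants per-page ancillary data pairs these accessors with the PG_private
 * flag so the VM knows the field is in use.  The flag helpers are declared
 * in page-flags.h; the "fs_data" object here is hypothetical:
 *
 *	set_page_private(page, (unsigned long)fs_data);
 *	SetPagePrivate(page);
 *	...
 *	ClearPagePrivate(page);
 *	set_page_private(page, 0);
 */
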
struct page_frag_cache {
	void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	__u16 offset;
	__u16 size;
#else
	__u32 offset;
#endif
	/* we maintain a pagecount bias, so that we don't dirty cache line
	 * containing page->_refcount every time we allocate a fragment.
	 */
	unsigned int pagecnt_bias;
	bool pfmemalloc;
};

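/*
 * A page_frag_cache is normally not manipulated directly; it backs the
 * page-fragment allocator (page_frag_alloc()/page_frag_free(), declared
 * elsewhere), which hands out small sub-page buffers from one refcounted
 * page and uses pagecnt_bias to avoid touching page->_refcount on every
 * allocation.  Rough usage sketch, assuming those helpers:
 *
 *	void *buf = page_frag_alloc(&cache, 256, GFP_ATOMIC);
 *	...
 *	page_frag_free(buf);
 */
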
typedef unsigned long vm_flags_t;

/*
 * A region containing a mapping of a non-memory backed file under NOMMU
 * conditions.  These are held in a global tree and are pinned by the VMAs that
 * map parts of them.
 */
struct vm_region {
	struct rb_node	vm_rb;		/* link in global region tree */
	vm_flags_t	vm_flags;	/* VMA vm_flags */
	unsigned long	vm_start;	/* start address of region */
	unsigned long	vm_end;		/* region initialised to here */
	unsigned long	vm_top;		/* region allocated to here */
	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
	struct file	*vm_file;	/* the backing file or NULL */

	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
						* this region */
};

#ifdef CONFIG_USERFAULTFD
#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, })
struct vm_userfaultfd_ctx {
	struct userfaultfd_ctx *ctx;
};
#else /* CONFIG_USERFAULTFD */
#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {})
struct vm_userfaultfd_ctx {};
#endif /* CONFIG_USERFAULTFD */

struct anon_vma_name {
	struct kref kref;
	/* The name needs to be at the end because it is dynamically sized. */
	char name[];
};

/*
 * This struct describes a virtual memory area.  There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	/* The first cache line has the info for VMA tree walking. */

	unsigned long vm_start;		/* Our start address within vm_mm. */
	unsigned long vm_end;		/* The first byte after our end address
					   within vm_mm. */

	struct mm_struct *vm_mm;	/* The address space we belong to. */

	/*
	 * Access permissions of this VMA.
	 * See vmf_insert_mixed_prot() for discussion.
	 */
	pgprot_t vm_page_prot;
	unsigned long vm_flags;		/* Flags, see mm.h. */

	/*
	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap interval tree.
	 *
	 * For private anonymous mappings, a pointer to a null terminated string
	 * containing the name given to the vma, or NULL if unnamed.
	 */

	union {
		struct {
			struct rb_node rb;
			unsigned long rb_subtree_last;
		} shared;
		/*
		 * Serialized by mmap_sem. Never use directly because it is
		 * valid only when vm_file is NULL. Use anon_vma_name instead.
		 */
		struct anon_vma_name *anon_name;
	};

	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	const struct vm_operations_struct *vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units */
	struct file * vm_file;		/* File we map to (can be NULL). */
	void * vm_private_data;		/* was vm_pte (shared mem) */

#ifdef CONFIG_SWAP
	atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
	struct vm_region *vm_region;	/* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
#endif
	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;

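/*
 * Orientation note: vm_start, vm_end and vm_pgoff tie a virtual address back
 * to its position in the backing object.  For an address addr inside the VMA,
 * the page offset within vm_file is
 *
 *	vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
 *
 * which is the quantity helpers such as linear_page_index() (defined in other
 * headers) compute.
 */
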
struct kioctx_table;
struct mm_struct {
	struct {
		struct maple_tree mm_mt;
#ifdef CONFIG_MMU
		unsigned long (*get_unmapped_area) (struct file *filp,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags);
#endif
		unsigned long mmap_base;	/* base of mmap area */
		unsigned long mmap_legacy_base;	/* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
		/* Base addresses for compatible mmap() */
		unsigned long mmap_compat_base;
		unsigned long mmap_compat_legacy_base;
#endif
		unsigned long task_size;	/* size of task vm space */
		pgd_t * pgd;

#ifdef CONFIG_MEMBARRIER
		/**
		 * @membarrier_state: Flags controlling membarrier behavior.
		 *
		 * This field is close to @pgd to hopefully fit in the same
		 * cache-line, which needs to be touched by switch_mm().
		 */
		atomic_t membarrier_state;
#endif

		/**
		 * @mm_users: The number of users including userspace.
		 *
		 * Use mmget()/mmget_not_zero()/mmput() to modify. When this
		 * drops to 0 (i.e. when the task exits and there are no other
		 * temporary reference holders), we also release a reference on
		 * @mm_count (which may then free the &struct mm_struct if
		 * @mm_count also drops to 0).
		 */
		atomic_t mm_users;

		/**
		 * @mm_count: The number of references to &struct mm_struct
		 * (@mm_users count as 1).
		 *
		 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
		 * &struct mm_struct is freed.
		 */
		atomic_t mm_count;

#ifdef CONFIG_MMU
		atomic_long_t pgtables_bytes;	/* PTE page table pages */
#endif
		int map_count;			/* number of VMAs */

		spinlock_t page_table_lock; /* Protects page tables and some
					     * counters
					     */
		/*
		 * With some kernel config, the current mmap_lock's offset
		 * inside 'mm_struct' is at 0x120, which is very optimal, as
		 * its two hot fields 'count' and 'owner' sit in 2 different
		 * cachelines, and when mmap_lock is highly contended, both
		 * of the 2 fields will be accessed frequently, current layout
		 * will help to reduce cache bouncing.
		 *
		 * So please be careful with adding new fields before
		 * mmap_lock, which can easily push the 2 fields into one
		 * cacheline.
		 */
		struct rw_semaphore mmap_lock;

		struct list_head mmlist; /* List of maybe swapped mm's.	These
					  * are globally strung together off
					  * init_mm.mmlist, and are protected
					  * by mmlist_lock
					  */


		unsigned long hiwater_rss; /* High-watermark of RSS usage */
		unsigned long hiwater_vm;  /* High-water virtual memory usage */

		unsigned long total_vm;	   /* Total pages mapped */
		unsigned long locked_vm;   /* Pages that have PG_mlocked set */
		atomic64_t    pinned_vm;   /* Refcount permanently increased */
		unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
		unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
		unsigned long stack_vm;	   /* VM_STACK */
		unsigned long def_flags;

		/**
		 * @write_protect_seq: Locked when any thread is write
		 * protecting pages mapped by this mm to enforce a later COW,
		 * for instance during page table copying for fork().
		 */
		seqcount_t write_protect_seq;

		spinlock_t arg_lock; /* protect the below fields */

		unsigned long start_code, end_code, start_data, end_data;
		unsigned long start_brk, brk, start_stack;
		unsigned long arg_start, arg_end, env_start, env_end;

		unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

		/*
		 * Special counters, in some configurations protected by the
		 * page_table_lock, in other configurations by being atomic.
		 */
		struct mm_rss_stat rss_stat;

		struct linux_binfmt *binfmt;

		/* Architecture-specific MM context */
		mm_context_t context;

		unsigned long flags; /* Must use atomic bitops to access */

#ifdef CONFIG_AIO
		spinlock_t			ioctx_lock;
		struct kioctx_table __rcu	*ioctx_table;
#endif
#ifdef CONFIG_MEMCG
		/*
		 * "owner" points to a task that is regarded as the canonical
		 * user/owner of this mm. All of the following must be true in
		 * order for it to be changed:
		 *
		 * current == mm->owner
		 * current->mm != mm
		 * new_owner->mm == mm
		 * new_owner->alloc_lock is held
		 */
		struct task_struct __rcu *owner;
#endif
		struct user_namespace *user_ns;

		/* store ref to file /proc/<pid>/exe symlink points to */
		struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
		struct mmu_notifier_subscriptions *notifier_subscriptions;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
		pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING
		/*
		 * numa_next_scan is the next time that PTEs will be remapped
		 * PROT_NONE to trigger NUMA hinting faults; such faults gather
		 * statistics and migrate pages to new nodes if necessary.
		 */
		unsigned long numa_next_scan;

		/* Restart point for scanning and remapping PTEs. */
		unsigned long numa_scan_offset;

		/* numa_scan_seq prevents two threads remapping PTEs. */
		int numa_scan_seq;
#endif
		/*
		 * An operation with batched TLB flushing is going on. Anything
		 * that can move process memory needs to flush the TLB when
		 * moving a PROT_NONE mapped page.
		 */
		atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
		/* See flush_tlb_batched_pending() */
		atomic_t tlb_flush_batched;
#endif
		struct uprobes_state uprobes_state;
#ifdef CONFIG_PREEMPT_RT
		struct rcu_head delayed_drop;
#endif
#ifdef CONFIG_HUGETLB_PAGE
		atomic_long_t hugetlb_usage;
#endif
		struct work_struct async_put_work;

#ifdef CONFIG_IOMMU_SVA
		u32 pasid;
#endif
#ifdef CONFIG_KSM
		/*
		 * Represent how many pages of this process are involved in KSM
		 * merging.
		 */
		unsigned long ksm_merging_pages;
		/*
		 * Represent how many pages are checked for ksm merging
		 * including merged and not merged.
		 */
		unsigned long ksm_rmap_items;
#endif
#ifdef CONFIG_LRU_GEN
		struct {
			/* this mm_struct is on lru_gen_mm_list */
			struct list_head list;
			/*
			 * Set when switching to this mm_struct, as a hint of
			 * whether it has been used since the last time per-node
			 * page table walkers cleared the corresponding bits.
			 */
			unsigned long bitmap;
#ifdef CONFIG_MEMCG
			/* points to the memcg of "owner" above */
			struct mem_cgroup *memcg;
#endif
		} lru_gen;
#endif /* CONFIG_LRU_GEN */
	} __randomize_layout;

	/*
	 * The mm_cpumask needs to be at the end of mm_struct, because it
	 * is dynamically sized based on nr_cpu_ids.
	 */
	unsigned long cpu_bitmap[];
};

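/*
 * Reference-counting sketch for the two counters documented inside
 * mm_struct above (the helpers live in <linux/sched/mm.h>):
 *
 *	if (mmget_not_zero(mm)) {	(take an mm_users reference)
 *		...			(address space guaranteed alive here)
 *		mmput(mm);
 *	}
 *
 *	mmgrab(mm);			(mm_count: keeps the struct itself)
 *	...				(page tables may already be torn down)
 *	mmdrop(mm);
 */
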
#define MM_MT_FLAGS	(MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
extern struct mm_struct init_mm;

/* Pointer magic because the dynamic array size confuses some compilers. */
static inline void mm_init_cpumask(struct mm_struct *mm)
{
	unsigned long cpu_bitmap = (unsigned long)mm;

	cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
	cpumask_clear((struct cpumask *)cpu_bitmap);
}

/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
{
	return (struct cpumask *)&mm->cpu_bitmap;
}

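/*
 * mm_cpumask() is the usual way architectures track which CPUs may hold
 * cached state (such as TLB entries) for this address space; a typical
 * pattern in arch switch_mm() implementations is roughly:
 *
 *	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
 *
 * and TLB shootdown code then sends IPIs only to the CPUs set in this mask.
 */
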
#ifdef CONFIG_LRU_GEN

struct lru_gen_mm_list {
	/* mm_struct list for page table walkers */
	struct list_head fifo;
	/* protects the list above */
	spinlock_t lock;
};

void lru_gen_add_mm(struct mm_struct *mm);
void lru_gen_del_mm(struct mm_struct *mm);
#ifdef CONFIG_MEMCG
void lru_gen_migrate_mm(struct mm_struct *mm);
#endif

static inline void lru_gen_init_mm(struct mm_struct *mm)
{
	INIT_LIST_HEAD(&mm->lru_gen.list);
	mm->lru_gen.bitmap = 0;
#ifdef CONFIG_MEMCG
	mm->lru_gen.memcg = NULL;
#endif
}

static inline void lru_gen_use_mm(struct mm_struct *mm)
{
	/*
	 * When the bitmap is set, page reclaim knows this mm_struct has been
	 * used since the last time it cleared the bitmap. So it might be worth
	 * walking the page tables of this mm_struct to clear the accessed bit.
	 */
	WRITE_ONCE(mm->lru_gen.bitmap, -1);
}

#else /* !CONFIG_LRU_GEN */

static inline void lru_gen_add_mm(struct mm_struct *mm)
{
}

static inline void lru_gen_del_mm(struct mm_struct *mm)
{
}

#ifdef CONFIG_MEMCG
static inline void lru_gen_migrate_mm(struct mm_struct *mm)
{
}
#endif

static inline void lru_gen_init_mm(struct mm_struct *mm)
{
}

static inline void lru_gen_use_mm(struct mm_struct *mm)
{
}

#endif /* CONFIG_LRU_GEN */

struct vma_iterator {
	struct ma_state mas;
};

#define VMA_ITERATOR(name, __mm, __addr)				\
	struct vma_iterator name = {					\
		.mas = {						\
			.tree = &(__mm)->mm_mt,				\
			.index = __addr,				\
			.node = MAS_START,				\
		},							\
	}

static inline void vma_iter_init(struct vma_iterator *vmi,
		struct mm_struct *mm, unsigned long addr)
{
	vmi->mas.tree = &mm->mm_mt;
	vmi->mas.index = addr;
	vmi->mas.node = MAS_START;
}

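/*
 * Sketch of how a vma_iterator is normally consumed (the for_each_vma()
 * helper is defined in <linux/mm.h>; shown here only as an assumed usage
 * pattern, with the mmap lock held by the caller):
 *
 *	VMA_ITERATOR(vmi, mm, 0);
 *	struct vm_area_struct *vma;
 *
 *	mmap_read_lock(mm);
 *	for_each_vma(vmi, vma)
 *		...;
 *	mmap_read_unlock(mm);
 */
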
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_finish_mmu(struct mmu_gather *tlb);

struct vm_fault;

/**
 * typedef vm_fault_t - Return type for page fault handlers.
 *
 * Page fault handlers return a bitmask of %VM_FAULT values.
 */
typedef __bitwise unsigned int vm_fault_t;

/**
 * enum vm_fault_reason - Page fault handlers return a bitmask of
 * these values to tell the core VM what happened when handling the
 * fault. Used to decide whether a process gets delivered SIGBUS or
 * just gets major/minor fault counters bumped up.
 *
 * @VM_FAULT_OOM:		Out Of Memory
 * @VM_FAULT_SIGBUS:		Bad access
 * @VM_FAULT_MAJOR:		Page read from storage
 * @VM_FAULT_WRITE:		Special case for get_user_pages
 * @VM_FAULT_HWPOISON:		Hit poisoned small page
 * @VM_FAULT_HWPOISON_LARGE:	Hit poisoned large page. Index encoded
 *				in upper bits
 * @VM_FAULT_SIGSEGV:		segmentation fault
 * @VM_FAULT_NOPAGE:		->fault installed the pte, not return page
 * @VM_FAULT_LOCKED:		->fault locked the returned page
 * @VM_FAULT_RETRY:		->fault blocked, must retry
 * @VM_FAULT_FALLBACK:		huge page fault failed, fall back to small
 * @VM_FAULT_DONE_COW:		->fault has fully handled COW
 * @VM_FAULT_NEEDDSYNC:		->fault did not modify page tables and needs
 *				fsync() to complete (for synchronous page faults
 *				in DAX)
 * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
 * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
 *
 */
enum vm_fault_reason {
	VM_FAULT_OOM            = (__force vm_fault_t)0x000001,
	VM_FAULT_SIGBUS         = (__force vm_fault_t)0x000002,
	VM_FAULT_MAJOR          = (__force vm_fault_t)0x000004,
	VM_FAULT_WRITE          = (__force vm_fault_t)0x000008,
	VM_FAULT_HWPOISON       = (__force vm_fault_t)0x000010,
	VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
	VM_FAULT_SIGSEGV        = (__force vm_fault_t)0x000040,
	VM_FAULT_NOPAGE         = (__force vm_fault_t)0x000100,
	VM_FAULT_LOCKED         = (__force vm_fault_t)0x000200,
	VM_FAULT_RETRY          = (__force vm_fault_t)0x000400,
	VM_FAULT_FALLBACK       = (__force vm_fault_t)0x000800,
	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
};

/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf)

#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS |	\
			VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON |	\
			VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)

#define VM_FAULT_RESULT_TRACE \
	{ VM_FAULT_OOM,                 "OOM" },	\
	{ VM_FAULT_SIGBUS,              "SIGBUS" },	\
	{ VM_FAULT_MAJOR,               "MAJOR" },	\
	{ VM_FAULT_WRITE,               "WRITE" },	\
	{ VM_FAULT_HWPOISON,            "HWPOISON" },	\
	{ VM_FAULT_HWPOISON_LARGE,      "HWPOISON_LARGE" },	\
	{ VM_FAULT_SIGSEGV,             "SIGSEGV" },	\
	{ VM_FAULT_NOPAGE,              "NOPAGE" },	\
	{ VM_FAULT_LOCKED,              "LOCKED" },	\
	{ VM_FAULT_RETRY,               "RETRY" },	\
	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }

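/*
 * Hedged sketch of how a ->fault handler reports back with these codes
 * (vmf_insert_pfn() already returns a vm_fault_t; my_fault, my_size and
 * my_pfn below are hypothetical names):
 *
 *	static vm_fault_t my_fault(struct vm_fault *vmf)
 *	{
 *		if (vmf->pgoff >= my_size)
 *			return VM_FAULT_SIGBUS;
 *		return vmf_insert_pfn(vmf->vma, vmf->address, my_pfn);
 *	}
 *
 * Callers of handle_mm_fault() typically test the result against
 * VM_FAULT_ERROR before looking at individual bits.
 */
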
struct vm_special_mapping {
	const char *name;	/* The name, e.g. "[vdso]". */

	/*
	 * If .fault is not provided, this points to a
	 * NULL-terminated array of pages that back the special mapping.
	 *
	 * This must not be NULL unless .fault is provided.
	 */
	struct page **pages;

	/*
	 * If non-NULL, then this is called to resolve page faults
	 * on the special mapping.  If used, .pages is not checked.
	 */
	vm_fault_t (*fault)(const struct vm_special_mapping *sm,
				struct vm_area_struct *vma,
				struct vm_fault *vmf);

	int (*mremap)(const struct vm_special_mapping *sm,
		     struct vm_area_struct *new_vma);
};

enum tlb_flush_reason {
	TLB_FLUSH_ON_TASK_SWITCH,
	TLB_REMOTE_SHOOTDOWN,
	TLB_LOCAL_SHOOTDOWN,
	TLB_LOCAL_MM_SHOOTDOWN,
	TLB_REMOTE_SEND_IPI,
	NR_TLB_FLUSH_REASONS,
};

/*
 * A swap entry has to fit into a "unsigned long", as the entry is hidden
 * in the "index" field of the swapper address space.
 */
typedef struct {
	unsigned long val;
} swp_entry_t;

/**
 * enum fault_flag - Fault flag definitions.
 * @FAULT_FLAG_WRITE: Fault was a write fault.
 * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
 * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
 * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
 * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
 * @FAULT_FLAG_TRIED: The fault has been tried once.
 * @FAULT_FLAG_USER: The fault originated in userspace.
 * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
 * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
 * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
 * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to unshare (and mark
 *                      exclusive) a possibly shared anonymous page that is
 *                      mapped R/O.
 * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
 *                        We should only access orig_pte if this flag set.
 *
 * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
 * whether we would allow page faults to retry by specifying these two
 * fault flags correctly.  Currently there can be three legal combinations:
 *
 * (a) ALLOW_RETRY and !TRIED:  this means the page fault allows retry, and
 *                              this is the first try
 *
 * (b) ALLOW_RETRY and TRIED:   this means the page fault allows retry, and
 *                              we've already tried at least once
 *
 * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
 *
 * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
 * be used.  Note that page faults can be allowed to retry for multiple times,
 * in which case we'll have an initial fault with flags (a) then later on
 * continuous faults with flags (b).  We should always try to detect pending
 * signals before a retry to make sure the continuous page faults can still be
 * interrupted if necessary.
 *
 * The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal.
 * FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when
 * no existing R/O-mapped anonymous page is encountered.
 */
enum fault_flag {
	FAULT_FLAG_WRITE =		1 << 0,
	FAULT_FLAG_MKWRITE =		1 << 1,
	FAULT_FLAG_ALLOW_RETRY =	1 << 2,
	FAULT_FLAG_RETRY_NOWAIT =	1 << 3,
	FAULT_FLAG_KILLABLE =		1 << 4,
	FAULT_FLAG_TRIED =		1 << 5,
	FAULT_FLAG_USER =		1 << 6,
	FAULT_FLAG_REMOTE =		1 << 7,
	FAULT_FLAG_INSTRUCTION =	1 << 8,
	FAULT_FLAG_INTERRUPTIBLE =	1 << 9,
	FAULT_FLAG_UNSHARE =		1 << 10,
	FAULT_FLAG_ORIG_PTE_VALID =	1 << 11,
};

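/*
 * Sketch of the retry protocol described above, from the point of view of an
 * arch fault handler calling handle_mm_fault() (local names are illustrative
 * only):
 *
 *	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 *
 *	fault = handle_mm_fault(vma, address, flags, regs);	(combination (a))
 *	if (fault & VM_FAULT_RETRY) {
 *		flags |= FAULT_FLAG_TRIED;			(combination (b))
 *		... check for pending signals, then retry ...
 *	}
 */
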
typedef unsigned int __bitwise zap_flags_t;

#endif /* _LINUX_MM_TYPES_H */