// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON Primitives for Virtual Address Spaces
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon-va: " fmt

#include <asm-generic/mman-common.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>

#include "ops-common.h"

#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/*
 * 't->pid' should be a pointer to the relevant 'struct pid', with a reference
 * count held on it.  The caller must put the returned task unless it is NULL.
 */
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
	return get_pid_task(t->pid, PIDTYPE_PID);
}

/*
 * Get the mm_struct of the given target
 *
 * Caller _must_ put the mm_struct after use, unless it is NULL.
 *
 * Returns the mm_struct of the target on success, NULL on failure
 */
static struct mm_struct *damon_get_mm(struct damon_target *t)
{
	struct task_struct *task;
	struct mm_struct *mm;

	task = damon_get_task_struct(t);
	if (!task)
		return NULL;

	mm = get_task_mm(task);
	put_task_struct(task);
	return mm;
}

/*
 * Functions for the initial monitoring target regions construction
 */

/*
 * Size-evenly split a region into 'nr_pieces' small regions
 *
 * Returns 0 on success, or negative error code otherwise.
 */
static int damon_va_evenly_split_region(struct damon_target *t,
		struct damon_region *r, unsigned int nr_pieces)
{
	unsigned long sz_orig, sz_piece, orig_end;
	struct damon_region *n = NULL, *next;
	unsigned long start;

	if (!r || !nr_pieces)
		return -EINVAL;

	orig_end = r->ar.end;
	sz_orig = damon_sz_region(r);
	sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);

	if (!sz_piece)
		return -EINVAL;

	r->ar.end = r->ar.start + sz_piece;
	next = damon_next_region(r);
	for (start = r->ar.end; start + sz_piece <= orig_end;
			start += sz_piece) {
		n = damon_new_region(start, start + sz_piece);
		if (!n)
			return -ENOMEM;
		damon_insert_region(n, r, next, t);
		r = n;
	}
	/* complement last region for possible rounding error */
	if (n)
		n->ar.end = orig_end;

	return 0;
}

static unsigned long sz_range(struct damon_addr_range *r)
{
	return r->end - r->start;
}

/*
 * Find three regions separated by the two biggest unmapped regions
 *
 * mm		the mm_struct of the target address space
 * regions	an array of three address ranges in which the results are saved
 *
 * This function receives an address space and finds three regions in it that
 * are separated by the two biggest unmapped regions in the space.  Please
 * refer to the comments of '__damon_va_init_regions()' below to see why this
 * is necessary.
 *
 * Returns 0 on success, or a negative error code otherwise.
 */
static int __damon_va_three_regions(struct mm_struct *mm,
				    struct damon_addr_range regions[3])
{
	struct damon_addr_range first_gap = {0}, second_gap = {0};
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma, *prev = NULL;
	unsigned long start;

	/*
	 * Find the two biggest gaps so that first_gap > second_gap > others.
	 * If this is too slow, it can be optimised to examine the maple
	 * tree gaps.
	 */
	for_each_vma(vmi, vma) {
		unsigned long gap;

		if (!prev) {
			start = vma->vm_start;
			goto next;
		}
		gap = vma->vm_start - prev->vm_end;

		if (gap > sz_range(&first_gap)) {
			second_gap = first_gap;
			first_gap.start = prev->vm_end;
			first_gap.end = vma->vm_start;
		} else if (gap > sz_range(&second_gap)) {
			second_gap.start = prev->vm_end;
			second_gap.end = vma->vm_start;
		}
next:
		prev = vma;
	}

	if (!sz_range(&second_gap) || !sz_range(&first_gap))
		return -EINVAL;

	/* Sort the two biggest gaps by address */
	if (first_gap.start > second_gap.start)
		swap(first_gap, second_gap);

	/* Store the result */
	regions[0].start = ALIGN(start, DAMON_MIN_REGION);
	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
	regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);

	return 0;
}

/*
 * Get the three regions in the given target (task)
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int damon_va_three_regions(struct damon_target *t,
				struct damon_addr_range regions[3])
{
	struct mm_struct *mm;
	int rc;

	mm = damon_get_mm(t);
	if (!mm)
		return -EINVAL;

	mmap_read_lock(mm);
	rc = __damon_va_three_regions(mm, regions);
	mmap_read_unlock(mm);

	mmput(mm);
	return rc;
}

/*
 * Initialize the monitoring target regions for the given target (task)
 *
 * t	the given target
 *
 * Because only small portions of the entire address space are actually mapped
 * to memory and accessed, monitoring the unmapped regions is wasteful.  On
 * the other hand, because we can tolerate a small amount of noise, tracking
 * every mapping is not strictly required, and could even incur a high
 * overhead if the mappings change frequently or the number of mappings is
 * high.  The adaptive regions adjustment mechanism will further help to deal
 * with the noise by simply identifying the unmapped areas as regions that
 * receive no access.  Moreover, applying the real mappings, which would have
 * many unmapped areas inside, would make the adaptive mechanism quite
 * complex.  Nonetheless, excessively large unmapped areas inside the
 * monitoring target should be removed so that the adaptive mechanism does not
 * waste time on them.
 *
 * For this reason, we convert the complex mappings to three distinct regions
 * that together cover every mapped area of the address space.  The two gaps
 * between the three regions are the two biggest unmapped areas in the given
 * address space.  In detail, this function first identifies the start and the
 * end of the mappings and the two biggest unmapped areas of the address
 * space.  Then, it constructs the three regions as below:
 *
 *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
 *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
 *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
 *
 * As the usual memory map of a process is as below, the gap between the heap
 * and the uppermost mmap()-ed region, and the gap between the lowermost
 * mmap()-ed region and the stack, will be the two biggest unmapped regions.
 * Because these gaps are exceptionally huge in a usual address space,
 * excluding only these two biggest unmapped regions is a sufficient
 * trade-off.
 *
 *     <heap>
 *     <BIG UNMAPPED REGION 1>
 *     <uppermost mmap()-ed region>
 *     (other mmap()-ed regions and small unmapped regions)
 *     <lowermost mmap()-ed region>
 *     <BIG UNMAPPED REGION 2>
 *     <stack>
 */
static void __damon_va_init_regions(struct damon_ctx *ctx,
				     struct damon_target *t)
{
	struct damon_target *ti;
	struct damon_region *r;
	struct damon_addr_range regions[3];
	unsigned long sz = 0, nr_pieces;
	int i, tidx = 0;

	if (damon_va_three_regions(t, regions)) {
		damon_for_each_target(ti, ctx) {
			if (ti == t)
				break;
			tidx++;
		}
		pr_debug("Failed to get three regions of %dth target\n", tidx);
		return;
	}

	for (i = 0; i < 3; i++)
		sz += regions[i].end - regions[i].start;
	if (ctx->attrs.min_nr_regions)
		sz /= ctx->attrs.min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	/* Set the initial three regions of the target */
	for (i = 0; i < 3; i++) {
		r = damon_new_region(regions[i].start, regions[i].end);
		if (!r) {
			pr_err("%d'th init region creation failed\n", i);
			return;
		}
		damon_add_region(r, t);

		nr_pieces = (regions[i].end - regions[i].start) / sz;
		damon_va_evenly_split_region(t, r, nr_pieces);
	}
}

/* Initialize '->regions_list' of every target (task) */
static void damon_va_init(struct damon_ctx *ctx)
{
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		/* the user may set the target regions as they want */
		if (!damon_nr_regions(t))
			__damon_va_init_regions(ctx, t);
	}
}

/*
 * Update regions for current memory mappings
 */
static void damon_va_update(struct damon_ctx *ctx)
{
	struct damon_addr_range three_regions[3];
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
		if (damon_va_three_regions(t, three_regions))
			continue;
		damon_set_regions(t, three_regions, 3);
	}
}

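/*
 * Page table walk callback for clearing the accessed bit of the page mapped
 * at the sampling address.  Handles both PMD-mapped transparent huge pages
 * and regular PTE-mapped pages.
 */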
static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	spinlock_t *ptl;

	if (pmd_trans_huge(*pmd)) {
		ptl = pmd_lock(walk->mm, pmd);
		if (!pmd_present(*pmd)) {
			spin_unlock(ptl);
			return 0;
		}

		if (pmd_trans_huge(*pmd)) {
			damon_pmdp_mkold(pmd, walk->vma, addr);
			spin_unlock(ptl);
			return 0;
		}
		spin_unlock(ptl);
	}

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte) {
		walk->action = ACTION_AGAIN;
		return 0;
	}
	if (!pte_present(ptep_get(pte)))
		goto out;
	damon_ptep_mkold(pte, walk->vma, addr);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
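/*
 * Clear the accessed bit of the given HugeTLB entry and mark the backing
 * folio as idle, propagating any observed access to the folio's young flag.
 */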
static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
				struct vm_area_struct *vma, unsigned long addr)
{
	bool referenced = false;
	pte_t entry = huge_ptep_get(pte);
	struct folio *folio = pfn_folio(pte_pfn(entry));

	folio_get(folio);

	if (pte_young(entry)) {
		referenced = true;
		entry = pte_mkold(entry);
		set_huge_pte_at(mm, addr, pte, entry);
	}

#ifdef CONFIG_MMU_NOTIFIER
	if (mmu_notifier_clear_young(mm, addr,
				     addr + huge_page_size(hstate_vma(vma))))
		referenced = true;
#endif /* CONFIG_MMU_NOTIFIER */

	if (referenced)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(walk->vma);
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(pte);
	if (!pte_present(entry))
		goto out;

	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_mkold_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_mkold_ops = {
	.pmd_entry = damon_mkold_pmd_entry,
	.hugetlb_entry = damon_mkold_hugetlb_entry,
};

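/*
 * Clear the accessed bit of the page mapped at 'addr' in 'mm', so that a
 * later check can tell whether the page has been accessed in between.
 */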
static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
{
	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
	mmap_read_unlock(mm);
}

/*
 * Functions for the access checking of the regions
 */

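/*
 * Prepare an access check on a region by picking a random sampling address
 * within it and clearing the accessed bit of the page mapping that address.
 */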
static void __damon_va_prepare_access_check(struct mm_struct *mm,
					struct damon_region *r)
{
	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);

	damon_va_mkold(mm, r->sampling_addr);
}

static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		if (!mm)
			continue;
		damon_for_each_region(r, t)
			__damon_va_prepare_access_check(mm, r);
		mmput(mm);
	}
}

struct damon_young_walk_private {
	/* size of the folio for the access checked virtual memory address */
	unsigned long *folio_sz;
	bool young;
};

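/*
 * Page table walk callback for checking whether the page mapped at the
 * sampling address has been accessed.  Sets '->young' and '->folio_sz' of the
 * walk's 'damon_young_walk_private' argument accordingly.
 */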
static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	pte_t *pte;
	pte_t ptent;
	spinlock_t *ptl;
	struct folio *folio;
	struct damon_young_walk_private *priv = walk->private;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge(*pmd)) {
		ptl = pmd_lock(walk->mm, pmd);
		if (!pmd_present(*pmd)) {
			spin_unlock(ptl);
			return 0;
		}

		if (!pmd_trans_huge(*pmd)) {
			spin_unlock(ptl);
			goto regular_page;
		}
		folio = damon_get_folio(pmd_pfn(*pmd));
		if (!folio)
			goto huge_out;
		if (pmd_young(*pmd) || !folio_test_idle(folio) ||
		    mmu_notifier_test_young(walk->mm, addr))
			priv->young = true;
		*priv->folio_sz = HPAGE_PMD_SIZE;
		folio_put(folio);
huge_out:
		spin_unlock(ptl);
		return 0;
	}

regular_page:
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!pte) {
		walk->action = ACTION_AGAIN;
		return 0;
	}
	ptent = ptep_get(pte);
	if (!pte_present(ptent))
		goto out;
	folio = damon_get_folio(pte_pfn(ptent));
	if (!folio)
		goto out;
	if (pte_young(ptent) || !folio_test_idle(folio) ||
	    mmu_notifier_test_young(walk->mm, addr))
		priv->young = true;
	*priv->folio_sz = folio_size(folio);
	folio_put(folio);
out:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
				     unsigned long addr, unsigned long end,
				     struct mm_walk *walk)
{
	struct damon_young_walk_private *priv = walk->private;
	struct hstate *h = hstate_vma(walk->vma);
	struct folio *folio;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(h, walk->mm, pte);
	entry = huge_ptep_get(pte);
	if (!pte_present(entry))
		goto out;

	folio = pfn_folio(pte_pfn(entry));
	folio_get(folio);

	if (pte_young(entry) || !folio_test_idle(folio) ||
	    mmu_notifier_test_young(walk->mm, addr))
		priv->young = true;
	*priv->folio_sz = huge_page_size(h);

	folio_put(folio);

out:
	spin_unlock(ptl);
	return 0;
}
#else
#define damon_young_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static const struct mm_walk_ops damon_young_ops = {
	.pmd_entry = damon_young_pmd_entry,
	.hugetlb_entry = damon_young_hugetlb_entry,
};

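/*
 * Check whether the page mapped at 'addr' in 'mm' has been accessed since the
 * last call to damon_va_mkold() on it.  Also stores the size of the folio
 * backing the address in '*folio_sz'.
 */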
static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
		unsigned long *folio_sz)
{
	struct damon_young_walk_private arg = {
		.folio_sz = folio_sz,
		.young = false,
	};

	mmap_read_lock(mm);
	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
	mmap_read_unlock(mm);
	return arg.young;
}

/*
 * Check whether the region was accessed after the last preparation
 *
 * mm	'mm_struct' for the given virtual address space
 * r	the region to be checked
 */
static void __damon_va_check_access(struct mm_struct *mm,
				struct damon_region *r, bool same_target)
{
	static unsigned long last_addr;
	static unsigned long last_folio_sz = PAGE_SIZE;
	static bool last_accessed;

	/* If the region is in the last checked page, reuse the result */
	if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
				ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
		if (last_accessed)
			r->nr_accesses++;
		return;
	}

	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
	if (last_accessed)
		r->nr_accesses++;

	last_addr = r->sampling_addr;
}

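/*
 * Check access to every region of every target and update the 'nr_accesses'
 * counters.  Returns the maximum 'nr_accesses' observed in this pass.
 */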
static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct mm_struct *mm;
	struct damon_region *r;
	unsigned int max_nr_accesses = 0;
	bool same_target;

	damon_for_each_target(t, ctx) {
		mm = damon_get_mm(t);
		if (!mm)
			continue;
		same_target = false;
		damon_for_each_region(r, t) {
			__damon_va_check_access(mm, r, same_target);
			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
			same_target = true;
		}
		mmput(mm);
	}

	return max_nr_accesses;
}

/*
 * Functions for the target validity check and cleanup
 */

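/* A target is valid as long as its 'struct pid' still refers to a live task */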
static bool damon_va_target_valid(struct damon_target *t)
{
	struct task_struct *task;

	task = damon_get_task_struct(t);
	if (task) {
		put_task_struct(task);
		return true;
	}

	return false;
}

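/*
 * Apply the given madvise() behavior to the address range of the region.
 * Returns the number of bytes the hint was applied to, or 0 on failure (and
 * always 0 when the kernel is built without CONFIG_ADVISE_SYSCALLS).
 */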
#ifndef CONFIG_ADVISE_SYSCALLS
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	return 0;
}
#else
static unsigned long damos_madvise(struct damon_target *target,
		struct damon_region *r, int behavior)
{
	struct mm_struct *mm;
	unsigned long start = PAGE_ALIGN(r->ar.start);
	unsigned long len = PAGE_ALIGN(damon_sz_region(r));
	unsigned long applied;

	mm = damon_get_mm(target);
	if (!mm)
		return 0;

	applied = do_madvise(mm, start, len, behavior) ? 0 : len;
	mmput(mm);

	return applied;
}
#endif /* CONFIG_ADVISE_SYSCALLS */

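/*
 * Translate the DAMOS action of the scheme into a madvise() behavior and
 * apply it to the region.  Actions not supported by this 'vaddr' operations
 * set are silently ignored.
 */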
static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{
	int madv_action;

	switch (scheme->action) {
	case DAMOS_WILLNEED:
		madv_action = MADV_WILLNEED;
		break;
	case DAMOS_COLD:
		madv_action = MADV_COLD;
		break;
	case DAMOS_PAGEOUT:
		madv_action = MADV_PAGEOUT;
		break;
	case DAMOS_HUGEPAGE:
		madv_action = MADV_HUGEPAGE;
		break;
	case DAMOS_NOHUGEPAGE:
		madv_action = MADV_NOHUGEPAGE;
		break;
	case DAMOS_STAT:
		return 0;
	default:
		/*
		 * DAMOS actions that are not yet supported by 'vaddr'.
		 */
		return 0;
	}

	return damos_madvise(t, r, madv_action);
}

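/*
 * Compute the apply prioritization score of a region for the given scheme.
 * Only DAMOS_PAGEOUT gets a coldness-based score here; all other actions get
 * the maximum score.
 */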
static int damon_va_scheme_score(struct damon_ctx *context,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme)
{
	switch (scheme->action) {
	case DAMOS_PAGEOUT:
		return damon_cold_score(context, r, scheme);
	default:
		break;
	}

	return DAMOS_MAX_SCORE;
}

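/*
 * Register the monitoring operations for virtual address spaces.  The
 * 'fvaddr' variant monitors only user-specified fixed virtual address ranges,
 * so it does not construct or update the target regions on its own.
 */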
static int __init damon_va_initcall(void)
{
	struct damon_operations ops = {
		.id = DAMON_OPS_VADDR,
		.init = damon_va_init,
		.update = damon_va_update,
		.prepare_access_checks = damon_va_prepare_access_checks,
		.check_accesses = damon_va_check_accesses,
		.reset_aggregated = NULL,
		.target_valid = damon_va_target_valid,
		.cleanup = NULL,
		.apply_scheme = damon_va_apply_scheme,
		.get_scheme_score = damon_va_scheme_score,
	};
	/* ops for fixed virtual address ranges */
	struct damon_operations ops_fvaddr = ops;
	int err;

	/* Don't set the monitoring target regions for the entire mapping */
	ops_fvaddr.id = DAMON_OPS_FVADDR;
	ops_fvaddr.init = NULL;
	ops_fvaddr.update = NULL;

	err = damon_register_ops(&ops);
	if (err)
		return err;
	return damon_register_ops(&ops_fvaddr);
}

subsys_initcall(damon_va_initcall);

#include "vaddr-test.h"