arch/x86/kvm/mmu/tdp_iter.h

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #ifndef __KVM_X86_MMU_TDP_ITER_H
   4 #define __KVM_X86_MMU_TDP_ITER_H
   5
   6 #include <linux/kvm_host.h>
   7
   8 #include "mmu.h"
   9 #include "spte.h"
  10
  11 /*
  12  * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
  13  * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
  14  * batched without having to collect the list of zapped SPs.  Flows that can
  15  * remove SPs must service pending TLB flushes prior to dropping RCU protection.
  16  */
  17 static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
  18 {
  19         return READ_ONCE(*rcu_dereference(sptep));
  20 }
  21
  22 static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
  23 {
  24         return xchg(rcu_dereference(sptep), new_spte);
  25 }
  26
  27 static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
  28 {
  29         WRITE_ONCE(*rcu_dereference(sptep), new_spte);
  30 }
  31
  32 /*
  33  * SPTEs must be modified atomically if they are shadow-present, leaf
  34  * SPTEs, and have volatile bits, i.e. has bits that can be set outside
  35  * of mmu_lock.  The Writable bit can be set by KVM's fast page fault
  36  * handler, and Accessed and Dirty bits can be set by the CPU.
  37  *
  38  * Note, non-leaf SPTEs do have Accessed bits and those bits are
  39  * technically volatile, but KVM doesn't consume the Accessed bit of
  40  * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit.  This
  41  * logic needs to be reassessed if KVM were to use non-leaf Accessed
  42  * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
  43  */
  44 static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
  45 {
  46         return is_shadow_present_pte(old_spte) &&
  47                is_last_spte(old_spte, level) &&
  48                spte_has_volatile_bits(old_spte);
  49 }
  50
  51 static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
  52                                          u64 new_spte, int level)
  53 {
  54         if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
  55                 return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
  56
  57         __kvm_tdp_mmu_write_spte(sptep, new_spte);
  58         return old_spte;
  59 }
  60
  61 static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
  62                                           u64 mask, int level)
  63 {
  64         atomic64_t *sptep_atomic;
  65
  66         if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
  67                 sptep_atomic = (atomic64_t *)rcu_dereference(sptep);
  68                 return (u64)atomic64_fetch_and(~mask, sptep_atomic);
  69         }
  70
  71         __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
  72         return old_spte;
  73 }
  74
  75 /*
  76  * A TDP iterator performs a pre-order walk over a TDP paging structure.
  77  */
  78 struct tdp_iter {
  79         /*
  80          * The iterator will traverse the paging structure towards the mapping
  81          * for this GFN.
  82          */
  83         gfn_t next_last_level_gfn;
  84         /*
  85          * The next_last_level_gfn at the time when the thread last
  86          * yielded. Only yielding when the next_last_level_gfn !=
  87          * yielded_gfn helps ensure forward progress.
  88          */
  89         gfn_t yielded_gfn;
  90         /* Pointers to the page tables traversed to reach the current SPTE */
  91         tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
  92         /* A pointer to the current SPTE */
  93         tdp_ptep_t sptep;
  94         /* The lowest GFN mapped by the current SPTE */
  95         gfn_t gfn;
  96         /* The level of the root page given to the iterator */
  97         int root_level;
  98         /* The lowest level the iterator should traverse to */
  99         int min_level;
 100         /* The iterator's current level within the paging structure */
 101         int level;
 102         /* The address space ID, i.e. SMM vs. regular. */
 103         int as_id;
 104         /* A snapshot of the value at sptep */
 105         u64 old_spte;
 106         /*
 107          * Whether the iterator has a valid state. This will be false if the
 108          * iterator walks off the end of the paging structure.
 109          */
 110         bool valid;
 111         /*
 112          * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
 113          * which case tdp_iter_next() needs to restart the walk at the root
 114          * level instead of advancing to the next entry.
 115          */
 116         bool yielded;
 117 };
 118
 119 /*
 120  * Iterates over every SPTE mapping the GFN range [start, end) in a
 121  * preorder traversal.
 122  */
 123 #define for_each_tdp_pte_min_level(iter, root, min_level, start, end) \
 124         for (tdp_iter_start(&iter, root, min_level, start); \
 125              iter.valid && iter.gfn < end;                   \
 126              tdp_iter_next(&iter))
 127
 128 #define for_each_tdp_pte(iter, root, start, end) \
 129         for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end)
 130
 131 tdp_ptep_t spte_to_child_pt(u64 pte, int level);
 132
 133 void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
 134                     int min_level, gfn_t next_last_level_gfn);
 135 void tdp_iter_next(struct tdp_iter *iter);
 136 void tdp_iter_restart(struct tdp_iter *iter);
 137
 138 #endif /* __KVM_X86_MMU_TDP_ITER_H */