/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * KVM dirty ring implementation
 *
 * Copyright 2019 Red Hat, Inc.
 */
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/vmalloc.h>
#include <linux/kvm_dirty_ring.h>
#include <trace/events/kvm.h>
#include "kvm_mm.h"

int __weak kvm_cpu_dirty_log_size(void)
{
	return 0;
}

u32 kvm_dirty_ring_get_rsvd_entries(void)
{
	return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
}
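
/*
 * A note on the reserved entries (an interpretation, not stated in this
 * file): the soft-full threshold keeps KVM_DIRTY_RING_RSVD_ENTRIES
 * slots, plus however many entries arch hardware can still log after
 * the check (kvm_cpu_dirty_log_size(), e.g. the PML buffer depth on
 * Intel VMX), free at all times.  A vCPU that passes the soft-full
 * check can thus keep dirtying pages until it exits to userspace
 * without overflowing the ring.
 */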

static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
{
	return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
}

bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
{
	return kvm_dirty_ring_used(ring) >= ring->soft_limit;
}

static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
{
	return kvm_dirty_ring_used(ring) >= ring->size;
}
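
/*
 * Worked example with hypothetical numbers: for a ring of 4096 entries
 * and, say, 1 + 512 reserved entries, soft_limit is 3583.  Since
 * dirty_index and reset_index are free-running u32 counters that are
 * only ever subtracted from each other, kvm_dirty_ring_used() stays
 * correct even after either counter wraps past 2^32.
 */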

static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
{
	struct kvm_memory_slot *memslot;
	int as_id, id;

	as_id = slot >> 16;
	id = (u16)slot;

	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
		return;

	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);

	if (!memslot || (offset + __fls(mask)) >= memslot->npages)
		return;

	KVM_MMU_LOCK(kvm);
	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
	KVM_MMU_UNLOCK(kvm);
}
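
/*
 * The "slot" argument packs the address space id in the high 16 bits
 * and the memslot id in the low 16 bits, as the decoding above shows.
 * Illustrative value: slot == 0x00010003 names memslot 3 in address
 * space 1 (the SMM address space on x86).
 */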

int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
{
	ring->dirty_gfns = vzalloc(size);
	if (!ring->dirty_gfns)
		return -ENOMEM;

	ring->size = size / sizeof(struct kvm_dirty_gfn);
	ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
	ring->dirty_index = 0;
	ring->reset_index = 0;
	ring->index = index;

	return 0;
}
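
/*
 * The push and reset paths index the array with "& (ring->size - 1)",
 * so ring->size must be a power of two.  Nothing in this file enforces
 * that; presumably the KVM_CAP_DIRTY_LOG_RING enable path validates the
 * userspace-supplied byte size before this allocator is reached.
 */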

static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
{
	gfn->flags = 0;
}

static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
{
	gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
}

static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
{
	return gfn->flags & KVM_DIRTY_GFN_F_RESET;
}
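
/*
 * Taken together, the three helpers above encode the life cycle of a
 * ring entry as seen from this file:
 *
 *	flags == 0				invalid, free for pushing
 *	flags == KVM_DIRTY_GFN_F_DIRTY		published, awaiting harvest
 *	flags & KVM_DIRTY_GFN_F_RESET		harvested by userspace,
 *						awaiting reset
 *
 * The kernel only sets DIRTY and clears the flags back to invalid;
 * setting RESET is left entirely to userspace.
 */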

int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
{
	u32 cur_slot, next_slot;
	u64 cur_offset, next_offset;
	unsigned long mask;
	int count = 0;
	struct kvm_dirty_gfn *entry;
	bool first_round = true;

	/* This is only needed to make compilers happy */
	cur_slot = cur_offset = mask = 0;

	while (true) {
		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];

		if (!kvm_dirty_gfn_harvested(entry))
			break;

		next_slot = READ_ONCE(entry->slot);
		next_offset = READ_ONCE(entry->offset);

		/* Update the flags to reflect that this GFN is reset */
		kvm_dirty_gfn_set_invalid(entry);

		ring->reset_index++;
		count++;
		/*
		 * Try to coalesce the reset operations when the guest is
		 * scanning pages in the same slot.
		 */
		if (!first_round && next_slot == cur_slot) {
			s64 delta = next_offset - cur_offset;

			if (delta >= 0 && delta < BITS_PER_LONG) {
				mask |= 1ull << delta;
				continue;
			}

			/* Backwards visit, careful about overflows! */
			if (delta > -BITS_PER_LONG && delta < 0 &&
			    (mask << -delta >> -delta) == mask) {
				cur_offset = next_offset;
				mask = (mask << -delta) | 1;
				continue;
			}
		}
		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
		cur_slot = next_slot;
		cur_offset = next_offset;
		mask = 1;
		first_round = false;
	}

	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);

	trace_kvm_dirty_ring_reset(ring);

	return count;
}
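
/*
 * For context, a userspace harvester might look like the sketch below
 * (pseudo-code with hypothetical names: "gfns" is the vCPU's ring
 * mmap()ed from the vCPU fd, "fetch" is the reader's private cursor,
 * and collect_dirty_page() stands in for whatever the VMM does with a
 * dirty GFN).  Entries are consumed, flagged as harvested, and then
 * reset in bulk via the KVM_RESET_DIRTY_RINGS vm ioctl, which lands in
 * kvm_dirty_ring_reset() above:
 *
 *	for (;;) {
 *		struct kvm_dirty_gfn *e = &gfns[fetch & (ring_size - 1)];
 *
 *		if (!(__atomic_load_n(&e->flags, __ATOMIC_ACQUIRE) &
 *		      KVM_DIRTY_GFN_F_DIRTY))
 *			break;
 *		collect_dirty_page(e->slot, e->offset);
 *		__atomic_store_n(&e->flags, KVM_DIRTY_GFN_F_RESET,
 *				 __ATOMIC_RELEASE);
 *		fetch++;
 *	}
 *	ioctl(vm_fd, KVM_RESET_DIRTY_RINGS);
 */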

void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
{
	struct kvm_dirty_gfn *entry;

	/* It should never get full */
	WARN_ON_ONCE(kvm_dirty_ring_full(ring));

	entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];

	entry->slot = slot;
	entry->offset = offset;
	/*
	 * Make sure the data is filled in before we publish this to
	 * the userspace program. There's no paired kernel-side reader.
	 */
	smp_wmb();
	kvm_dirty_gfn_set_dirtied(entry);
	ring->dirty_index++;
	trace_kvm_dirty_ring_push(ring, slot, offset);
}
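
/*
 * No lock is taken above because each ring is assumed to have a single
 * producer: pushes happen in the context of the vCPU that owns the
 * ring.  The smp_wmb() pairs with a read barrier on the userspace side,
 * which must observe KVM_DIRTY_GFN_F_DIRTY in entry->flags before it
 * may trust entry->slot and entry->offset.
 */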

struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
{
	return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE);
}
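
/*
 * Presumably this backs the vCPU fd's mmap() path: page faults on the
 * region at KVM_DIRTY_LOG_PAGE_OFFSET are resolved one page at a time
 * out of the vmalloc'ed ring, letting userspace map the entries
 * directly.
 */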

void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
{
	vfree(ring->dirty_gfns);
	ring->dirty_gfns = NULL;
}