Commit | Line | Data |
---|---|---|
5f1f79bb DH |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* | |
3 | * Virtio-mem device driver. | |
4 | * | |
5 | * Copyright Red Hat, Inc. 2020 | |
6 | * | |
7 | * Author(s): David Hildenbrand <david@redhat.com> | |
8 | */ | |
9 | ||
10 | #include <linux/virtio.h> | |
11 | #include <linux/virtio_mem.h> | |
12 | #include <linux/workqueue.h> | |
13 | #include <linux/slab.h> | |
14 | #include <linux/module.h> | |
15 | #include <linux/mm.h> | |
16 | #include <linux/memory_hotplug.h> | |
17 | #include <linux/memory.h> | |
18 | #include <linux/hrtimer.h> | |
19 | #include <linux/crash_dump.h> | |
20 | #include <linux/mutex.h> | |
21 | #include <linux/bitmap.h> | |
22 | #include <linux/lockdep.h> | |
6639032a | 23 | #include <linux/log2.h> |
5f1f79bb | 24 | |
f2af6d39 DH |
25 | #include <acpi/acpi_numa.h> |
26 | ||
255f5985 DH |
27 | static bool unplug_online = true; |
28 | module_param(unplug_online, bool, 0644); | |
29 | MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); | |
30 | ||
faa45ff4 DH |
31 | static bool force_bbm; |
32 | module_param(force_bbm, bool, 0444); | |
33 | MODULE_PARM_DESC(force_bbm, | |
34 | "Force Big Block Mode. Default is 0 (auto-selection)"); | |
35 | ||
36 | static unsigned long bbm_block_size; | |
37 | module_param(bbm_block_size, ulong, 0444); | |
38 | MODULE_PARM_DESC(bbm_block_size, | |
39 | "Big Block size in bytes. Default is 0 (auto-detection)."); | |
40 | ||
3711387a DH |
41 | static bool bbm_safe_unplug = true; |
42 | module_param(bbm_safe_unplug, bool, 0444); | |
43 | MODULE_PARM_DESC(bbm_safe_unplug, | |
44 | "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); | |
45 | ||
d5614944 DH |
46 | /* |
47 | * virtio-mem currently supports the following modes of operation: | |
48 | * | |
4ba50cd3 | 49 | * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The |
d5614944 DH |
50 | * size of a Sub Block (SB) is determined based on the device block size, the |
51 | * pageblock size, and the maximum allocation granularity of the buddy. | |
52 | * Subblocks within a Linux memory block might either be plugged or unplugged. | |
53 | * Memory is added/removed to Linux MM in Linux memory block granularity. | |
54 | * | |
4ba50cd3 DH |
55 | * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks. |
56 | * Memory is added/removed to Linux MM in Big Block granularity. | |
57 | * | |
58 | * The mode is determined automatically based on the Linux memory block size | |
59 | * and the device block size. | |
60 | * | |
d5614944 DH |
61 | * User space / core MM (auto onlining) is responsible for onlining added |
62 | * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are | |
63 | * always onlined separately, and all memory within a Linux memory block is | |
64 | * onlined to the same zone - virtio-mem relies on this behavior. | |
65 | */ | |
66 | ||
99f0b55e DH |
/*
 * Per-memory-block state used in Sub Block Mode (SBM).
 *
 * VIRTIO_MEM_SBM_MB_COUNT is not a state; it is the number of states and
 * sizes the per-state counter array.
 */
enum virtio_mem_sbm_mb_state {
	/* Nothing plugged and not added to Linux; may be reused later. */
	VIRTIO_MEM_SBM_MB_UNUSED = 0,
	/* (Partially) plugged but not added to Linux: add_memory() failed. */
	VIRTIO_MEM_SBM_MB_PLUGGED,
	/* All subblocks plugged, added to Linux, currently offline. */
	VIRTIO_MEM_SBM_MB_OFFLINE,
	/* Some subblocks plugged, added to Linux, currently offline. */
	VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
	/* All subblocks plugged, added to Linux, online in a kernel zone. */
	VIRTIO_MEM_SBM_MB_KERNEL,
	/* Some subblocks plugged, added to Linux, online in a kernel zone. */
	VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
	/* All subblocks plugged, added to Linux, online in ZONE_MOVABLE. */
	VIRTIO_MEM_SBM_MB_MOVABLE,
	/* Some subblocks plugged, added to Linux, online in ZONE_MOVABLE. */
	VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
	VIRTIO_MEM_SBM_MB_COUNT
};
89 | ||
4ba50cd3 DH |
/*
 * Per-big-block state used in Big Block Mode (BBM). A Big Block (BB) covers
 * 1..X Linux memory blocks.
 *
 * VIRTIO_MEM_BBM_BB_COUNT is not a state; it is the number of states and
 * sizes the per-state counter array.
 */
enum virtio_mem_bbm_bb_state {
	/* Not plugged and not added to Linux; may be reused later. */
	VIRTIO_MEM_BBM_BB_UNUSED = 0,
	/* Plugged but not added to Linux: add_memory() failed. */
	VIRTIO_MEM_BBM_BB_PLUGGED,
	/* Plugged and added to Linux. */
	VIRTIO_MEM_BBM_BB_ADDED,
	/* Every online part is fake-offline; the block is ready to remove. */
	VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
	VIRTIO_MEM_BBM_BB_COUNT
};
104 | ||
5f1f79bb DH |
105 | struct virtio_mem { |
106 | struct virtio_device *vdev; | |
107 | ||
108 | /* We might first have to unplug all memory when starting up. */ | |
109 | bool unplug_all_required; | |
110 | ||
111 | /* Workqueue that processes the plug/unplug requests. */ | |
112 | struct work_struct wq; | |
98ff9f94 | 113 | atomic_t wq_active; |
5f1f79bb DH |
114 | atomic_t config_changed; |
115 | ||
116 | /* Virtqueue for guest->host requests. */ | |
117 | struct virtqueue *vq; | |
118 | ||
119 | /* Wait for a host response to a guest request. */ | |
120 | wait_queue_head_t host_resp; | |
121 | ||
122 | /* Space for one guest request and the host response. */ | |
123 | struct virtio_mem_req req; | |
124 | struct virtio_mem_resp resp; | |
125 | ||
126 | /* The current size of the device. */ | |
127 | uint64_t plugged_size; | |
128 | /* The requested size of the device. */ | |
129 | uint64_t requested_size; | |
130 | ||
131 | /* The device block size (for communicating with the device). */ | |
544fc7db | 132 | uint64_t device_block_size; |
6725f211 | 133 | /* The determined node id for all memory of the device. */ |
f2af6d39 | 134 | int nid; |
5f1f79bb DH |
135 | /* Physical start address of the memory region. */ |
136 | uint64_t addr; | |
137 | /* Maximum region size in bytes. */ | |
138 | uint64_t region_size; | |
139 | ||
ebf71552 DH |
140 | /* The parent resource for all memory added via this device. */ |
141 | struct resource *parent_resource; | |
b3562c60 DH |
142 | /* |
143 | * Copy of "System RAM (virtio_mem)" to be used for | |
144 | * add_memory_driver_managed(). | |
145 | */ | |
146 | const char *resource_name; | |
ffaa6ce8 DH |
147 | /* Memory group identification. */ |
148 | int mgid; | |
ebf71552 | 149 | |
98ff9f94 DH |
150 | /* |
151 | * We don't want to add too much memory if it's not getting onlined, | |
152 | * to avoid running OOM. Besides this threshold, we allow to have at | |
153 | * least two offline blocks at a time (whatever is bigger). | |
154 | */ | |
155 | #define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024) | |
156 | atomic64_t offline_size; | |
157 | uint64_t offline_threshold; | |
5f1f79bb | 158 | |
4ba50cd3 DH |
159 | /* If set, the driver is in SBM, otherwise in BBM. */ |
160 | bool in_sbm; | |
161 | ||
162 | union { | |
163 | struct { | |
164 | /* Id of the first memory block of this device. */ | |
165 | unsigned long first_mb_id; | |
166 | /* Id of the last usable memory block of this device. */ | |
167 | unsigned long last_usable_mb_id; | |
168 | /* Id of the next memory bock to prepare when needed. */ | |
169 | unsigned long next_mb_id; | |
170 | ||
171 | /* The subblock size. */ | |
172 | uint64_t sb_size; | |
173 | /* The number of subblocks per Linux memory block. */ | |
174 | uint32_t sbs_per_mb; | |
175 | ||
176 | /* Summary of all memory block states. */ | |
177 | unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; | |
178 | ||
179 | /* | |
180 | * One byte state per memory block. Allocated via | |
181 | * vmalloc(). Resized (alloc+copy+free) on demand. | |
182 | * | |
183 | * With 128 MiB memory blocks, we have states for 512 | |
184 | * GiB of memory in one 4 KiB page. | |
185 | */ | |
186 | uint8_t *mb_states; | |
187 | ||
188 | /* | |
189 | * Bitmap: one bit per subblock. Allocated similar to | |
190 | * sbm.mb_states. | |
191 | * | |
192 | * A set bit means the corresponding subblock is | |
193 | * plugged, otherwise it's unblocked. | |
194 | * | |
195 | * With 4 MiB subblocks, we manage 128 GiB of memory | |
196 | * in one 4 KiB page. | |
197 | */ | |
198 | unsigned long *sb_states; | |
199 | } sbm; | |
200 | ||
201 | struct { | |
202 | /* Id of the first big block of this device. */ | |
203 | unsigned long first_bb_id; | |
204 | /* Id of the last usable big block of this device. */ | |
205 | unsigned long last_usable_bb_id; | |
206 | /* Id of the next device bock to prepare when needed. */ | |
207 | unsigned long next_bb_id; | |
208 | ||
209 | /* Summary of all big block states. */ | |
210 | unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT]; | |
211 | ||
212 | /* One byte state per big block. See sbm.mb_states. */ | |
213 | uint8_t *bb_states; | |
214 | ||
215 | /* The block size used for plugging/adding/removing. */ | |
216 | uint64_t bb_size; | |
217 | } bbm; | |
218 | }; | |
5f1f79bb DH |
219 | |
220 | /* | |
4ba50cd3 DH |
221 | * Mutex that protects the sbm.mb_count, sbm.mb_states, |
222 | * sbm.sb_states, bbm.bb_count, and bbm.bb_states | |
5f1f79bb DH |
223 | * |
224 | * When this lock is held the pointers can't change, ONLINE and | |
225 | * OFFLINE blocks can't change the state and no subblocks will get | |
c627ff5d | 226 | * plugged/unplugged. |
ce281462 DH |
227 | * |
228 | * In kdump mode, used to serialize requests, last_block_addr and | |
229 | * last_block_plugged. | |
5f1f79bb DH |
230 | */ |
231 | struct mutex hotplug_mutex; | |
232 | bool hotplug_active; | |
233 | ||
234 | /* An error occurred we cannot handle - stop processing requests. */ | |
235 | bool broken; | |
236 | ||
ce281462 DH |
237 | /* Cached valued of is_kdump_kernel() when the device was probed. */ |
238 | bool in_kdump; | |
239 | ||
5f1f79bb DH |
240 | /* The driver is being removed. */ |
241 | spinlock_t removal_lock; | |
242 | bool removing; | |
243 | ||
244 | /* Timer for retrying to plug/unplug memory. */ | |
245 | struct hrtimer retry_timer; | |
23e77b5d DH |
246 | unsigned int retry_timer_ms; |
247 | #define VIRTIO_MEM_RETRY_TIMER_MIN_MS 50000 | |
248 | #define VIRTIO_MEM_RETRY_TIMER_MAX_MS 300000 | |
5f1f79bb DH |
249 | |
250 | /* Memory notifier (online/offline events). */ | |
251 | struct notifier_block memory_notifier; | |
252 | ||
ce281462 DH |
253 | #ifdef CONFIG_PROC_VMCORE |
254 | /* vmcore callback for /proc/vmcore handling in kdump mode */ | |
255 | struct vmcore_cb vmcore_cb; | |
256 | uint64_t last_block_addr; | |
257 | bool last_block_plugged; | |
258 | #endif /* CONFIG_PROC_VMCORE */ | |
259 | ||
5f1f79bb DH |
260 | /* Next device in the list of virtio-mem devices. */ |
261 | struct list_head next; | |
262 | }; | |
263 | ||
/*
 * A single online_page callback is shared by all virtio-mem devices. The
 * device list is modified under virtio_mem_mutex and iterated with RCU from
 * the callback.
 */
static DEFINE_MUTEX(virtio_mem_mutex);
static LIST_HEAD(virtio_mem_devices);

/* Forward declarations of functions defined further down in this file. */
static void virtio_mem_online_page_cb(struct page *page, unsigned int order);
static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
						  unsigned long nr_pages);
static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
						   unsigned long nr_pages);
static void virtio_mem_retry(struct virtio_mem *vm);
static int virtio_mem_create_resource(struct virtio_mem *vm);
static void virtio_mem_delete_resource(struct virtio_mem *vm);
5f1f79bb DH |
279 | |
280 | /* | |
281 | * Register a virtio-mem device so it will be considered for the online_page | |
282 | * callback. | |
283 | */ | |
284 | static int register_virtio_mem_device(struct virtio_mem *vm) | |
285 | { | |
286 | int rc = 0; | |
287 | ||
288 | /* First device registers the callback. */ | |
289 | mutex_lock(&virtio_mem_mutex); | |
290 | if (list_empty(&virtio_mem_devices)) | |
291 | rc = set_online_page_callback(&virtio_mem_online_page_cb); | |
292 | if (!rc) | |
293 | list_add_rcu(&vm->next, &virtio_mem_devices); | |
294 | mutex_unlock(&virtio_mem_mutex); | |
295 | ||
296 | return rc; | |
297 | } | |
298 | ||
299 | /* | |
300 | * Unregister a virtio-mem device so it will no longer be considered for the | |
301 | * online_page callback. | |
302 | */ | |
303 | static void unregister_virtio_mem_device(struct virtio_mem *vm) | |
304 | { | |
305 | /* Last device unregisters the callback. */ | |
306 | mutex_lock(&virtio_mem_mutex); | |
307 | list_del_rcu(&vm->next); | |
308 | if (list_empty(&virtio_mem_devices)) | |
309 | restore_online_page_callback(&virtio_mem_online_page_cb); | |
310 | mutex_unlock(&virtio_mem_mutex); | |
311 | ||
312 | synchronize_rcu(); | |
313 | } | |
314 | ||
/*
 * Map a physical address to the id of the Linux memory block containing it:
 * the address divided by memory_block_size_bytes(), rounded down.
 */
static unsigned long virtio_mem_phys_to_mb_id(unsigned long addr)
{
	const unsigned long mb_size = memory_block_size_bytes();

	return addr / mb_size;
}
322 | ||
/*
 * Map a Linux memory block id to the physical start address of that block:
 * the id multiplied by memory_block_size_bytes().
 */
static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id)
{
	const unsigned long mb_size = memory_block_size_bytes();

	return mb_id * mb_size;
}
330 | ||
4ba50cd3 DH |
331 | /* |
332 | * Calculate the big block id of a given address. | |
333 | */ | |
334 | static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm, | |
335 | uint64_t addr) | |
336 | { | |
337 | return addr / vm->bbm.bb_size; | |
338 | } | |
339 | ||
340 | /* | |
341 | * Calculate the physical start address of a given big block id. | |
342 | */ | |
343 | static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm, | |
344 | unsigned long bb_id) | |
345 | { | |
346 | return bb_id * vm->bbm.bb_size; | |
347 | } | |
348 | ||
5f1f79bb DH |
349 | /* |
350 | * Calculate the subblock id of a given address. | |
351 | */ | |
352 | static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, | |
353 | unsigned long addr) | |
354 | { | |
355 | const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); | |
356 | const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id); | |
357 | ||
905c4c51 | 358 | return (addr - mb_addr) / vm->sbm.sb_size; |
5f1f79bb DH |
359 | } |
360 | ||
4ba50cd3 DH |
361 | /* |
362 | * Set the state of a big block, taking care of the state counter. | |
363 | */ | |
364 | static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm, | |
365 | unsigned long bb_id, | |
366 | enum virtio_mem_bbm_bb_state state) | |
367 | { | |
368 | const unsigned long idx = bb_id - vm->bbm.first_bb_id; | |
369 | enum virtio_mem_bbm_bb_state old_state; | |
370 | ||
371 | old_state = vm->bbm.bb_states[idx]; | |
372 | vm->bbm.bb_states[idx] = state; | |
373 | ||
374 | BUG_ON(vm->bbm.bb_count[old_state] == 0); | |
375 | vm->bbm.bb_count[old_state]--; | |
376 | vm->bbm.bb_count[state]++; | |
377 | } | |
378 | ||
379 | /* | |
380 | * Get the state of a big block. | |
381 | */ | |
382 | static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm, | |
383 | unsigned long bb_id) | |
384 | { | |
385 | return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id]; | |
386 | } | |
387 | ||
388 | /* | |
389 | * Prepare the big block state array for the next big block. | |
390 | */ | |
391 | static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) | |
392 | { | |
393 | unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id; | |
394 | unsigned long new_bytes = old_bytes + 1; | |
395 | int old_pages = PFN_UP(old_bytes); | |
396 | int new_pages = PFN_UP(new_bytes); | |
397 | uint8_t *new_array; | |
398 | ||
399 | if (vm->bbm.bb_states && old_pages == new_pages) | |
400 | return 0; | |
401 | ||
402 | new_array = vzalloc(new_pages * PAGE_SIZE); | |
403 | if (!new_array) | |
404 | return -ENOMEM; | |
405 | ||
406 | mutex_lock(&vm->hotplug_mutex); | |
407 | if (vm->bbm.bb_states) | |
408 | memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE); | |
409 | vfree(vm->bbm.bb_states); | |
410 | vm->bbm.bb_states = new_array; | |
411 | mutex_unlock(&vm->hotplug_mutex); | |
412 | ||
413 | return 0; | |
414 | } | |
415 | ||
/*
 * Iterate, in ascending id order, over all big blocks of @_vm that are in
 * state @_state. @_bb_id is the caller's iterator variable.
 *
 * The loop stops early once the counter for @_state is zero.
 *
 * Every access goes through the @_vm parameter, so the macro does not depend
 * on the caller's device pointer being named "vm".
 */
#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \
	for ((_bb_id) = (_vm)->bbm.first_bb_id; \
	     (_bb_id) < (_vm)->bbm.next_bb_id && (_vm)->bbm.bb_count[(_state)]; \
	     (_bb_id)++) \
		if (virtio_mem_bbm_get_bb_state((_vm), (_bb_id)) == (_state))
421 | ||
269ac938 DH |
/*
 * Iterate, in descending id order, over all big blocks of @_vm that are in
 * state @_state. @_bb_id is the caller's iterator variable.
 *
 * The loop stops early once the counter for @_state is zero.
 *
 * Every access goes through the @_vm parameter, so the macro does not depend
 * on the caller's device pointer being named "vm".
 */
#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \
	for ((_bb_id) = (_vm)->bbm.next_bb_id - 1; \
	     (_bb_id) >= (_vm)->bbm.first_bb_id && (_vm)->bbm.bb_count[(_state)]; \
	     (_bb_id)--) \
		if (virtio_mem_bbm_get_bb_state((_vm), (_bb_id)) == (_state))
427 | ||
5f1f79bb DH |
428 | /* |
429 | * Set the state of a memory block, taking care of the state counter. | |
430 | */ | |
99f0b55e DH |
431 | static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, |
432 | unsigned long mb_id, uint8_t state) | |
5f1f79bb | 433 | { |
8a6f082b | 434 | const unsigned long idx = mb_id - vm->sbm.first_mb_id; |
99f0b55e | 435 | uint8_t old_state; |
5f1f79bb | 436 | |
99f0b55e DH |
437 | old_state = vm->sbm.mb_states[idx]; |
438 | vm->sbm.mb_states[idx] = state; | |
5f1f79bb | 439 | |
99f0b55e DH |
440 | BUG_ON(vm->sbm.mb_count[old_state] == 0); |
441 | vm->sbm.mb_count[old_state]--; | |
442 | vm->sbm.mb_count[state]++; | |
5f1f79bb DH |
443 | } |
444 | ||
445 | /* | |
446 | * Get the state of a memory block. | |
447 | */ | |
99f0b55e DH |
448 | static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, |
449 | unsigned long mb_id) | |
5f1f79bb | 450 | { |
8a6f082b | 451 | const unsigned long idx = mb_id - vm->sbm.first_mb_id; |
5f1f79bb | 452 | |
99f0b55e | 453 | return vm->sbm.mb_states[idx]; |
5f1f79bb DH |
454 | } |
455 | ||
456 | /* | |
457 | * Prepare the state array for the next memory block. | |
458 | */ | |
99f0b55e | 459 | static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) |
5f1f79bb | 460 | { |
8a6f082b DH |
461 | int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id); |
462 | int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1); | |
99f0b55e | 463 | uint8_t *new_array; |
5f1f79bb | 464 | |
99f0b55e | 465 | if (vm->sbm.mb_states && old_pages == new_pages) |
5f1f79bb DH |
466 | return 0; |
467 | ||
99f0b55e DH |
468 | new_array = vzalloc(new_pages * PAGE_SIZE); |
469 | if (!new_array) | |
5f1f79bb DH |
470 | return -ENOMEM; |
471 | ||
472 | mutex_lock(&vm->hotplug_mutex); | |
99f0b55e DH |
473 | if (vm->sbm.mb_states) |
474 | memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE); | |
475 | vfree(vm->sbm.mb_states); | |
476 | vm->sbm.mb_states = new_array; | |
5f1f79bb DH |
477 | mutex_unlock(&vm->hotplug_mutex); |
478 | ||
479 | return 0; | |
480 | } | |
481 | ||
/*
 * Iterate, in ascending id order, over all Linux memory blocks of @_vm that
 * are in state @_state. @_mb_id is the caller's iterator variable.
 *
 * The loop stops early once the counter for @_state is zero.
 */
#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \
	for ((_mb_id) = (_vm)->sbm.first_mb_id; \
	     (_mb_id) < (_vm)->sbm.next_mb_id && (_vm)->sbm.mb_count[(_state)]; \
	     (_mb_id)++) \
		if (virtio_mem_sbm_get_mb_state((_vm), (_mb_id)) == (_state))
5f1f79bb | 487 | |
/*
 * Iterate, in descending id order, over all Linux memory blocks of @_vm that
 * are in state @_state. @_mb_id is the caller's iterator variable.
 *
 * The loop stops early once the counter for @_state is zero.
 */
#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \
	for ((_mb_id) = (_vm)->sbm.next_mb_id - 1; \
	     (_mb_id) >= (_vm)->sbm.first_mb_id && (_vm)->sbm.mb_count[(_state)]; \
	     (_mb_id)--) \
		if (virtio_mem_sbm_get_mb_state((_vm), (_mb_id)) == (_state))
c627ff5d | 493 | |
41e6215c DH |
494 | /* |
495 | * Calculate the bit number in the subblock bitmap for the given subblock | |
496 | * inside the given memory block. | |
497 | */ | |
54c6a6ba DH |
498 | static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, |
499 | unsigned long mb_id, int sb_id) | |
41e6215c | 500 | { |
8a6f082b | 501 | return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id; |
41e6215c DH |
502 | } |
503 | ||
5f1f79bb DH |
504 | /* |
505 | * Mark all selected subblocks plugged. | |
506 | * | |
507 | * Will not modify the state of the memory block. | |
508 | */ | |
54c6a6ba DH |
509 | static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm, |
510 | unsigned long mb_id, int sb_id, | |
511 | int count) | |
5f1f79bb | 512 | { |
54c6a6ba | 513 | const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); |
5f1f79bb | 514 | |
54c6a6ba | 515 | __bitmap_set(vm->sbm.sb_states, bit, count); |
5f1f79bb DH |
516 | } |
517 | ||
518 | /* | |
519 | * Mark all selected subblocks unplugged. | |
520 | * | |
521 | * Will not modify the state of the memory block. | |
522 | */ | |
54c6a6ba DH |
523 | static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm, |
524 | unsigned long mb_id, int sb_id, | |
525 | int count) | |
5f1f79bb | 526 | { |
54c6a6ba | 527 | const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); |
5f1f79bb | 528 | |
54c6a6ba | 529 | __bitmap_clear(vm->sbm.sb_states, bit, count); |
5f1f79bb DH |
530 | } |
531 | ||
532 | /* | |
533 | * Test if all selected subblocks are plugged. | |
534 | */ | |
54c6a6ba DH |
535 | static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm, |
536 | unsigned long mb_id, int sb_id, | |
537 | int count) | |
5f1f79bb | 538 | { |
54c6a6ba | 539 | const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); |
5f1f79bb DH |
540 | |
541 | if (count == 1) | |
54c6a6ba | 542 | return test_bit(bit, vm->sbm.sb_states); |
5f1f79bb DH |
543 | |
544 | /* TODO: Helper similar to bitmap_set() */ | |
54c6a6ba | 545 | return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >= |
5f1f79bb DH |
546 | bit + count; |
547 | } | |
548 | ||
c627ff5d DH |
549 | /* |
550 | * Test if all selected subblocks are unplugged. | |
551 | */ | |
54c6a6ba DH |
552 | static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, |
553 | unsigned long mb_id, int sb_id, | |
554 | int count) | |
c627ff5d | 555 | { |
54c6a6ba | 556 | const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); |
c627ff5d DH |
557 | |
558 | /* TODO: Helper similar to bitmap_set() */ | |
54c6a6ba DH |
559 | return find_next_bit(vm->sbm.sb_states, bit + count, bit) >= |
560 | bit + count; | |
c627ff5d DH |
561 | } |
562 | ||
5f1f79bb | 563 | /* |
905c4c51 | 564 | * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is |
5f1f79bb DH |
565 | * none. |
566 | */ | |
54c6a6ba | 567 | static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, |
5f1f79bb DH |
568 | unsigned long mb_id) |
569 | { | |
54c6a6ba | 570 | const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); |
5f1f79bb | 571 | |
54c6a6ba | 572 | return find_next_zero_bit(vm->sbm.sb_states, |
905c4c51 | 573 | bit + vm->sbm.sbs_per_mb, bit) - bit; |
5f1f79bb DH |
574 | } |
575 | ||
576 | /* | |
577 | * Prepare the subblock bitmap for the next memory block. | |
578 | */ | |
54c6a6ba | 579 | static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) |
5f1f79bb | 580 | { |
8a6f082b | 581 | const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id; |
905c4c51 DH |
582 | const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; |
583 | const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; | |
5f1f79bb DH |
584 | int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); |
585 | int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); | |
54c6a6ba | 586 | unsigned long *new_bitmap, *old_bitmap; |
5f1f79bb | 587 | |
54c6a6ba | 588 | if (vm->sbm.sb_states && old_pages == new_pages) |
5f1f79bb DH |
589 | return 0; |
590 | ||
54c6a6ba DH |
591 | new_bitmap = vzalloc(new_pages * PAGE_SIZE); |
592 | if (!new_bitmap) | |
5f1f79bb DH |
593 | return -ENOMEM; |
594 | ||
595 | mutex_lock(&vm->hotplug_mutex); | |
cf4a4493 | 596 | if (vm->sbm.sb_states) |
54c6a6ba | 597 | memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); |
5f1f79bb | 598 | |
54c6a6ba DH |
599 | old_bitmap = vm->sbm.sb_states; |
600 | vm->sbm.sb_states = new_bitmap; | |
5f1f79bb DH |
601 | mutex_unlock(&vm->hotplug_mutex); |
602 | ||
54c6a6ba | 603 | vfree(old_bitmap); |
5f1f79bb DH |
604 | return 0; |
605 | } | |
606 | ||
98ff9f94 DH |
607 | /* |
608 | * Test if we could add memory without creating too much offline memory - | |
609 | * to avoid running OOM if memory is getting onlined deferred. | |
610 | */ | |
611 | static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) | |
612 | { | |
613 | if (WARN_ON_ONCE(size > vm->offline_threshold)) | |
614 | return false; | |
615 | ||
616 | return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold; | |
617 | } | |
618 | ||
5f1f79bb | 619 | /* |
01afdee2 | 620 | * Try adding memory to Linux. Will usually only fail if out of memory. |
5f1f79bb DH |
621 | * |
622 | * Must not be called with the vm->hotplug_mutex held (possible deadlock with | |
623 | * onlining code). | |
624 | * | |
01afdee2 | 625 | * Will not modify the state of memory blocks in virtio-mem. |
5f1f79bb | 626 | */ |
01afdee2 DH |
627 | static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, |
628 | uint64_t size) | |
5f1f79bb | 629 | { |
98ff9f94 | 630 | int rc; |
5f1f79bb | 631 | |
b3562c60 DH |
632 | /* |
633 | * When force-unloading the driver and we still have memory added to | |
634 | * Linux, the resource name has to stay. | |
635 | */ | |
636 | if (!vm->resource_name) { | |
637 | vm->resource_name = kstrdup_const("System RAM (virtio_mem)", | |
638 | GFP_KERNEL); | |
639 | if (!vm->resource_name) | |
640 | return -ENOMEM; | |
641 | } | |
642 | ||
01afdee2 DH |
643 | dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr, |
644 | addr + size - 1); | |
98ff9f94 DH |
645 | /* Memory might get onlined immediately. */ |
646 | atomic64_add(size, &vm->offline_size); | |
ffaa6ce8 DH |
647 | rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, |
648 | MHP_MERGE_RESOURCE | MHP_NID_IS_MGID); | |
01afdee2 | 649 | if (rc) { |
98ff9f94 | 650 | atomic64_sub(size, &vm->offline_size); |
01afdee2 DH |
651 | dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); |
652 | /* | |
653 | * TODO: Linux MM does not properly clean up yet in all cases | |
654 | * where adding of memory failed - especially on -ENOMEM. | |
655 | */ | |
656 | } | |
98ff9f94 | 657 | return rc; |
5f1f79bb DH |
658 | } |
659 | ||
/*
 * Try adding the single Linux memory block @mb_id to Linux. Thin wrapper
 * around virtio_mem_add_memory(); see there for constraints and return
 * values.
 */
static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	const uint64_t mb_start = virtio_mem_mb_id_to_phys(mb_id);
	const uint64_t mb_size = memory_block_size_bytes();

	return virtio_mem_add_memory(vm, mb_start, mb_size);
}
670 | ||
4ba50cd3 DH |
671 | /* |
672 | * See virtio_mem_add_memory(): Try adding a big block. | |
673 | */ | |
674 | static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id) | |
675 | { | |
676 | const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); | |
677 | const uint64_t size = vm->bbm.bb_size; | |
678 | ||
679 | return virtio_mem_add_memory(vm, addr, size); | |
680 | } | |
681 | ||
01afdee2 DH |
682 | /* |
683 | * Try removing memory from Linux. Will only fail if memory blocks aren't | |
684 | * offline. | |
5f1f79bb DH |
685 | * |
686 | * Must not be called with the vm->hotplug_mutex held (possible deadlock with | |
687 | * onlining code). | |
688 | * | |
01afdee2 | 689 | * Will not modify the state of memory blocks in virtio-mem. |
5f1f79bb | 690 | */ |
01afdee2 DH |
691 | static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, |
692 | uint64_t size) | |
5f1f79bb | 693 | { |
1d33c2ca | 694 | int rc; |
5f1f79bb | 695 | |
01afdee2 DH |
696 | dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, |
697 | addr + size - 1); | |
e1c158e4 | 698 | rc = remove_memory(addr, size); |
98ff9f94 DH |
699 | if (!rc) { |
700 | atomic64_sub(size, &vm->offline_size); | |
1d33c2ca DH |
701 | /* |
702 | * We might have freed up memory we can now unplug, retry | |
703 | * immediately instead of waiting. | |
704 | */ | |
705 | virtio_mem_retry(vm); | |
01afdee2 DH |
706 | } else { |
707 | dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc); | |
98ff9f94 | 708 | } |
1d33c2ca | 709 | return rc; |
5f1f79bb DH |
710 | } |
711 | ||
/*
 * Try removing the single Linux memory block @mb_id from Linux. Thin wrapper
 * around virtio_mem_remove_memory(); see there for constraints and return
 * values.
 */
static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	const uint64_t mb_start = virtio_mem_mb_id_to_phys(mb_id);
	const uint64_t mb_size = memory_block_size_bytes();

	return virtio_mem_remove_memory(vm, mb_start, mb_size);
}
722 | ||
723 | /* | |
724 | * Try offlining and removing memory from Linux. | |
a5732387 DH |
725 | * |
726 | * Must not be called with the vm->hotplug_mutex held (possible deadlock with | |
727 | * onlining code). | |
728 | * | |
01afdee2 | 729 | * Will not modify the state of memory blocks in virtio-mem. |
a5732387 | 730 | */ |
01afdee2 DH |
731 | static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, |
732 | uint64_t addr, | |
733 | uint64_t size) | |
a5732387 | 734 | { |
1d33c2ca | 735 | int rc; |
a5732387 | 736 | |
01afdee2 DH |
737 | dev_dbg(&vm->vdev->dev, |
738 | "offlining and removing memory: 0x%llx - 0x%llx\n", addr, | |
739 | addr + size - 1); | |
740 | ||
e1c158e4 | 741 | rc = offline_and_remove_memory(addr, size); |
98ff9f94 DH |
742 | if (!rc) { |
743 | atomic64_sub(size, &vm->offline_size); | |
1d33c2ca DH |
744 | /* |
745 | * We might have freed up memory we can now unplug, retry | |
746 | * immediately instead of waiting. | |
747 | */ | |
748 | virtio_mem_retry(vm); | |
01afdee2 DH |
749 | } else { |
750 | dev_dbg(&vm->vdev->dev, | |
751 | "offlining and removing memory failed: %d\n", rc); | |
98ff9f94 | 752 | } |
1d33c2ca | 753 | return rc; |
a5732387 DH |
754 | } |
755 | ||
01afdee2 DH |
/*
 * Try offlining and removing the single Linux memory block @mb_id. Thin
 * wrapper around virtio_mem_offline_and_remove_memory(); see there for
 * constraints and return values.
 */
static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
						unsigned long mb_id)
{
	const uint64_t mb_start = virtio_mem_mb_id_to_phys(mb_id);
	const uint64_t mb_size = memory_block_size_bytes();

	return virtio_mem_offline_and_remove_memory(vm, mb_start, mb_size);
}
768 | ||
269ac938 DH |
769 | /* |
770 | * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a | |
771 | * all Linux memory blocks covered by the big block. | |
772 | */ | |
773 | static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm, | |
774 | unsigned long bb_id) | |
775 | { | |
776 | const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); | |
777 | const uint64_t size = vm->bbm.bb_size; | |
778 | ||
779 | return virtio_mem_offline_and_remove_memory(vm, addr, size); | |
780 | } | |
781 | ||
5f1f79bb DH |
782 | /* |
783 | * Trigger the workqueue so the device can perform its magic. | |
784 | */ | |
785 | static void virtio_mem_retry(struct virtio_mem *vm) | |
786 | { | |
787 | unsigned long flags; | |
788 | ||
789 | spin_lock_irqsave(&vm->removal_lock, flags); | |
790 | if (!vm->removing) | |
791 | queue_work(system_freezable_wq, &vm->wq); | |
792 | spin_unlock_irqrestore(&vm->removal_lock, flags); | |
793 | } | |
794 | ||
f2af6d39 DH |
795 | static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id) |
796 | { | |
797 | int node = NUMA_NO_NODE; | |
798 | ||
799 | #if defined(CONFIG_ACPI_NUMA) | |
800 | if (virtio_has_feature(vm->vdev, VIRTIO_MEM_F_ACPI_PXM)) | |
801 | node = pxm_to_node(node_id); | |
802 | #endif | |
803 | return node; | |
804 | } | |
805 | ||
5f1f79bb DH |
806 | /* |
807 | * Test if a virtio-mem device overlaps with the given range. Can be called | |
808 | * from (notifier) callbacks lockless. | |
809 | */ | |
835491c5 DH |
810 | static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start, |
811 | uint64_t size) | |
5f1f79bb | 812 | { |
835491c5 | 813 | return start < vm->addr + vm->region_size && vm->addr < start + size; |
5f1f79bb DH |
814 | } |
815 | ||
816 | /* | |
8464e3bd | 817 | * Test if a virtio-mem device contains a given range. Can be called from |
5f1f79bb DH |
818 | * (notifier) callbacks lockless. |
819 | */ | |
8464e3bd DH |
820 | static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, |
821 | uint64_t size) | |
5f1f79bb | 822 | { |
8464e3bd | 823 | return start >= vm->addr && start + size <= vm->addr + vm->region_size; |
5f1f79bb DH |
824 | } |
825 | ||
d46dfb62 DH |
826 | static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm, |
827 | unsigned long mb_id) | |
5f1f79bb | 828 | { |
99f0b55e DH |
829 | switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { |
830 | case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: | |
831 | case VIRTIO_MEM_SBM_MB_OFFLINE: | |
5f1f79bb DH |
832 | return NOTIFY_OK; |
833 | default: | |
834 | break; | |
835 | } | |
836 | dev_warn_ratelimited(&vm->vdev->dev, | |
837 | "memory block onlining denied\n"); | |
838 | return NOTIFY_BAD; | |
839 | } | |
840 | ||
d46dfb62 DH |
841 | static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm, |
842 | unsigned long mb_id) | |
5f1f79bb | 843 | { |
99f0b55e | 844 | switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { |
c740bb97 DH |
845 | case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL: |
846 | case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL: | |
99f0b55e DH |
847 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
848 | VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); | |
5f1f79bb | 849 | break; |
c740bb97 DH |
850 | case VIRTIO_MEM_SBM_MB_KERNEL: |
851 | case VIRTIO_MEM_SBM_MB_MOVABLE: | |
99f0b55e DH |
852 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
853 | VIRTIO_MEM_SBM_MB_OFFLINE); | |
5f1f79bb DH |
854 | break; |
855 | default: | |
856 | BUG(); | |
857 | break; | |
858 | } | |
859 | } | |
860 | ||
d46dfb62 | 861 | static void virtio_mem_sbm_notify_online(struct virtio_mem *vm, |
c740bb97 DH |
862 | unsigned long mb_id, |
863 | unsigned long start_pfn) | |
5f1f79bb | 864 | { |
07252dfe | 865 | const bool is_movable = is_zone_movable_page(pfn_to_page(start_pfn)); |
c740bb97 DH |
866 | int new_state; |
867 | ||
99f0b55e DH |
868 | switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { |
869 | case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: | |
c740bb97 DH |
870 | new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL; |
871 | if (is_movable) | |
872 | new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL; | |
5f1f79bb | 873 | break; |
99f0b55e | 874 | case VIRTIO_MEM_SBM_MB_OFFLINE: |
c740bb97 DH |
875 | new_state = VIRTIO_MEM_SBM_MB_KERNEL; |
876 | if (is_movable) | |
877 | new_state = VIRTIO_MEM_SBM_MB_MOVABLE; | |
5f1f79bb DH |
878 | break; |
879 | default: | |
880 | BUG(); | |
881 | break; | |
882 | } | |
c740bb97 | 883 | virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); |
5f1f79bb DH |
884 | } |
885 | ||
d46dfb62 DH |
886 | static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, |
887 | unsigned long mb_id) | |
8e5c921c | 888 | { |
905c4c51 | 889 | const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); |
8e5c921c | 890 | unsigned long pfn; |
7a34c77d | 891 | int sb_id; |
8e5c921c | 892 | |
905c4c51 | 893 | for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { |
54c6a6ba | 894 | if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) |
8e5c921c | 895 | continue; |
8e5c921c | 896 | pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + |
905c4c51 | 897 | sb_id * vm->sbm.sb_size); |
7a34c77d | 898 | virtio_mem_fake_offline_going_offline(pfn, nr_pages); |
8e5c921c DH |
899 | } |
900 | } | |
901 | ||
d46dfb62 DH |
902 | static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, |
903 | unsigned long mb_id) | |
8e5c921c | 904 | { |
905c4c51 | 905 | const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); |
8e5c921c | 906 | unsigned long pfn; |
7a34c77d | 907 | int sb_id; |
8e5c921c | 908 | |
905c4c51 | 909 | for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { |
54c6a6ba | 910 | if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) |
8e5c921c | 911 | continue; |
8e5c921c | 912 | pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + |
905c4c51 | 913 | sb_id * vm->sbm.sb_size); |
7a34c77d | 914 | virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); |
8e5c921c DH |
915 | } |
916 | } | |
917 | ||
3711387a DH |
918 | static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm, |
919 | unsigned long bb_id, | |
920 | unsigned long pfn, | |
921 | unsigned long nr_pages) | |
922 | { | |
923 | /* | |
924 | * When marked as "fake-offline", all online memory of this device block | |
925 | * is allocated by us. Otherwise, we don't have any memory allocated. | |
926 | */ | |
927 | if (virtio_mem_bbm_get_bb_state(vm, bb_id) != | |
928 | VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) | |
929 | return; | |
930 | virtio_mem_fake_offline_going_offline(pfn, nr_pages); | |
931 | } | |
932 | ||
933 | static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm, | |
934 | unsigned long bb_id, | |
935 | unsigned long pfn, | |
936 | unsigned long nr_pages) | |
937 | { | |
938 | if (virtio_mem_bbm_get_bb_state(vm, bb_id) != | |
939 | VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) | |
940 | return; | |
941 | virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); | |
942 | } | |
943 | ||
5f1f79bb DH |
944 | /* |
945 | * This callback will either be called synchronously from add_memory() or | |
946 | * asynchronously (e.g., triggered via user space). We have to be careful | |
947 | * with locking when calling add_memory(). | |
948 | */ | |
949 | static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, | |
950 | unsigned long action, void *arg) | |
951 | { | |
952 | struct virtio_mem *vm = container_of(nb, struct virtio_mem, | |
953 | memory_notifier); | |
954 | struct memory_notify *mhp = arg; | |
955 | const unsigned long start = PFN_PHYS(mhp->start_pfn); | |
956 | const unsigned long size = PFN_PHYS(mhp->nr_pages); | |
5f1f79bb | 957 | int rc = NOTIFY_OK; |
4ba50cd3 | 958 | unsigned long id; |
5f1f79bb DH |
959 | |
960 | if (!virtio_mem_overlaps_range(vm, start, size)) | |
961 | return NOTIFY_DONE; | |
962 | ||
4ba50cd3 DH |
963 | if (vm->in_sbm) { |
964 | id = virtio_mem_phys_to_mb_id(start); | |
965 | /* | |
966 | * In SBM, we add memory in separate memory blocks - we expect | |
967 | * it to be onlined/offlined in the same granularity. Bail out | |
968 | * if this ever changes. | |
969 | */ | |
970 | if (WARN_ON_ONCE(size != memory_block_size_bytes() || | |
971 | !IS_ALIGNED(start, memory_block_size_bytes()))) | |
972 | return NOTIFY_BAD; | |
973 | } else { | |
974 | id = virtio_mem_phys_to_bb_id(vm, start); | |
975 | /* | |
976 | * In BBM, we only care about onlining/offlining happening | |
977 | * within a single big block, we don't care about the | |
978 | * actual granularity as we don't track individual Linux | |
979 | * memory blocks. | |
980 | */ | |
981 | if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1))) | |
982 | return NOTIFY_BAD; | |
983 | } | |
5f1f79bb DH |
984 | |
985 | /* | |
986 | * Avoid circular locking lockdep warnings. We lock the mutex | |
987 | * e.g., in MEM_GOING_ONLINE and unlock it in MEM_ONLINE. The | |
988 | * blocking_notifier_call_chain() has it's own lock, which gets unlocked | |
989 | * between both notifier calls and will bail out. False positive. | |
990 | */ | |
991 | lockdep_off(); | |
992 | ||
993 | switch (action) { | |
994 | case MEM_GOING_OFFLINE: | |
995 | mutex_lock(&vm->hotplug_mutex); | |
996 | if (vm->removing) { | |
997 | rc = notifier_from_errno(-EBUSY); | |
998 | mutex_unlock(&vm->hotplug_mutex); | |
999 | break; | |
1000 | } | |
1001 | vm->hotplug_active = true; | |
4ba50cd3 DH |
1002 | if (vm->in_sbm) |
1003 | virtio_mem_sbm_notify_going_offline(vm, id); | |
3711387a DH |
1004 | else |
1005 | virtio_mem_bbm_notify_going_offline(vm, id, | |
1006 | mhp->start_pfn, | |
1007 | mhp->nr_pages); | |
5f1f79bb DH |
1008 | break; |
1009 | case MEM_GOING_ONLINE: | |
1010 | mutex_lock(&vm->hotplug_mutex); | |
1011 | if (vm->removing) { | |
1012 | rc = notifier_from_errno(-EBUSY); | |
1013 | mutex_unlock(&vm->hotplug_mutex); | |
1014 | break; | |
1015 | } | |
1016 | vm->hotplug_active = true; | |
4ba50cd3 DH |
1017 | if (vm->in_sbm) |
1018 | rc = virtio_mem_sbm_notify_going_online(vm, id); | |
5f1f79bb DH |
1019 | break; |
1020 | case MEM_OFFLINE: | |
4ba50cd3 DH |
1021 | if (vm->in_sbm) |
1022 | virtio_mem_sbm_notify_offline(vm, id); | |
1d33c2ca | 1023 | |
98ff9f94 | 1024 | atomic64_add(size, &vm->offline_size); |
1d33c2ca DH |
1025 | /* |
1026 | * Trigger the workqueue. Now that we have some offline memory, | |
1027 | * maybe we can handle pending unplug requests. | |
1028 | */ | |
1029 | if (!unplug_online) | |
1030 | virtio_mem_retry(vm); | |
1031 | ||
5f1f79bb DH |
1032 | vm->hotplug_active = false; |
1033 | mutex_unlock(&vm->hotplug_mutex); | |
1034 | break; | |
1035 | case MEM_ONLINE: | |
4ba50cd3 | 1036 | if (vm->in_sbm) |
c740bb97 | 1037 | virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn); |
98ff9f94 DH |
1038 | |
1039 | atomic64_sub(size, &vm->offline_size); | |
1040 | /* | |
1041 | * Start adding more memory once we onlined half of our | |
1042 | * threshold. Don't trigger if it's possibly due to our actipn | |
1043 | * (e.g., us adding memory which gets onlined immediately from | |
1044 | * the core). | |
1045 | */ | |
1046 | if (!atomic_read(&vm->wq_active) && | |
1047 | virtio_mem_could_add_memory(vm, vm->offline_threshold / 2)) | |
1048 | virtio_mem_retry(vm); | |
1049 | ||
5f1f79bb DH |
1050 | vm->hotplug_active = false; |
1051 | mutex_unlock(&vm->hotplug_mutex); | |
1052 | break; | |
1053 | case MEM_CANCEL_OFFLINE: | |
8e5c921c DH |
1054 | if (!vm->hotplug_active) |
1055 | break; | |
4ba50cd3 DH |
1056 | if (vm->in_sbm) |
1057 | virtio_mem_sbm_notify_cancel_offline(vm, id); | |
3711387a DH |
1058 | else |
1059 | virtio_mem_bbm_notify_cancel_offline(vm, id, | |
1060 | mhp->start_pfn, | |
1061 | mhp->nr_pages); | |
8e5c921c DH |
1062 | vm->hotplug_active = false; |
1063 | mutex_unlock(&vm->hotplug_mutex); | |
1064 | break; | |
5f1f79bb DH |
1065 | case MEM_CANCEL_ONLINE: |
1066 | if (!vm->hotplug_active) | |
1067 | break; | |
1068 | vm->hotplug_active = false; | |
1069 | mutex_unlock(&vm->hotplug_mutex); | |
1070 | break; | |
1071 | default: | |
1072 | break; | |
1073 | } | |
1074 | ||
1075 | lockdep_on(); | |
1076 | ||
1077 | return rc; | |
1078 | } | |
1079 | ||
/*
 * Mark the pages [pfn, pfn + nr_pages) PG_offline. Pages that were never
 * onlined (via generic_online_page()), i.e. onlined == false, are remembered
 * by additionally setting PageDirty() on them.
 *
 * The whole update is bracketed by page_offline_begin()/page_offline_end().
 */
static void virtio_mem_set_fake_offline(unsigned long pfn,
					unsigned long nr_pages, bool onlined)
{
	unsigned long idx;

	page_offline_begin();
	for (idx = 0; idx < nr_pages; idx++) {
		struct page *page = pfn_to_page(pfn + idx);

		__SetPageOffline(page);
		if (onlined)
			continue;

		SetPageDirty(page);
		/* FIXME: remove after cleanups */
		ClearPageReserved(page);
	}
	page_offline_end();
}
1100 | ||
/*
 * Remove PG_offline from the pages [pfn, pfn + nr_pages). For pages that were
 * never onlined (via generic_online_page()), i.e. onlined == false, the
 * PageDirty() marker is cleared as well.
 */
static void virtio_mem_clear_fake_offline(unsigned long pfn,
					  unsigned long nr_pages, bool onlined)
{
	unsigned long idx;

	for (idx = 0; idx < nr_pages; idx++) {
		struct page *page = pfn_to_page(pfn + idx);

		__ClearPageOffline(page);
		if (!onlined)
			ClearPageDirty(page);
	}
}
1116 | ||
1117 | /* | |
1118 | * Release a range of fake-offline pages to the buddy, effectively | |
1119 | * fake-onlining them. | |
1120 | */ | |
2a628511 | 1121 | static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) |
5f1f79bb | 1122 | { |
23baf831 | 1123 | unsigned long order = MAX_ORDER; |
2a628511 | 1124 | unsigned long i; |
5f1f79bb DH |
1125 | |
1126 | /* | |
57c5a5b3 | 1127 | * We might get called for ranges that don't cover properly aligned |
23baf831 KS |
1128 | * MAX_ORDER pages; however, we can only online properly aligned |
1129 | * pages with an order of MAX_ORDER at maximum. | |
5f1f79bb | 1130 | */ |
57c5a5b3 DH |
1131 | while (!IS_ALIGNED(pfn | nr_pages, 1 << order)) |
1132 | order--; | |
1133 | ||
1134 | for (i = 0; i < nr_pages; i += 1 << order) { | |
255f5985 | 1135 | struct page *page = pfn_to_page(pfn + i); |
5f1f79bb | 1136 | |
255f5985 DH |
1137 | /* |
1138 | * If the page is PageDirty(), it was kept fake-offline when | |
1139 | * onlining the memory block. Otherwise, it was allocated | |
1140 | * using alloc_contig_range(). All pages in a subblock are | |
1141 | * alike. | |
1142 | */ | |
1143 | if (PageDirty(page)) { | |
57c5a5b3 DH |
1144 | virtio_mem_clear_fake_offline(pfn + i, 1 << order, false); |
1145 | generic_online_page(page, order); | |
255f5985 | 1146 | } else { |
57c5a5b3 DH |
1147 | virtio_mem_clear_fake_offline(pfn + i, 1 << order, true); |
1148 | free_contig_range(pfn + i, 1 << order); | |
1149 | adjust_managed_page_count(page, 1 << order); | |
255f5985 DH |
1150 | } |
1151 | } | |
5f1f79bb DH |
1152 | } |
1153 | ||
89c486c4 DH |
1154 | /* |
1155 | * Try to allocate a range, marking pages fake-offline, effectively | |
1156 | * fake-offlining them. | |
1157 | */ | |
1158 | static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) | |
1159 | { | |
07252dfe | 1160 | const bool is_movable = is_zone_movable_page(pfn_to_page(pfn)); |
f2d799d5 DH |
1161 | int rc, retry_count; |
1162 | ||
1163 | /* | |
1164 | * TODO: We want an alloc_contig_range() mode that tries to allocate | |
1165 | * harder (e.g., dealing with temporarily pinned pages, PCP), especially | |
1166 | * with ZONE_MOVABLE. So for now, retry a couple of times with | |
1167 | * ZONE_MOVABLE before giving up - because that zone is supposed to give | |
1168 | * some guarantees. | |
1169 | */ | |
1170 | for (retry_count = 0; retry_count < 5; retry_count++) { | |
1171 | rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, | |
1172 | GFP_KERNEL); | |
1173 | if (rc == -ENOMEM) | |
1174 | /* whoops, out of memory */ | |
1175 | return rc; | |
1176 | else if (rc && !is_movable) | |
1177 | break; | |
1178 | else if (rc) | |
1179 | continue; | |
1180 | ||
1181 | virtio_mem_set_fake_offline(pfn, nr_pages, true); | |
1182 | adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); | |
1183 | return 0; | |
1184 | } | |
1185 | ||
1186 | return -EBUSY; | |
89c486c4 DH |
1187 | } |
1188 | ||
7a34c77d DH |
/*
 * Prepare fake-offline pages for memory offlining, such that mm-core can skip
 * them when offlining the range.
 */
static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
						  unsigned long nr_pages)
{
	unsigned long idx;

	/*
	 * Add the unplugged pages to the managed page counters, so the
	 * offlining code can correctly subtract them again.
	 */
	adjust_managed_page_count(pfn_to_page(pfn), nr_pages);

	/* Drop our reference to the pages so the memory can get offlined. */
	for (idx = 0; idx < nr_pages; idx++) {
		struct page *page = pfn_to_page(pfn + idx);

		/* We expect to have held the only reference; complain if not. */
		if (WARN_ON(!page_ref_dec_and_test(page)))
			dump_page(page, "fake-offline page referenced");
	}
}
1212 | ||
/*
 * Undo virtio_mem_fake_offline_going_offline() for fake-offline pages after
 * memory offlining got canceled.
 */
static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
						   unsigned long nr_pages)
{
	unsigned long idx;

	/* Subtract the unplugged pages from the managed page counters again. */
	adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);

	/* Re-take the reference that was dropped when going offline. */
	for (idx = 0; idx < nr_pages; idx++) {
		struct page *page = pfn_to_page(pfn + idx);

		page_ref_inc(page);
	}
}
1230 | ||
6639032a DH |
1231 | static void virtio_mem_online_page(struct virtio_mem *vm, |
1232 | struct page *page, unsigned int order) | |
5f1f79bb | 1233 | { |
6639032a DH |
1234 | const unsigned long start = page_to_phys(page); |
1235 | const unsigned long end = start + PFN_PHYS(1 << order); | |
1236 | unsigned long addr, next, id, sb_id, count; | |
4ba50cd3 | 1237 | bool do_online; |
5f1f79bb | 1238 | |
6639032a | 1239 | /* |
23baf831 KS |
1240 | * We can get called with any order up to MAX_ORDER. If our subblock |
1241 | * size is smaller than that and we have a mixture of plugged and | |
1242 | * unplugged subblocks within such a page, we have to process in | |
6639032a DH |
1243 | * smaller granularity. In that case we'll adjust the order exactly once |
1244 | * within the loop. | |
1245 | */ | |
1246 | for (addr = start; addr < end; ) { | |
1247 | next = addr + PFN_PHYS(1 << order); | |
5f1f79bb | 1248 | |
4ba50cd3 | 1249 | if (vm->in_sbm) { |
4ba50cd3 DH |
1250 | id = virtio_mem_phys_to_mb_id(addr); |
1251 | sb_id = virtio_mem_phys_to_sb_id(vm, addr); | |
6639032a DH |
1252 | count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1; |
1253 | ||
1254 | if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) { | |
1255 | /* Fully plugged. */ | |
1256 | do_online = true; | |
1257 | } else if (count == 1 || | |
1258 | virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) { | |
1259 | /* Fully unplugged. */ | |
1260 | do_online = false; | |
1261 | } else { | |
1262 | /* | |
1263 | * Mixture, process sub-blocks instead. This | |
1264 | * will be at least the size of a pageblock. | |
1265 | * We'll run into this case exactly once. | |
1266 | */ | |
1267 | order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT; | |
1268 | do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1); | |
1269 | continue; | |
1270 | } | |
4ba50cd3 | 1271 | } else { |
3711387a DH |
1272 | /* |
1273 | * If the whole block is marked fake offline, keep | |
1274 | * everything that way. | |
1275 | */ | |
1276 | id = virtio_mem_phys_to_bb_id(vm, addr); | |
1277 | do_online = virtio_mem_bbm_get_bb_state(vm, id) != | |
1278 | VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; | |
4ba50cd3 | 1279 | } |
425bec00 | 1280 | |
6639032a DH |
1281 | if (do_online) |
1282 | generic_online_page(pfn_to_page(PFN_DOWN(addr)), order); | |
1283 | else | |
1284 | virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, | |
1285 | false); | |
1286 | addr = next; | |
1287 | } | |
1288 | } | |
1289 | ||
1290 | static void virtio_mem_online_page_cb(struct page *page, unsigned int order) | |
1291 | { | |
1292 | const unsigned long addr = page_to_phys(page); | |
1293 | struct virtio_mem *vm; | |
1294 | ||
1295 | rcu_read_lock(); | |
1296 | list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { | |
1297 | /* | |
1298 | * Pages we're onlining will never cross memory blocks and, | |
1299 | * therefore, not virtio-mem devices. | |
1300 | */ | |
1301 | if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) | |
1302 | continue; | |
1303 | ||
425bec00 | 1304 | /* |
6639032a DH |
1305 | * virtio_mem_set_fake_offline() might sleep. We can safely |
1306 | * drop the RCU lock at this point because the device | |
1307 | * cannot go away. See virtio_mem_remove() how races | |
425bec00 DH |
1308 | * between memory onlining and device removal are handled. |
1309 | */ | |
1310 | rcu_read_unlock(); | |
1311 | ||
6639032a | 1312 | virtio_mem_online_page(vm, page, order); |
5f1f79bb DH |
1313 | return; |
1314 | } | |
1315 | rcu_read_unlock(); | |
1316 | ||
1317 | /* not virtio-mem memory, but e.g., a DIMM. online it */ | |
1318 | generic_online_page(page, order); | |
1319 | } | |
1320 | ||
1321 | static uint64_t virtio_mem_send_request(struct virtio_mem *vm, | |
1322 | const struct virtio_mem_req *req) | |
1323 | { | |
1324 | struct scatterlist *sgs[2], sg_req, sg_resp; | |
1325 | unsigned int len; | |
1326 | int rc; | |
1327 | ||
1328 | /* don't use the request residing on the stack (vaddr) */ | |
1329 | vm->req = *req; | |
1330 | ||
1331 | /* out: buffer for request */ | |
1332 | sg_init_one(&sg_req, &vm->req, sizeof(vm->req)); | |
1333 | sgs[0] = &sg_req; | |
1334 | ||
1335 | /* in: buffer for response */ | |
1336 | sg_init_one(&sg_resp, &vm->resp, sizeof(vm->resp)); | |
1337 | sgs[1] = &sg_resp; | |
1338 | ||
1339 | rc = virtqueue_add_sgs(vm->vq, sgs, 1, 1, vm, GFP_KERNEL); | |
1340 | if (rc < 0) | |
1341 | return rc; | |
1342 | ||
1343 | virtqueue_kick(vm->vq); | |
1344 | ||
1345 | /* wait for a response */ | |
1346 | wait_event(vm->host_resp, virtqueue_get_buf(vm->vq, &len)); | |
1347 | ||
1348 | return virtio16_to_cpu(vm->vdev, vm->resp.type); | |
1349 | } | |
1350 | ||
1351 | static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr, | |
1352 | uint64_t size) | |
1353 | { | |
1354 | const uint64_t nb_vm_blocks = size / vm->device_block_size; | |
1355 | const struct virtio_mem_req req = { | |
1356 | .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_PLUG), | |
1357 | .u.plug.addr = cpu_to_virtio64(vm->vdev, addr), | |
1358 | .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), | |
1359 | }; | |
6beb3a94 | 1360 | int rc = -ENOMEM; |
5f1f79bb DH |
1361 | |
1362 | if (atomic_read(&vm->config_changed)) | |
1363 | return -EAGAIN; | |
1364 | ||
6beb3a94 DH |
1365 | dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr, |
1366 | addr + size - 1); | |
1367 | ||
5f1f79bb DH |
1368 | switch (virtio_mem_send_request(vm, &req)) { |
1369 | case VIRTIO_MEM_RESP_ACK: | |
1370 | vm->plugged_size += size; | |
1371 | return 0; | |
1372 | case VIRTIO_MEM_RESP_NACK: | |
6beb3a94 DH |
1373 | rc = -EAGAIN; |
1374 | break; | |
5f1f79bb | 1375 | case VIRTIO_MEM_RESP_BUSY: |
6beb3a94 DH |
1376 | rc = -ETXTBSY; |
1377 | break; | |
5f1f79bb | 1378 | case VIRTIO_MEM_RESP_ERROR: |
6beb3a94 DH |
1379 | rc = -EINVAL; |
1380 | break; | |
5f1f79bb | 1381 | default: |
6beb3a94 | 1382 | break; |
5f1f79bb | 1383 | } |
6beb3a94 DH |
1384 | |
1385 | dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc); | |
1386 | return rc; | |
5f1f79bb DH |
1387 | } |
1388 | ||
1389 | static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, | |
1390 | uint64_t size) | |
1391 | { | |
1392 | const uint64_t nb_vm_blocks = size / vm->device_block_size; | |
1393 | const struct virtio_mem_req req = { | |
1394 | .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG), | |
1395 | .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr), | |
1396 | .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), | |
1397 | }; | |
6beb3a94 | 1398 | int rc = -ENOMEM; |
5f1f79bb DH |
1399 | |
1400 | if (atomic_read(&vm->config_changed)) | |
1401 | return -EAGAIN; | |
1402 | ||
6beb3a94 DH |
1403 | dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr, |
1404 | addr + size - 1); | |
1405 | ||
5f1f79bb DH |
1406 | switch (virtio_mem_send_request(vm, &req)) { |
1407 | case VIRTIO_MEM_RESP_ACK: | |
1408 | vm->plugged_size -= size; | |
1409 | return 0; | |
1410 | case VIRTIO_MEM_RESP_BUSY: | |
6beb3a94 DH |
1411 | rc = -ETXTBSY; |
1412 | break; | |
5f1f79bb | 1413 | case VIRTIO_MEM_RESP_ERROR: |
6beb3a94 DH |
1414 | rc = -EINVAL; |
1415 | break; | |
5f1f79bb | 1416 | default: |
6beb3a94 | 1417 | break; |
5f1f79bb | 1418 | } |
6beb3a94 DH |
1419 | |
1420 | dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc); | |
1421 | return rc; | |
5f1f79bb DH |
1422 | } |
1423 | ||
1424 | static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) | |
1425 | { | |
1426 | const struct virtio_mem_req req = { | |
1427 | .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL), | |
1428 | }; | |
6beb3a94 DH |
1429 | int rc = -ENOMEM; |
1430 | ||
1431 | dev_dbg(&vm->vdev->dev, "unplugging all memory"); | |
5f1f79bb DH |
1432 | |
1433 | switch (virtio_mem_send_request(vm, &req)) { | |
1434 | case VIRTIO_MEM_RESP_ACK: | |
1435 | vm->unplug_all_required = false; | |
1436 | vm->plugged_size = 0; | |
1437 | /* usable region might have shrunk */ | |
1438 | atomic_set(&vm->config_changed, 1); | |
1439 | return 0; | |
1440 | case VIRTIO_MEM_RESP_BUSY: | |
6beb3a94 DH |
1441 | rc = -ETXTBSY; |
1442 | break; | |
5f1f79bb | 1443 | default: |
6beb3a94 | 1444 | break; |
5f1f79bb | 1445 | } |
6beb3a94 DH |
1446 | |
1447 | dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc); | |
1448 | return rc; | |
5f1f79bb DH |
1449 | } |
1450 | ||
1451 | /* | |
1452 | * Plug selected subblocks. Updates the plugged state, but not the state | |
1453 | * of the memory block. | |
1454 | */ | |
602ef894 DH |
1455 | static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id, |
1456 | int sb_id, int count) | |
5f1f79bb DH |
1457 | { |
1458 | const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + | |
905c4c51 DH |
1459 | sb_id * vm->sbm.sb_size; |
1460 | const uint64_t size = count * vm->sbm.sb_size; | |
5f1f79bb DH |
1461 | int rc; |
1462 | ||
5f1f79bb DH |
1463 | rc = virtio_mem_send_plug_request(vm, addr, size); |
1464 | if (!rc) | |
54c6a6ba | 1465 | virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count); |
5f1f79bb DH |
1466 | return rc; |
1467 | } | |
1468 | ||
1469 | /* | |
1470 | * Unplug selected subblocks. Updates the plugged state, but not the state | |
1471 | * of the memory block. | |
1472 | */ | |
602ef894 DH |
1473 | static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, |
1474 | int sb_id, int count) | |
5f1f79bb DH |
1475 | { |
1476 | const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + | |
905c4c51 DH |
1477 | sb_id * vm->sbm.sb_size; |
1478 | const uint64_t size = count * vm->sbm.sb_size; | |
5f1f79bb DH |
1479 | int rc; |
1480 | ||
5f1f79bb DH |
1481 | rc = virtio_mem_send_unplug_request(vm, addr, size); |
1482 | if (!rc) | |
54c6a6ba | 1483 | virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count); |
5f1f79bb DH |
1484 | return rc; |
1485 | } | |
1486 | ||
4ba50cd3 DH |
1487 | /* |
1488 | * Request to unplug a big block. | |
1489 | * | |
1490 | * Will not modify the state of the big block. | |
1491 | */ | |
1492 | static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) | |
1493 | { | |
1494 | const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); | |
1495 | const uint64_t size = vm->bbm.bb_size; | |
1496 | ||
1497 | return virtio_mem_send_unplug_request(vm, addr, size); | |
1498 | } | |
1499 | ||
1500 | /* | |
1501 | * Request to plug a big block. | |
1502 | * | |
1503 | * Will not modify the state of the big block. | |
1504 | */ | |
1505 | static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id) | |
1506 | { | |
1507 | const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); | |
1508 | const uint64_t size = vm->bbm.bb_size; | |
1509 | ||
1510 | return virtio_mem_send_plug_request(vm, addr, size); | |
1511 | } | |
1512 | ||
5f1f79bb DH |
1513 | /* |
1514 | * Unplug the desired number of plugged subblocks of a offline or not-added | |
1515 | * memory block. Will fail if any subblock cannot get unplugged (instead of | |
1516 | * skipping it). | |
1517 | * | |
1518 | * Will not modify the state of the memory block. | |
1519 | * | |
1520 | * Note: can fail after some subblocks were unplugged. | |
1521 | */ | |
5304ca3d DH |
1522 | static int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm, |
1523 | unsigned long mb_id, uint64_t *nb_sb) | |
5f1f79bb DH |
1524 | { |
1525 | int sb_id, count; | |
1526 | int rc; | |
1527 | ||
905c4c51 | 1528 | sb_id = vm->sbm.sbs_per_mb - 1; |
5f1f79bb | 1529 | while (*nb_sb) { |
562e08cd DH |
1530 | /* Find the next candidate subblock */ |
1531 | while (sb_id >= 0 && | |
54c6a6ba | 1532 | virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1)) |
562e08cd DH |
1533 | sb_id--; |
1534 | if (sb_id < 0) | |
5f1f79bb | 1535 | break; |
562e08cd | 1536 | /* Try to unplug multiple subblocks at a time */ |
5f1f79bb | 1537 | count = 1; |
562e08cd | 1538 | while (count < *nb_sb && sb_id > 0 && |
54c6a6ba | 1539 | virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { |
5f1f79bb | 1540 | count++; |
562e08cd DH |
1541 | sb_id--; |
1542 | } | |
5f1f79bb | 1543 | |
602ef894 | 1544 | rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); |
5f1f79bb DH |
1545 | if (rc) |
1546 | return rc; | |
1547 | *nb_sb -= count; | |
562e08cd | 1548 | sb_id--; |
5f1f79bb DH |
1549 | } |
1550 | ||
1551 | return 0; | |
1552 | } | |
1553 | ||
1554 | /* | |
1555 | * Unplug all plugged subblocks of an offline or not-added memory block. | |
1556 | * | |
1557 | * Will not modify the state of the memory block. | |
1558 | * | |
1559 | * Note: can fail after some subblocks were unplugged. | |
1560 | */ | |
602ef894 | 1561 | static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id) |
5f1f79bb | 1562 | { |
905c4c51 | 1563 | uint64_t nb_sb = vm->sbm.sbs_per_mb; |
5f1f79bb | 1564 | |
5304ca3d | 1565 | return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb); |
5f1f79bb DH |
1566 | } |
1567 | ||
1568 | /* | |
1569 | * Prepare tracking data for the next memory block. | |
1570 | */ | |
602ef894 DH |
1571 | static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm, |
1572 | unsigned long *mb_id) | |
5f1f79bb DH |
1573 | { |
1574 | int rc; | |
1575 | ||
8a6f082b | 1576 | if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id) |
5f1f79bb DH |
1577 | return -ENOSPC; |
1578 | ||
1579 | /* Resize the state array if required. */ | |
99f0b55e | 1580 | rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm); |
5f1f79bb DH |
1581 | if (rc) |
1582 | return rc; | |
1583 | ||
1584 | /* Resize the subblock bitmap if required. */ | |
54c6a6ba | 1585 | rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm); |
5f1f79bb DH |
1586 | if (rc) |
1587 | return rc; | |
1588 | ||
99f0b55e | 1589 | vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; |
8a6f082b | 1590 | *mb_id = vm->sbm.next_mb_id++; |
5f1f79bb DH |
1591 | return 0; |
1592 | } | |
1593 | ||
5f1f79bb DH |
1594 | /* |
1595 | * Try to plug the desired number of subblocks and add the memory block | |
1596 | * to Linux. | |
1597 | * | |
1598 | * Will modify the state of the memory block. | |
1599 | */ | |
602ef894 DH |
1600 | static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, |
1601 | unsigned long mb_id, uint64_t *nb_sb) | |
5f1f79bb | 1602 | { |
905c4c51 | 1603 | const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); |
d76944f8 | 1604 | int rc; |
5f1f79bb DH |
1605 | |
1606 | if (WARN_ON_ONCE(!count)) | |
1607 | return -EINVAL; | |
1608 | ||
1609 | /* | |
1610 | * Plug the requested number of subblocks before adding it to linux, | |
1611 | * so that onlining will directly online all plugged subblocks. | |
1612 | */ | |
602ef894 | 1613 | rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count); |
5f1f79bb DH |
1614 | if (rc) |
1615 | return rc; | |
1616 | ||
1617 | /* | |
1618 | * Mark the block properly offline before adding it to Linux, | |
1619 | * so the memory notifiers will find the block in the right state. | |
1620 | */ | |
905c4c51 | 1621 | if (count == vm->sbm.sbs_per_mb) |
99f0b55e DH |
1622 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
1623 | VIRTIO_MEM_SBM_MB_OFFLINE); | |
5f1f79bb | 1624 | else |
99f0b55e DH |
1625 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
1626 | VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); | |
5f1f79bb DH |
1627 | |
1628 | /* Add the memory block to linux - if that fails, try to unplug. */ | |
01afdee2 | 1629 | rc = virtio_mem_sbm_add_mb(vm, mb_id); |
5f1f79bb | 1630 | if (rc) { |
99f0b55e | 1631 | int new_state = VIRTIO_MEM_SBM_MB_UNUSED; |
5f1f79bb | 1632 | |
602ef894 | 1633 | if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) |
99f0b55e DH |
1634 | new_state = VIRTIO_MEM_SBM_MB_PLUGGED; |
1635 | virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); | |
5f1f79bb DH |
1636 | return rc; |
1637 | } | |
1638 | ||
1639 | *nb_sb -= count; | |
1640 | return 0; | |
1641 | } | |
1642 | ||
1643 | /* | |
1644 | * Try to plug the desired number of subblocks of a memory block that | |
1645 | * is already added to Linux. | |
1646 | * | |
1647 | * Will modify the state of the memory block. | |
1648 | * | |
1649 | * Note: Can fail after some subblocks were successfully plugged. | |
1650 | */ | |
602ef894 | 1651 | static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, |
f4cf803d | 1652 | unsigned long mb_id, uint64_t *nb_sb) |
5f1f79bb | 1653 | { |
f4cf803d | 1654 | const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); |
5f1f79bb DH |
1655 | unsigned long pfn, nr_pages; |
1656 | int sb_id, count; | |
1657 | int rc; | |
1658 | ||
1659 | if (WARN_ON_ONCE(!*nb_sb)) | |
1660 | return -EINVAL; | |
1661 | ||
1662 | while (*nb_sb) { | |
54c6a6ba | 1663 | sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); |
905c4c51 | 1664 | if (sb_id >= vm->sbm.sbs_per_mb) |
5f1f79bb DH |
1665 | break; |
1666 | count = 1; | |
1667 | while (count < *nb_sb && | |
905c4c51 | 1668 | sb_id + count < vm->sbm.sbs_per_mb && |
54c6a6ba | 1669 | !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) |
5f1f79bb DH |
1670 | count++; |
1671 | ||
602ef894 | 1672 | rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count); |
5f1f79bb DH |
1673 | if (rc) |
1674 | return rc; | |
1675 | *nb_sb -= count; | |
f4cf803d | 1676 | if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) |
5f1f79bb DH |
1677 | continue; |
1678 | ||
1679 | /* fake-online the pages if the memory block is online */ | |
1680 | pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + | |
905c4c51 DH |
1681 | sb_id * vm->sbm.sb_size); |
1682 | nr_pages = PFN_DOWN(count * vm->sbm.sb_size); | |
5f1f79bb DH |
1683 | virtio_mem_fake_online(pfn, nr_pages); |
1684 | } | |
1685 | ||
f4cf803d DH |
1686 | if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) |
1687 | virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1); | |
5f1f79bb | 1688 | |
1c3d69ab | 1689 | return 0; |
5f1f79bb DH |
1690 | } |
1691 | ||
4ba50cd3 | 1692 | static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff) |
5f1f79bb | 1693 | { |
f4cf803d | 1694 | const int mb_states[] = { |
c740bb97 DH |
1695 | VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, |
1696 | VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, | |
f4cf803d DH |
1697 | VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, |
1698 | }; | |
905c4c51 | 1699 | uint64_t nb_sb = diff / vm->sbm.sb_size; |
5f1f79bb | 1700 | unsigned long mb_id; |
f4cf803d | 1701 | int rc, i; |
5f1f79bb DH |
1702 | |
1703 | if (!nb_sb) | |
1704 | return 0; | |
1705 | ||
1706 | /* Don't race with onlining/offlining */ | |
1707 | mutex_lock(&vm->hotplug_mutex); | |
1708 | ||
f4cf803d DH |
1709 | for (i = 0; i < ARRAY_SIZE(mb_states); i++) { |
1710 | virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) { | |
1711 | rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb); | |
1712 | if (rc || !nb_sb) | |
1713 | goto out_unlock; | |
1714 | cond_resched(); | |
1715 | } | |
5f1f79bb DH |
1716 | } |
1717 | ||
1718 | /* | |
1719 | * We won't be working on online/offline memory blocks from this point, | |
1720 | * so we can't race with memory onlining/offlining. Drop the mutex. | |
1721 | */ | |
1722 | mutex_unlock(&vm->hotplug_mutex); | |
1723 | ||
1724 | /* Try to plug and add unused blocks */ | |
99f0b55e | 1725 | virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) { |
98ff9f94 | 1726 | if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) |
5f1f79bb DH |
1727 | return -ENOSPC; |
1728 | ||
602ef894 | 1729 | rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); |
5f1f79bb DH |
1730 | if (rc || !nb_sb) |
1731 | return rc; | |
1732 | cond_resched(); | |
1733 | } | |
1734 | ||
1735 | /* Try to prepare, plug and add new blocks */ | |
1736 | while (nb_sb) { | |
98ff9f94 | 1737 | if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) |
5f1f79bb DH |
1738 | return -ENOSPC; |
1739 | ||
602ef894 | 1740 | rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id); |
5f1f79bb DH |
1741 | if (rc) |
1742 | return rc; | |
602ef894 | 1743 | rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); |
5f1f79bb DH |
1744 | if (rc) |
1745 | return rc; | |
1746 | cond_resched(); | |
1747 | } | |
1748 | ||
1749 | return 0; | |
1750 | out_unlock: | |
1751 | mutex_unlock(&vm->hotplug_mutex); | |
1752 | return rc; | |
1753 | } | |
1754 | ||
4ba50cd3 DH |
1755 | /* |
1756 | * Plug a big block and add it to Linux. | |
1757 | * | |
1758 | * Will modify the state of the big block. | |
1759 | */ | |
1760 | static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm, | |
1761 | unsigned long bb_id) | |
1762 | { | |
1763 | int rc; | |
1764 | ||
1765 | if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != | |
1766 | VIRTIO_MEM_BBM_BB_UNUSED)) | |
1767 | return -EINVAL; | |
1768 | ||
1769 | rc = virtio_mem_bbm_plug_bb(vm, bb_id); | |
1770 | if (rc) | |
1771 | return rc; | |
1772 | virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); | |
1773 | ||
1774 | rc = virtio_mem_bbm_add_bb(vm, bb_id); | |
1775 | if (rc) { | |
1776 | if (!virtio_mem_bbm_unplug_bb(vm, bb_id)) | |
1777 | virtio_mem_bbm_set_bb_state(vm, bb_id, | |
1778 | VIRTIO_MEM_BBM_BB_UNUSED); | |
1779 | else | |
1780 | /* Retry from the main loop. */ | |
1781 | virtio_mem_bbm_set_bb_state(vm, bb_id, | |
1782 | VIRTIO_MEM_BBM_BB_PLUGGED); | |
1783 | return rc; | |
1784 | } | |
1785 | return 0; | |
1786 | } | |
1787 | ||
1788 | /* | |
1789 | * Prepare tracking data for the next big block. | |
1790 | */ | |
1791 | static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm, | |
1792 | unsigned long *bb_id) | |
1793 | { | |
1794 | int rc; | |
1795 | ||
1796 | if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id) | |
1797 | return -ENOSPC; | |
1798 | ||
1799 | /* Resize the big block state array if required. */ | |
1800 | rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm); | |
1801 | if (rc) | |
1802 | return rc; | |
1803 | ||
1804 | vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++; | |
1805 | *bb_id = vm->bbm.next_bb_id; | |
1806 | vm->bbm.next_bb_id++; | |
1807 | return 0; | |
1808 | } | |
1809 | ||
1810 | static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff) | |
1811 | { | |
1812 | uint64_t nb_bb = diff / vm->bbm.bb_size; | |
1813 | unsigned long bb_id; | |
1814 | int rc; | |
1815 | ||
1816 | if (!nb_bb) | |
1817 | return 0; | |
1818 | ||
1819 | /* Try to plug and add unused big blocks */ | |
1820 | virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) { | |
1821 | if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) | |
1822 | return -ENOSPC; | |
1823 | ||
1824 | rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); | |
1825 | if (!rc) | |
1826 | nb_bb--; | |
1827 | if (rc || !nb_bb) | |
1828 | return rc; | |
1829 | cond_resched(); | |
1830 | } | |
1831 | ||
1832 | /* Try to prepare, plug and add new big blocks */ | |
1833 | while (nb_bb) { | |
1834 | if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) | |
1835 | return -ENOSPC; | |
1836 | ||
1837 | rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id); | |
1838 | if (rc) | |
1839 | return rc; | |
1840 | rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); | |
1841 | if (!rc) | |
1842 | nb_bb--; | |
1843 | if (rc) | |
1844 | return rc; | |
1845 | cond_resched(); | |
1846 | } | |
1847 | ||
1848 | return 0; | |
1849 | } | |
1850 | ||
1851 | /* | |
1852 | * Try to plug the requested amount of memory. | |
1853 | */ | |
1854 | static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) | |
1855 | { | |
1856 | if (vm->in_sbm) | |
1857 | return virtio_mem_sbm_plug_request(vm, diff); | |
1858 | return virtio_mem_bbm_plug_request(vm, diff); | |
1859 | } | |
1860 | ||
c627ff5d DH |
1861 | /* |
1862 | * Unplug the desired number of plugged subblocks of an offline memory block. | |
1863 | * Will fail if any subblock cannot get unplugged (instead of skipping it). | |
1864 | * | |
1865 | * Will modify the state of the memory block. Might temporarily drop the | |
1866 | * hotplug_mutex. | |
1867 | * | |
1868 | * Note: Can fail after some subblocks were successfully unplugged. | |
1869 | */ | |
602ef894 DH |
1870 | static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, |
1871 | unsigned long mb_id, | |
1872 | uint64_t *nb_sb) | |
c627ff5d DH |
1873 | { |
1874 | int rc; | |
1875 | ||
5304ca3d | 1876 | rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb); |
c627ff5d DH |
1877 | |
1878 | /* some subblocks might have been unplugged even on failure */ | |
905c4c51 | 1879 | if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) |
99f0b55e DH |
1880 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
1881 | VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); | |
c627ff5d DH |
1882 | if (rc) |
1883 | return rc; | |
1884 | ||
905c4c51 | 1885 | if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { |
c627ff5d DH |
1886 | /* |
1887 | * Remove the block from Linux - this should never fail. | |
1888 | * Hinder the block from getting onlined by marking it | |
1889 | * unplugged. Temporarily drop the mutex, so | |
1890 | * any pending GOING_ONLINE requests can be serviced/rejected. | |
1891 | */ | |
99f0b55e DH |
1892 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
1893 | VIRTIO_MEM_SBM_MB_UNUSED); | |
c627ff5d DH |
1894 | |
1895 | mutex_unlock(&vm->hotplug_mutex); | |
01afdee2 | 1896 | rc = virtio_mem_sbm_remove_mb(vm, mb_id); |
c627ff5d DH |
1897 | BUG_ON(rc); |
1898 | mutex_lock(&vm->hotplug_mutex); | |
1899 | } | |
1900 | return 0; | |
1901 | } | |
1902 | ||
72f9525a DH |
1903 | /* |
1904 | * Unplug the given plugged subblocks of an online memory block. | |
1905 | * | |
1906 | * Will modify the state of the memory block. | |
1907 | */ | |
602ef894 DH |
1908 | static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, |
1909 | unsigned long mb_id, int sb_id, | |
1910 | int count) | |
72f9525a | 1911 | { |
905c4c51 | 1912 | const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; |
c740bb97 | 1913 | const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); |
72f9525a DH |
1914 | unsigned long start_pfn; |
1915 | int rc; | |
1916 | ||
1917 | start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + | |
905c4c51 | 1918 | sb_id * vm->sbm.sb_size); |
72f9525a | 1919 | |
89c486c4 DH |
1920 | rc = virtio_mem_fake_offline(start_pfn, nr_pages); |
1921 | if (rc) | |
1922 | return rc; | |
72f9525a DH |
1923 | |
1924 | /* Try to unplug the allocated memory */ | |
602ef894 | 1925 | rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); |
72f9525a DH |
1926 | if (rc) { |
1927 | /* Return the memory to the buddy. */ | |
1928 | virtio_mem_fake_online(start_pfn, nr_pages); | |
1929 | return rc; | |
1930 | } | |
1931 | ||
c740bb97 DH |
1932 | switch (old_state) { |
1933 | case VIRTIO_MEM_SBM_MB_KERNEL: | |
1934 | virtio_mem_sbm_set_mb_state(vm, mb_id, | |
1935 | VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL); | |
1936 | break; | |
1937 | case VIRTIO_MEM_SBM_MB_MOVABLE: | |
1938 | virtio_mem_sbm_set_mb_state(vm, mb_id, | |
1939 | VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL); | |
1940 | break; | |
1941 | } | |
1942 | ||
72f9525a DH |
1943 | return 0; |
1944 | } | |
1945 | ||
255f5985 DH |
1946 | /* |
1947 | * Unplug the desired number of plugged subblocks of an online memory block. | |
1948 | * Will skip subblock that are busy. | |
1949 | * | |
a5732387 DH |
1950 | * Will modify the state of the memory block. Might temporarily drop the |
1951 | * hotplug_mutex. | |
255f5985 DH |
1952 | * |
1953 | * Note: Can fail after some subblocks were successfully unplugged. Can | |
1954 | * return 0 even if subblocks were busy and could not get unplugged. | |
1955 | */ | |
602ef894 DH |
1956 | static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, |
1957 | unsigned long mb_id, | |
1958 | uint64_t *nb_sb) | |
255f5985 | 1959 | { |
255f5985 DH |
1960 | int rc, sb_id; |
1961 | ||
72f9525a | 1962 | /* If possible, try to unplug the complete block in one shot. */ |
905c4c51 DH |
1963 | if (*nb_sb >= vm->sbm.sbs_per_mb && |
1964 | virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { | |
602ef894 DH |
1965 | rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, |
1966 | vm->sbm.sbs_per_mb); | |
72f9525a | 1967 | if (!rc) { |
905c4c51 | 1968 | *nb_sb -= vm->sbm.sbs_per_mb; |
72f9525a DH |
1969 | goto unplugged; |
1970 | } else if (rc != -EBUSY) | |
1971 | return rc; | |
1972 | } | |
1973 | ||
1974 | /* Fallback to single subblocks. */ | |
905c4c51 | 1975 | for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { |
255f5985 | 1976 | /* Find the next candidate subblock */ |
562e08cd | 1977 | while (sb_id >= 0 && |
54c6a6ba | 1978 | !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) |
562e08cd DH |
1979 | sb_id--; |
1980 | if (sb_id < 0) | |
255f5985 DH |
1981 | break; |
1982 | ||
602ef894 | 1983 | rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); |
72f9525a | 1984 | if (rc == -EBUSY) |
255f5985 | 1985 | continue; |
72f9525a | 1986 | else if (rc) |
255f5985 | 1987 | return rc; |
255f5985 DH |
1988 | *nb_sb -= 1; |
1989 | } | |
1990 | ||
72f9525a | 1991 | unplugged: |
255f5985 | 1992 | /* |
a5732387 DH |
1993 | * Once all subblocks of a memory block were unplugged, offline and |
1994 | * remove it. This will usually not fail, as no memory is in use | |
1995 | * anymore - however some other notifiers might NACK the request. | |
255f5985 | 1996 | */ |
905c4c51 | 1997 | if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { |
a5732387 | 1998 | mutex_unlock(&vm->hotplug_mutex); |
01afdee2 | 1999 | rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); |
a5732387 DH |
2000 | mutex_lock(&vm->hotplug_mutex); |
2001 | if (!rc) | |
99f0b55e DH |
2002 | virtio_mem_sbm_set_mb_state(vm, mb_id, |
2003 | VIRTIO_MEM_SBM_MB_UNUSED); | |
a5732387 DH |
2004 | } |
2005 | ||
255f5985 DH |
2006 | return 0; |
2007 | } | |
2008 | ||
5304ca3d DH |
2009 | /* |
2010 | * Unplug the desired number of plugged subblocks of a memory block that is | |
2011 | * already added to Linux. Will skip subblock of online memory blocks that are | |
2012 | * busy (by the OS). Will fail if any subblock that's not busy cannot get | |
2013 | * unplugged. | |
2014 | * | |
2015 | * Will modify the state of the memory block. Might temporarily drop the | |
2016 | * hotplug_mutex. | |
2017 | * | |
2018 | * Note: Can fail after some subblocks were successfully unplugged. Can | |
2019 | * return 0 even if subblocks were busy and could not get unplugged. | |
2020 | */ | |
2021 | static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm, | |
2022 | unsigned long mb_id, | |
2023 | uint64_t *nb_sb) | |
2024 | { | |
2025 | const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); | |
2026 | ||
2027 | switch (old_state) { | |
c740bb97 DH |
2028 | case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL: |
2029 | case VIRTIO_MEM_SBM_MB_KERNEL: | |
2030 | case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL: | |
2031 | case VIRTIO_MEM_SBM_MB_MOVABLE: | |
5304ca3d DH |
2032 | return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb); |
2033 | case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: | |
2034 | case VIRTIO_MEM_SBM_MB_OFFLINE: | |
2035 | return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb); | |
2036 | } | |
2037 | return -EINVAL; | |
2038 | } | |
2039 | ||
4ba50cd3 | 2040 | static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff) |
c627ff5d | 2041 | { |
5304ca3d DH |
2042 | const int mb_states[] = { |
2043 | VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, | |
2044 | VIRTIO_MEM_SBM_MB_OFFLINE, | |
c740bb97 DH |
2045 | VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, |
2046 | VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, | |
2047 | VIRTIO_MEM_SBM_MB_MOVABLE, | |
2048 | VIRTIO_MEM_SBM_MB_KERNEL, | |
5304ca3d | 2049 | }; |
905c4c51 | 2050 | uint64_t nb_sb = diff / vm->sbm.sb_size; |
c627ff5d | 2051 | unsigned long mb_id; |
5304ca3d | 2052 | int rc, i; |
c627ff5d DH |
2053 | |
2054 | if (!nb_sb) | |
2055 | return 0; | |
2056 | ||
2057 | /* | |
2058 | * We'll drop the mutex a couple of times when it is safe to do so. | |
2059 | * This might result in some blocks switching the state (online/offline) | |
2060 | * and we could miss them in this run - we will retry again later. | |
2061 | */ | |
2062 | mutex_lock(&vm->hotplug_mutex); | |
2063 | ||
5304ca3d DH |
2064 | /* |
2065 | * We try unplug from partially plugged blocks first, to try removing | |
c740bb97 DH |
2066 | * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE |
2067 | * as it's more reliable to unplug memory and remove whole memory | |
2068 | * blocks, and we don't want to trigger a zone imbalances by | |
2069 | * accidentially removing too much kernel memory. | |
5304ca3d DH |
2070 | */ |
2071 | for (i = 0; i < ARRAY_SIZE(mb_states); i++) { | |
2072 | virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) { | |
2073 | rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb); | |
2074 | if (rc || !nb_sb) | |
2075 | goto out_unlock; | |
2076 | mutex_unlock(&vm->hotplug_mutex); | |
2077 | cond_resched(); | |
2078 | mutex_lock(&vm->hotplug_mutex); | |
2079 | } | |
2080 | if (!unplug_online && i == 1) { | |
2081 | mutex_unlock(&vm->hotplug_mutex); | |
2082 | return 0; | |
2083 | } | |
255f5985 DH |
2084 | } |
2085 | ||
c627ff5d | 2086 | mutex_unlock(&vm->hotplug_mutex); |
255f5985 | 2087 | return nb_sb ? -EBUSY : 0; |
c627ff5d DH |
2088 | out_unlock: |
2089 | mutex_unlock(&vm->hotplug_mutex); | |
2090 | return rc; | |
2091 | } | |
2092 | ||
269ac938 DH |
2093 | /* |
2094 | * Try to offline and remove a big block from Linux and unplug it. Will fail | |
2095 | * with -EBUSY if some memory is busy and cannot get unplugged. | |
2096 | * | |
2097 | * Will modify the state of the memory block. Might temporarily drop the | |
2098 | * hotplug_mutex. | |
2099 | */ | |
2100 | static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, | |
2101 | unsigned long bb_id) | |
2102 | { | |
3711387a DH |
2103 | const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); |
2104 | const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); | |
2105 | unsigned long end_pfn = start_pfn + nr_pages; | |
2106 | unsigned long pfn; | |
2107 | struct page *page; | |
269ac938 DH |
2108 | int rc; |
2109 | ||
2110 | if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != | |
2111 | VIRTIO_MEM_BBM_BB_ADDED)) | |
2112 | return -EINVAL; | |
2113 | ||
3711387a DH |
2114 | if (bbm_safe_unplug) { |
2115 | /* | |
2116 | * Start by fake-offlining all memory. Once we marked the device | |
2117 | * block as fake-offline, all newly onlined memory will | |
2118 | * automatically be kept fake-offline. Protect from concurrent | |
2119 | * onlining/offlining until we have a consistent state. | |
2120 | */ | |
2121 | mutex_lock(&vm->hotplug_mutex); | |
2122 | virtio_mem_bbm_set_bb_state(vm, bb_id, | |
2123 | VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); | |
2124 | ||
2125 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | |
2126 | page = pfn_to_online_page(pfn); | |
2127 | if (!page) | |
2128 | continue; | |
2129 | ||
2130 | rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); | |
2131 | if (rc) { | |
2132 | end_pfn = pfn; | |
2133 | goto rollback_safe_unplug; | |
2134 | } | |
2135 | } | |
2136 | mutex_unlock(&vm->hotplug_mutex); | |
2137 | } | |
2138 | ||
269ac938 | 2139 | rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); |
3711387a DH |
2140 | if (rc) { |
2141 | if (bbm_safe_unplug) { | |
2142 | mutex_lock(&vm->hotplug_mutex); | |
2143 | goto rollback_safe_unplug; | |
2144 | } | |
269ac938 | 2145 | return rc; |
3711387a | 2146 | } |
269ac938 DH |
2147 | |
2148 | rc = virtio_mem_bbm_unplug_bb(vm, bb_id); | |
2149 | if (rc) | |
2150 | virtio_mem_bbm_set_bb_state(vm, bb_id, | |
2151 | VIRTIO_MEM_BBM_BB_PLUGGED); | |
2152 | else | |
2153 | virtio_mem_bbm_set_bb_state(vm, bb_id, | |
2154 | VIRTIO_MEM_BBM_BB_UNUSED); | |
2155 | return rc; | |
3711387a DH |
2156 | |
2157 | rollback_safe_unplug: | |
2158 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | |
2159 | page = pfn_to_online_page(pfn); | |
2160 | if (!page) | |
2161 | continue; | |
2162 | virtio_mem_fake_online(pfn, PAGES_PER_SECTION); | |
2163 | } | |
2164 | virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); | |
2165 | mutex_unlock(&vm->hotplug_mutex); | |
2166 | return rc; | |
269ac938 DH |
2167 | } |
2168 | ||
2169 | /* | |
269ac938 | 2170 | * Test if a big block is completely offline. |
269ac938 | 2171 | */ |
269ac938 DH |
2172 | static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm, |
2173 | unsigned long bb_id) | |
269ac938 | 2174 | { |
269ac938 DH |
2175 | const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); |
2176 | const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); | |
2177 | unsigned long pfn; | |
269ac938 | 2178 | |
269ac938 DH |
2179 | for (pfn = start_pfn; pfn < start_pfn + nr_pages; |
2180 | pfn += PAGES_PER_SECTION) { | |
2181 | if (pfn_to_online_page(pfn)) | |
2182 | return false; | |
2183 | } | |
269ac938 | 2184 | |
269ac938 | 2185 | return true; |
269ac938 DH |
2186 | } |
2187 | ||
2188 | /* | |
db7b3377 | 2189 | * Test if a big block is completely onlined to ZONE_MOVABLE (or offline). |
269ac938 | 2190 | */ |
db7b3377 | 2191 | static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm, |
269ac938 DH |
2192 | unsigned long bb_id) |
2193 | { | |
2194 | const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); | |
2195 | const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); | |
db7b3377 | 2196 | struct page *page; |
269ac938 DH |
2197 | unsigned long pfn; |
2198 | ||
2199 | for (pfn = start_pfn; pfn < start_pfn + nr_pages; | |
2200 | pfn += PAGES_PER_SECTION) { | |
db7b3377 DH |
2201 | page = pfn_to_online_page(pfn); |
2202 | if (!page) | |
2203 | continue; | |
2204 | if (page_zonenum(page) != ZONE_MOVABLE) | |
269ac938 DH |
2205 | return false; |
2206 | } | |
2207 | ||
2208 | return true; | |
2209 | } | |
2210 | ||
2211 | static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff) | |
2212 | { | |
2213 | uint64_t nb_bb = diff / vm->bbm.bb_size; | |
2214 | uint64_t bb_id; | |
c6bc1422 | 2215 | int rc, i; |
269ac938 DH |
2216 | |
2217 | if (!nb_bb) | |
2218 | return 0; | |
2219 | ||
c6bc1422 DH |
2220 | /* |
2221 | * Try to unplug big blocks. Similar to SBM, start with offline | |
2222 | * big blocks. | |
2223 | */ | |
db7b3377 | 2224 | for (i = 0; i < 3; i++) { |
c6bc1422 DH |
2225 | virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { |
2226 | cond_resched(); | |
269ac938 | 2227 | |
c6bc1422 DH |
2228 | /* |
2229 | * As we're holding no locks, these checks are racy, | |
2230 | * but we don't care. | |
2231 | */ | |
2232 | if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id)) | |
2233 | continue; | |
db7b3377 DH |
2234 | if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id)) |
2235 | continue; | |
c6bc1422 DH |
2236 | rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id); |
2237 | if (rc == -EBUSY) | |
2238 | continue; | |
2239 | if (!rc) | |
2240 | nb_bb--; | |
2241 | if (rc || !nb_bb) | |
2242 | return rc; | |
2243 | } | |
2244 | if (i == 0 && !unplug_online) | |
2245 | return 0; | |
269ac938 DH |
2246 | } |
2247 | ||
2248 | return nb_bb ? -EBUSY : 0; | |
2249 | } | |
2250 | ||
4ba50cd3 DH |
2251 | /* |
2252 | * Try to unplug the requested amount of memory. | |
2253 | */ | |
2254 | static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) | |
2255 | { | |
2256 | if (vm->in_sbm) | |
2257 | return virtio_mem_sbm_unplug_request(vm, diff); | |
269ac938 | 2258 | return virtio_mem_bbm_unplug_request(vm, diff); |
4ba50cd3 DH |
2259 | } |
2260 | ||
5f1f79bb DH |
2261 | /* |
2262 | * Try to unplug all blocks that couldn't be unplugged before, for example, | |
2263 | * because the hypervisor was busy. | |
2264 | */ | |
2265 | static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) | |
2266 | { | |
4ba50cd3 | 2267 | unsigned long id; |
5f1f79bb DH |
2268 | int rc; |
2269 | ||
4ba50cd3 DH |
2270 | if (!vm->in_sbm) { |
2271 | virtio_mem_bbm_for_each_bb(vm, id, | |
2272 | VIRTIO_MEM_BBM_BB_PLUGGED) { | |
2273 | rc = virtio_mem_bbm_unplug_bb(vm, id); | |
2274 | if (rc) | |
2275 | return rc; | |
2276 | virtio_mem_bbm_set_bb_state(vm, id, | |
2277 | VIRTIO_MEM_BBM_BB_UNUSED); | |
2278 | } | |
2279 | return 0; | |
2280 | } | |
2281 | ||
2282 | virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) { | |
2283 | rc = virtio_mem_sbm_unplug_mb(vm, id); | |
5f1f79bb DH |
2284 | if (rc) |
2285 | return rc; | |
4ba50cd3 | 2286 | virtio_mem_sbm_set_mb_state(vm, id, |
99f0b55e | 2287 | VIRTIO_MEM_SBM_MB_UNUSED); |
5f1f79bb DH |
2288 | } |
2289 | ||
2290 | return 0; | |
2291 | } | |
2292 | ||
2293 | /* | |
2294 | * Update all parts of the config that could have changed. | |
2295 | */ | |
2296 | static void virtio_mem_refresh_config(struct virtio_mem *vm) | |
2297 | { | |
94c89453 | 2298 | const struct range pluggable_range = mhp_get_pluggable_range(true); |
5f1f79bb DH |
2299 | uint64_t new_plugged_size, usable_region_size, end_addr; |
2300 | ||
2301 | /* the plugged_size is just a reflection of what _we_ did previously */ | |
99e0d048 MT |
2302 | virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, |
2303 | &new_plugged_size); | |
5f1f79bb DH |
2304 | if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size)) |
2305 | vm->plugged_size = new_plugged_size; | |
2306 | ||
2307 | /* calculate the last usable memory block id */ | |
99e0d048 MT |
2308 | virtio_cread_le(vm->vdev, struct virtio_mem_config, |
2309 | usable_region_size, &usable_region_size); | |
94c89453 DH |
2310 | end_addr = min(vm->addr + usable_region_size - 1, |
2311 | pluggable_range.end); | |
4ba50cd3 | 2312 | |
94c89453 DH |
2313 | if (vm->in_sbm) { |
2314 | vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr); | |
2315 | if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes())) | |
2316 | vm->sbm.last_usable_mb_id--; | |
2317 | } else { | |
2318 | vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm, | |
2319 | end_addr); | |
2320 | if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size)) | |
2321 | vm->bbm.last_usable_bb_id--; | |
2322 | } | |
2323 | /* | |
2324 | * If we cannot plug any of our device memory (e.g., nothing in the | |
2325 | * usable region is addressable), the last usable memory block id will | |
2326 | * be smaller than the first usable memory block id. We'll stop | |
2327 | * attempting to add memory with -ENOSPC from our main loop. | |
2328 | */ | |
5f1f79bb DH |
2329 | |
2330 | /* see if there is a request to change the size */ | |
99e0d048 MT |
2331 | virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, |
2332 | &vm->requested_size); | |
5f1f79bb DH |
2333 | |
2334 | dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size); | |
2335 | dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size); | |
2336 | } | |
2337 | ||
2338 | /* | |
2339 | * Workqueue function for handling plug/unplug requests and config updates. | |
2340 | */ | |
2341 | static void virtio_mem_run_wq(struct work_struct *work) | |
2342 | { | |
2343 | struct virtio_mem *vm = container_of(work, struct virtio_mem, wq); | |
2344 | uint64_t diff; | |
2345 | int rc; | |
2346 | ||
ce281462 DH |
2347 | if (unlikely(vm->in_kdump)) { |
2348 | dev_warn_once(&vm->vdev->dev, | |
2349 | "unexpected workqueue run in kdump kernel\n"); | |
2350 | return; | |
2351 | } | |
2352 | ||
5f1f79bb DH |
2353 | hrtimer_cancel(&vm->retry_timer); |
2354 | ||
2355 | if (vm->broken) | |
2356 | return; | |
2357 | ||
98ff9f94 | 2358 | atomic_set(&vm->wq_active, 1); |
5f1f79bb DH |
2359 | retry: |
2360 | rc = 0; | |
2361 | ||
2362 | /* Make sure we start with a clean state if there are leftovers. */ | |
2363 | if (unlikely(vm->unplug_all_required)) | |
2364 | rc = virtio_mem_send_unplug_all_request(vm); | |
2365 | ||
2366 | if (atomic_read(&vm->config_changed)) { | |
2367 | atomic_set(&vm->config_changed, 0); | |
2368 | virtio_mem_refresh_config(vm); | |
2369 | } | |
2370 | ||
2371 | /* Unplug any leftovers from previous runs */ | |
2372 | if (!rc) | |
2373 | rc = virtio_mem_unplug_pending_mb(vm); | |
2374 | ||
2375 | if (!rc && vm->requested_size != vm->plugged_size) { | |
2376 | if (vm->requested_size > vm->plugged_size) { | |
2377 | diff = vm->requested_size - vm->plugged_size; | |
2378 | rc = virtio_mem_plug_request(vm, diff); | |
c627ff5d DH |
2379 | } else { |
2380 | diff = vm->plugged_size - vm->requested_size; | |
2381 | rc = virtio_mem_unplug_request(vm, diff); | |
5f1f79bb | 2382 | } |
5f1f79bb DH |
2383 | } |
2384 | ||
2385 | switch (rc) { | |
2386 | case 0: | |
23e77b5d | 2387 | vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; |
5f1f79bb DH |
2388 | break; |
2389 | case -ENOSPC: | |
2390 | /* | |
2391 | * We cannot add any more memory (alignment, physical limit) | |
2392 | * or we have too many offline memory blocks. | |
2393 | */ | |
2394 | break; | |
8d4edcfe | 2395 | case -ETXTBSY: |
5f1f79bb DH |
2396 | /* |
2397 | * The hypervisor cannot process our request right now | |
8d4edcfe DH |
2398 | * (e.g., out of memory, migrating); |
2399 | */ | |
2400 | case -EBUSY: | |
2401 | /* | |
2402 | * We cannot free up any memory to unplug it (all plugged memory | |
2403 | * is busy). | |
5f1f79bb DH |
2404 | */ |
2405 | case -ENOMEM: | |
2406 | /* Out of memory, try again later. */ | |
23e77b5d | 2407 | hrtimer_start(&vm->retry_timer, ms_to_ktime(vm->retry_timer_ms), |
5f1f79bb DH |
2408 | HRTIMER_MODE_REL); |
2409 | break; | |
2410 | case -EAGAIN: | |
2411 | /* Retry immediately (e.g., the config changed). */ | |
2412 | goto retry; | |
2413 | default: | |
2414 | /* Unknown error, mark as broken */ | |
2415 | dev_err(&vm->vdev->dev, | |
2416 | "unknown error, marking device broken: %d\n", rc); | |
2417 | vm->broken = true; | |
2418 | } | |
98ff9f94 DH |
2419 | |
2420 | atomic_set(&vm->wq_active, 0); | |
5f1f79bb DH |
2421 | } |
2422 | ||
2423 | static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer) | |
2424 | { | |
2425 | struct virtio_mem *vm = container_of(timer, struct virtio_mem, | |
2426 | retry_timer); | |
2427 | ||
2428 | virtio_mem_retry(vm); | |
23e77b5d DH |
2429 | vm->retry_timer_ms = min_t(unsigned int, vm->retry_timer_ms * 2, |
2430 | VIRTIO_MEM_RETRY_TIMER_MAX_MS); | |
5f1f79bb DH |
2431 | return HRTIMER_NORESTART; |
2432 | } | |
2433 | ||
2434 | static void virtio_mem_handle_response(struct virtqueue *vq) | |
2435 | { | |
2436 | struct virtio_mem *vm = vq->vdev->priv; | |
2437 | ||
2438 | wake_up(&vm->host_resp); | |
2439 | } | |
2440 | ||
2441 | static int virtio_mem_init_vq(struct virtio_mem *vm) | |
2442 | { | |
2443 | struct virtqueue *vq; | |
2444 | ||
2445 | vq = virtio_find_single_vq(vm->vdev, virtio_mem_handle_response, | |
2446 | "guest-request"); | |
2447 | if (IS_ERR(vq)) | |
2448 | return PTR_ERR(vq); | |
2449 | vm->vq = vq; | |
2450 | ||
2451 | return 0; | |
2452 | } | |
2453 | ||
94300fcf | 2454 | static int virtio_mem_init_hotplug(struct virtio_mem *vm) |
5f1f79bb | 2455 | { |
94c89453 | 2456 | const struct range pluggable_range = mhp_get_pluggable_range(true); |
84e17e68 DH |
2457 | uint64_t unit_pages, sb_size, addr; |
2458 | int rc; | |
6725f211 | 2459 | |
5f1f79bb DH |
2460 | /* bad device setup - warn only */ |
2461 | if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) | |
2462 | dev_warn(&vm->vdev->dev, | |
2463 | "The alignment of the physical start address can make some memory unusable.\n"); | |
2464 | if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes())) | |
2465 | dev_warn(&vm->vdev->dev, | |
2466 | "The alignment of the physical end address can make some memory unusable.\n"); | |
94c89453 DH |
2467 | if (vm->addr < pluggable_range.start || |
2468 | vm->addr + vm->region_size - 1 > pluggable_range.end) | |
5f1f79bb | 2469 | dev_warn(&vm->vdev->dev, |
94c89453 | 2470 | "Some device memory is not addressable/pluggable. This can make some memory unusable.\n"); |
5f1f79bb | 2471 | |
500817bf DH |
2472 | /* Prepare the offline threshold - make sure we can add two blocks. */ |
2473 | vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), | |
2474 | VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); | |
2475 | ||
5f1f79bb | 2476 | /* |
448b8ec3 ZY |
2477 | * alloc_contig_range() works reliably with pageblock |
2478 | * granularity on ZONE_NORMAL, use pageblock_nr_pages. | |
5f1f79bb | 2479 | */ |
448b8ec3 | 2480 | sb_size = PAGE_SIZE * pageblock_nr_pages; |
4ba50cd3 DH |
2481 | sb_size = max_t(uint64_t, vm->device_block_size, sb_size); |
2482 | ||
faa45ff4 | 2483 | if (sb_size < memory_block_size_bytes() && !force_bbm) { |
4ba50cd3 DH |
2484 | /* SBM: At least two subblocks per Linux memory block. */ |
2485 | vm->in_sbm = true; | |
2486 | vm->sbm.sb_size = sb_size; | |
2487 | vm->sbm.sbs_per_mb = memory_block_size_bytes() / | |
2488 | vm->sbm.sb_size; | |
2489 | ||
2490 | /* Round up to the next full memory block */ | |
94c89453 DH |
2491 | addr = max_t(uint64_t, vm->addr, pluggable_range.start) + |
2492 | memory_block_size_bytes() - 1; | |
4ba50cd3 DH |
2493 | vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr); |
2494 | vm->sbm.next_mb_id = vm->sbm.first_mb_id; | |
2495 | } else { | |
2496 | /* BBM: At least one Linux memory block. */ | |
faa45ff4 DH |
2497 | vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size, |
2498 | memory_block_size_bytes()); | |
2499 | ||
2500 | if (bbm_block_size) { | |
2501 | if (!is_power_of_2(bbm_block_size)) { | |
2502 | dev_warn(&vm->vdev->dev, | |
2503 | "bbm_block_size is not a power of 2"); | |
2504 | } else if (bbm_block_size < vm->bbm.bb_size) { | |
2505 | dev_warn(&vm->vdev->dev, | |
2506 | "bbm_block_size is too small"); | |
2507 | } else { | |
2508 | vm->bbm.bb_size = bbm_block_size; | |
2509 | } | |
2510 | } | |
5f1f79bb | 2511 | |
faa45ff4 | 2512 | /* Round up to the next aligned big block */ |
94c89453 DH |
2513 | addr = max_t(uint64_t, vm->addr, pluggable_range.start) + |
2514 | vm->bbm.bb_size - 1; | |
faa45ff4 | 2515 | vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr); |
4ba50cd3 | 2516 | vm->bbm.next_bb_id = vm->bbm.first_bb_id; |
5f1f79bb | 2517 | |
500817bf DH |
2518 | /* Make sure we can add two big blocks. */ |
2519 | vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size, | |
2520 | vm->offline_threshold); | |
2521 | } | |
98ff9f94 | 2522 | |
5f1f79bb DH |
2523 | dev_info(&vm->vdev->dev, "memory block size: 0x%lx", |
2524 | memory_block_size_bytes()); | |
4ba50cd3 DH |
2525 | if (vm->in_sbm) |
2526 | dev_info(&vm->vdev->dev, "subblock size: 0x%llx", | |
2527 | (unsigned long long)vm->sbm.sb_size); | |
2528 | else | |
2529 | dev_info(&vm->vdev->dev, "big block size: 0x%llx", | |
2530 | (unsigned long long)vm->bbm.bb_size); | |
94300fcf | 2531 | |
84e17e68 DH |
2532 | /* create the parent resource for all memory */ |
2533 | rc = virtio_mem_create_resource(vm); | |
2534 | if (rc) | |
2535 | return rc; | |
2536 | ||
2537 | /* use a single dynamic memory group to cover the whole memory device */ | |
2538 | if (vm->in_sbm) | |
2539 | unit_pages = PHYS_PFN(memory_block_size_bytes()); | |
2540 | else | |
2541 | unit_pages = PHYS_PFN(vm->bbm.bb_size); | |
2542 | rc = memory_group_register_dynamic(vm->nid, unit_pages); | |
2543 | if (rc < 0) | |
2544 | goto out_del_resource; | |
2545 | vm->mgid = rc; | |
2546 | ||
2547 | /* | |
2548 | * If we still have memory plugged, we have to unplug all memory first. | |
2549 | * Registering our parent resource makes sure that this memory isn't | |
2550 | * actually in use (e.g., trying to reload the driver). | |
2551 | */ | |
2552 | if (vm->plugged_size) { | |
2553 | vm->unplug_all_required = true; | |
2554 | dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); | |
2555 | } | |
2556 | ||
2557 | /* register callbacks */ | |
2558 | vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; | |
2559 | rc = register_memory_notifier(&vm->memory_notifier); | |
2560 | if (rc) | |
2561 | goto out_unreg_group; | |
2562 | rc = register_virtio_mem_device(vm); | |
2563 | if (rc) | |
2564 | goto out_unreg_mem; | |
2565 | ||
94300fcf | 2566 | return 0; |
84e17e68 DH |
2567 | out_unreg_mem: |
2568 | unregister_memory_notifier(&vm->memory_notifier); | |
2569 | out_unreg_group: | |
2570 | memory_group_unregister(vm->mgid); | |
2571 | out_del_resource: | |
2572 | virtio_mem_delete_resource(vm); | |
2573 | return rc; | |
94300fcf DH |
2574 | } |
2575 | ||
ce281462 DH |
2576 | #ifdef CONFIG_PROC_VMCORE |
2577 | static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr, | |
2578 | uint64_t size) | |
2579 | { | |
2580 | const uint64_t nb_vm_blocks = size / vm->device_block_size; | |
2581 | const struct virtio_mem_req req = { | |
2582 | .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE), | |
2583 | .u.state.addr = cpu_to_virtio64(vm->vdev, addr), | |
2584 | .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), | |
2585 | }; | |
2586 | int rc = -ENOMEM; | |
2587 | ||
2588 | dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr, | |
2589 | addr + size - 1); | |
2590 | ||
2591 | switch (virtio_mem_send_request(vm, &req)) { | |
2592 | case VIRTIO_MEM_RESP_ACK: | |
2593 | return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state); | |
2594 | case VIRTIO_MEM_RESP_ERROR: | |
2595 | rc = -EINVAL; | |
2596 | break; | |
2597 | default: | |
2598 | break; | |
2599 | } | |
2600 | ||
2601 | dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc); | |
2602 | return rc; | |
2603 | } | |
2604 | ||
2605 | static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb, | |
2606 | unsigned long pfn) | |
2607 | { | |
2608 | struct virtio_mem *vm = container_of(cb, struct virtio_mem, | |
2609 | vmcore_cb); | |
2610 | uint64_t addr = PFN_PHYS(pfn); | |
2611 | bool is_ram; | |
2612 | int rc; | |
2613 | ||
2614 | if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE)) | |
2615 | return true; | |
2616 | if (!vm->plugged_size) | |
2617 | return false; | |
2618 | ||
2619 | /* | |
2620 | * We have to serialize device requests and access to the information | |
2621 | * about the block queried last. | |
2622 | */ | |
2623 | mutex_lock(&vm->hotplug_mutex); | |
2624 | ||
2625 | addr = ALIGN_DOWN(addr, vm->device_block_size); | |
2626 | if (addr != vm->last_block_addr) { | |
2627 | rc = virtio_mem_send_state_request(vm, addr, | |
2628 | vm->device_block_size); | |
2629 | /* On any kind of error, we're going to signal !ram. */ | |
2630 | if (rc == VIRTIO_MEM_STATE_PLUGGED) | |
2631 | vm->last_block_plugged = true; | |
2632 | else | |
2633 | vm->last_block_plugged = false; | |
2634 | vm->last_block_addr = addr; | |
2635 | } | |
2636 | ||
2637 | is_ram = vm->last_block_plugged; | |
2638 | mutex_unlock(&vm->hotplug_mutex); | |
2639 | return is_ram; | |
2640 | } | |
2641 | #endif /* CONFIG_PROC_VMCORE */ | |
2642 | ||
2643 | static int virtio_mem_init_kdump(struct virtio_mem *vm) | |
2644 | { | |
2645 | #ifdef CONFIG_PROC_VMCORE | |
2646 | dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); | |
2647 | vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; | |
2648 | register_vmcore_cb(&vm->vmcore_cb); | |
2649 | return 0; | |
2650 | #else /* CONFIG_PROC_VMCORE */ | |
2651 | dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n"); | |
2652 | return -EBUSY; | |
2653 | #endif /* CONFIG_PROC_VMCORE */ | |
2654 | } | |
2655 | ||
94300fcf DH |
2656 | static int virtio_mem_init(struct virtio_mem *vm) |
2657 | { | |
2658 | uint16_t node_id; | |
2659 | ||
2660 | if (!vm->vdev->config->get) { | |
2661 | dev_err(&vm->vdev->dev, "config access disabled\n"); | |
2662 | return -EINVAL; | |
2663 | } | |
2664 | ||
94300fcf DH |
2665 | /* Fetch all properties that can't change. */ |
2666 | virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, | |
2667 | &vm->plugged_size); | |
2668 | virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, | |
2669 | &vm->device_block_size); | |
2670 | virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, | |
2671 | &node_id); | |
2672 | vm->nid = virtio_mem_translate_node_id(vm, node_id); | |
2673 | virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); | |
2674 | virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, | |
2675 | &vm->region_size); | |
2676 | ||
2677 | /* Determine the nid for the device based on the lowest address. */ | |
2678 | if (vm->nid == NUMA_NO_NODE) | |
2679 | vm->nid = memory_add_physaddr_to_nid(vm->addr); | |
2680 | ||
2681 | dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); | |
2682 | dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); | |
2683 | dev_info(&vm->vdev->dev, "device block size: 0x%llx", | |
2684 | (unsigned long long)vm->device_block_size); | |
6725f211 | 2685 | if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) |
f2af6d39 | 2686 | dev_info(&vm->vdev->dev, "nid: %d", vm->nid); |
5f1f79bb | 2687 | |
ce281462 DH |
2688 | /* |
2689 | * We don't want to (un)plug or reuse any memory when in kdump. The | |
2690 | * memory is still accessible (but not exposed to Linux). | |
2691 | */ | |
2692 | if (vm->in_kdump) | |
2693 | return virtio_mem_init_kdump(vm); | |
94300fcf | 2694 | return virtio_mem_init_hotplug(vm); |
5f1f79bb DH |
2695 | } |
2696 | ||
ebf71552 DH |
2697 | static int virtio_mem_create_resource(struct virtio_mem *vm) |
2698 | { | |
2699 | /* | |
2700 | * When force-unloading the driver and removing the device, we | |
2701 | * could have a garbage pointer. Duplicate the string. | |
2702 | */ | |
2703 | const char *name = kstrdup(dev_name(&vm->vdev->dev), GFP_KERNEL); | |
2704 | ||
2705 | if (!name) | |
2706 | return -ENOMEM; | |
2707 | ||
2128f4e2 | 2708 | /* Disallow mapping device memory via /dev/mem completely. */ |
ebf71552 | 2709 | vm->parent_resource = __request_mem_region(vm->addr, vm->region_size, |
2128f4e2 DH |
2710 | name, IORESOURCE_SYSTEM_RAM | |
2711 | IORESOURCE_EXCLUSIVE); | |
ebf71552 DH |
2712 | if (!vm->parent_resource) { |
2713 | kfree(name); | |
2714 | dev_warn(&vm->vdev->dev, "could not reserve device region\n"); | |
3c42e198 DH |
2715 | dev_info(&vm->vdev->dev, |
2716 | "reloading the driver is not supported\n"); | |
ebf71552 DH |
2717 | return -EBUSY; |
2718 | } | |
2719 | ||
2720 | /* The memory is not actually busy - make add_memory() work. */ | |
2721 | vm->parent_resource->flags &= ~IORESOURCE_BUSY; | |
2722 | return 0; | |
2723 | } | |
2724 | ||
2725 | static void virtio_mem_delete_resource(struct virtio_mem *vm) | |
2726 | { | |
2727 | const char *name; | |
2728 | ||
2729 | if (!vm->parent_resource) | |
2730 | return; | |
2731 | ||
2732 | name = vm->parent_resource->name; | |
2733 | release_resource(vm->parent_resource); | |
2734 | kfree(vm->parent_resource); | |
2735 | kfree(name); | |
2736 | vm->parent_resource = NULL; | |
2737 | } | |
2738 | ||
989ff825 DH |
/*
 * Resource walker callback: being called at all means a matching resource
 * was found, so unconditionally report 1.
 */
static int virtio_mem_range_has_system_ram(struct resource *res, void *arg)
{
	return 1;
}
2743 | ||
2744 | static bool virtio_mem_has_memory_added(struct virtio_mem *vm) | |
2745 | { | |
2746 | const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; | |
2747 | ||
2748 | return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr, | |
2749 | vm->addr + vm->region_size, NULL, | |
2750 | virtio_mem_range_has_system_ram) == 1; | |
2751 | } | |
2752 | ||
5f1f79bb DH |
2753 | static int virtio_mem_probe(struct virtio_device *vdev) |
2754 | { | |
2755 | struct virtio_mem *vm; | |
b3fb6de7 | 2756 | int rc; |
5f1f79bb | 2757 | |
fce8afd7 DH |
2758 | BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); |
2759 | BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10); | |
2760 | ||
5f1f79bb DH |
2761 | vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL); |
2762 | if (!vm) | |
2763 | return -ENOMEM; | |
2764 | ||
2765 | init_waitqueue_head(&vm->host_resp); | |
2766 | vm->vdev = vdev; | |
2767 | INIT_WORK(&vm->wq, virtio_mem_run_wq); | |
2768 | mutex_init(&vm->hotplug_mutex); | |
2769 | INIT_LIST_HEAD(&vm->next); | |
2770 | spin_lock_init(&vm->removal_lock); | |
2771 | hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2772 | vm->retry_timer.function = virtio_mem_timer_expired; | |
23e77b5d | 2773 | vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; |
ce281462 | 2774 | vm->in_kdump = is_kdump_kernel(); |
5f1f79bb DH |
2775 | |
2776 | /* register the virtqueue */ | |
2777 | rc = virtio_mem_init_vq(vm); | |
2778 | if (rc) | |
2779 | goto out_free_vm; | |
2780 | ||
2781 | /* initialize the device by querying the config */ | |
2782 | rc = virtio_mem_init(vm); | |
2783 | if (rc) | |
2784 | goto out_del_vq; | |
2785 | ||
5f1f79bb DH |
2786 | virtio_device_ready(vdev); |
2787 | ||
2788 | /* trigger a config update to start processing the requested_size */ | |
ce281462 DH |
2789 | if (!vm->in_kdump) { |
2790 | atomic_set(&vm->config_changed, 1); | |
2791 | queue_work(system_freezable_wq, &vm->wq); | |
2792 | } | |
5f1f79bb DH |
2793 | |
2794 | return 0; | |
5f1f79bb DH |
2795 | out_del_vq: |
2796 | vdev->config->del_vqs(vdev); | |
2797 | out_free_vm: | |
2798 | kfree(vm); | |
2799 | vdev->priv = NULL; | |
2800 | ||
2801 | return rc; | |
2802 | } | |
2803 | ||
ffc763d0 | 2804 | static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) |
5f1f79bb | 2805 | { |
5f1f79bb DH |
2806 | unsigned long mb_id; |
2807 | int rc; | |
2808 | ||
2809 | /* | |
2810 | * Make sure the workqueue won't be triggered anymore and no memory | |
2811 | * blocks can be onlined/offlined until we're finished here. | |
2812 | */ | |
2813 | mutex_lock(&vm->hotplug_mutex); | |
2814 | spin_lock_irq(&vm->removal_lock); | |
2815 | vm->removing = true; | |
2816 | spin_unlock_irq(&vm->removal_lock); | |
2817 | mutex_unlock(&vm->hotplug_mutex); | |
2818 | ||
2819 | /* wait until the workqueue stopped */ | |
2820 | cancel_work_sync(&vm->wq); | |
2821 | hrtimer_cancel(&vm->retry_timer); | |
2822 | ||
4ba50cd3 DH |
2823 | if (vm->in_sbm) { |
2824 | /* | |
2825 | * After we unregistered our callbacks, user space can online | |
2826 | * partially plugged offline blocks. Make sure to remove them. | |
2827 | */ | |
2828 | virtio_mem_sbm_for_each_mb(vm, mb_id, | |
2829 | VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { | |
2830 | rc = virtio_mem_sbm_remove_mb(vm, mb_id); | |
2831 | BUG_ON(rc); | |
2832 | virtio_mem_sbm_set_mb_state(vm, mb_id, | |
2833 | VIRTIO_MEM_SBM_MB_UNUSED); | |
2834 | } | |
2835 | /* | |
2836 | * After we unregistered our callbacks, user space can no longer | |
2837 | * offline partially plugged online memory blocks. No need to | |
2838 | * worry about them. | |
2839 | */ | |
5f1f79bb DH |
2840 | } |
2841 | ||
2842 | /* unregister callbacks */ | |
2843 | unregister_virtio_mem_device(vm); | |
2844 | unregister_memory_notifier(&vm->memory_notifier); | |
2845 | ||
2846 | /* | |
2847 | * There is no way we could reliably remove all memory we have added to | |
2848 | * the system. And there is no way to stop the driver/device from going | |
2849 | * away. Warn at least. | |
2850 | */ | |
989ff825 | 2851 | if (virtio_mem_has_memory_added(vm)) { |
ffc763d0 DH |
2852 | dev_warn(&vm->vdev->dev, |
2853 | "device still has system memory added\n"); | |
b3562c60 | 2854 | } else { |
ebf71552 | 2855 | virtio_mem_delete_resource(vm); |
b3562c60 | 2856 | kfree_const(vm->resource_name); |
ffaa6ce8 | 2857 | memory_group_unregister(vm->mgid); |
b3562c60 | 2858 | } |
5f1f79bb DH |
2859 | |
2860 | /* remove all tracking data - no locking needed */ | |
4ba50cd3 DH |
2861 | if (vm->in_sbm) { |
2862 | vfree(vm->sbm.mb_states); | |
2863 | vfree(vm->sbm.sb_states); | |
2864 | } else { | |
2865 | vfree(vm->bbm.bb_states); | |
2866 | } | |
ffc763d0 DH |
2867 | } |
2868 | ||
ce281462 DH |
/*
 * Undo virtio_mem_init_kdump(): unregister the vmcore callback. This is a
 * no-op without CONFIG_PROC_VMCORE.
 */
static void virtio_mem_deinit_kdump(struct virtio_mem *vm)
{
#ifdef CONFIG_PROC_VMCORE
	unregister_vmcore_cb(&vm->vmcore_cb);
#endif /* CONFIG_PROC_VMCORE */
}
2875 | ||
ffc763d0 DH |
2876 | static void virtio_mem_remove(struct virtio_device *vdev) |
2877 | { | |
2878 | struct virtio_mem *vm = vdev->priv; | |
2879 | ||
ce281462 DH |
2880 | if (vm->in_kdump) |
2881 | virtio_mem_deinit_kdump(vm); | |
2882 | else | |
2883 | virtio_mem_deinit_hotplug(vm); | |
5f1f79bb DH |
2884 | |
2885 | /* reset the device and cleanup the queues */ | |
d9679d00 | 2886 | virtio_reset_device(vdev); |
5f1f79bb DH |
2887 | vdev->config->del_vqs(vdev); |
2888 | ||
2889 | kfree(vm); | |
2890 | vdev->priv = NULL; | |
2891 | } | |
2892 | ||
2893 | static void virtio_mem_config_changed(struct virtio_device *vdev) | |
2894 | { | |
2895 | struct virtio_mem *vm = vdev->priv; | |
2896 | ||
ce281462 DH |
2897 | if (unlikely(vm->in_kdump)) |
2898 | return; | |
2899 | ||
5f1f79bb DH |
2900 | atomic_set(&vm->config_changed, 1); |
2901 | virtio_mem_retry(vm); | |
2902 | } | |
2903 | ||
2904 | #ifdef CONFIG_PM_SLEEP | |
2905 | static int virtio_mem_freeze(struct virtio_device *vdev) | |
2906 | { | |
2907 | /* | |
2908 | * When restarting the VM, all memory is usually unplugged. Don't | |
2909 | * allow to suspend/hibernate. | |
2910 | */ | |
2911 | dev_err(&vdev->dev, "save/restore not supported.\n"); | |
2912 | return -EPERM; | |
2913 | } | |
2914 | ||
2915 | static int virtio_mem_restore(struct virtio_device *vdev) | |
2916 | { | |
2917 | return -EPERM; | |
2918 | } | |
2919 | #endif | |
2920 | ||
f2af6d39 DH |
2921 | static unsigned int virtio_mem_features[] = { |
2922 | #if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA) | |
2923 | VIRTIO_MEM_F_ACPI_PXM, | |
2924 | #endif | |
61082ad6 | 2925 | VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, |
f2af6d39 DH |
2926 | }; |
2927 | ||
7ab4de60 | 2928 | static const struct virtio_device_id virtio_mem_id_table[] = { |
5f1f79bb DH |
2929 | { VIRTIO_ID_MEM, VIRTIO_DEV_ANY_ID }, |
2930 | { 0 }, | |
2931 | }; | |
2932 | ||
2933 | static struct virtio_driver virtio_mem_driver = { | |
f2af6d39 DH |
2934 | .feature_table = virtio_mem_features, |
2935 | .feature_table_size = ARRAY_SIZE(virtio_mem_features), | |
5f1f79bb DH |
2936 | .driver.name = KBUILD_MODNAME, |
2937 | .driver.owner = THIS_MODULE, | |
2938 | .id_table = virtio_mem_id_table, | |
2939 | .probe = virtio_mem_probe, | |
2940 | .remove = virtio_mem_remove, | |
2941 | .config_changed = virtio_mem_config_changed, | |
2942 | #ifdef CONFIG_PM_SLEEP | |
2943 | .freeze = virtio_mem_freeze, | |
2944 | .restore = virtio_mem_restore, | |
2945 | #endif | |
2946 | }; | |
2947 | ||
2948 | module_virtio_driver(virtio_mem_driver); | |
2949 | MODULE_DEVICE_TABLE(virtio, virtio_mem_id_table); | |
2950 | MODULE_AUTHOR("David Hildenbrand <david@redhat.com>"); | |
2951 | MODULE_DESCRIPTION("Virtio-mem driver"); | |
2952 | MODULE_LICENSE("GPL"); |