Commit | Line | Data |
---|---|---|
1e214a5c SL |
1 | /* |
2 | * Virtio balloon implementation, inspired by Dor Laor and Marcelo | |
6b35e407 RR |
3 | * Tosatti's implementations. |
4 | * | |
5 | * Copyright 2008 Rusty Russell IBM Corporation | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
1e214a5c | 21 | |
6b35e407 RR |
22 | #include <linux/virtio.h> |
23 | #include <linux/virtio_balloon.h> | |
24 | #include <linux/swap.h> | |
fad7b7b2 | 25 | #include <linux/workqueue.h> |
6659a0f0 | 26 | #include <linux/delay.h> |
5a0e3ad6 | 27 | #include <linux/slab.h> |
b5a2c4f1 | 28 | #include <linux/module.h> |
e2250429 | 29 | #include <linux/balloon_compaction.h> |
5a10b7db | 30 | #include <linux/oom.h> |
3d2a3774 | 31 | #include <linux/wait.h> |
5057dcd0 | 32 | #include <linux/mm.h> |
b1123ea6 | 33 | #include <linux/mount.h> |
50d34394 | 34 | #include <linux/magic.h> |
6b35e407 | 35 | |
3ccc9372 MT |
36 | /* |
37 | * Balloon device works in 4K page units. So each page is pointed to by | |
38 | * multiple balloon pages. All memory counters in this driver are in balloon | |
39 | * page units. | |
40 | */ | |
e2250429 RA |
41 | #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) |
42 | #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 | |
5a10b7db RM |
43 | #define OOM_VBALLOON_DEFAULT_PAGES 256 |
44 | #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 | |
45 | ||
46 | static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; | |
47 | module_param(oom_pages, int, S_IRUSR | S_IWUSR); | |
48 | MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); | |
3ccc9372 | 49 | |
b1123ea6 MK |
50 | #ifdef CONFIG_BALLOON_COMPACTION |
51 | static struct vfsmount *balloon_mnt; | |
52 | #endif | |
53 | ||
25e65e4e | 54 | struct virtio_balloon { |
6b35e407 | 55 | struct virtio_device *vdev; |
9564e138 | 56 | struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; |
6b35e407 | 57 | |
fad7b7b2 | 58 | /* The balloon servicing is delegated to a freezable workqueue. */ |
fd0e21c3 PM |
59 | struct work_struct update_balloon_stats_work; |
60 | struct work_struct update_balloon_size_work; | |
6b35e407 | 61 | |
fad7b7b2 PM |
62 | /* Prevent updating balloon when it is being canceled. */ |
63 | spinlock_t stop_update_lock; | |
64 | bool stop_update; | |
6b35e407 RR |
65 | |
66 | /* Waiting for host to ack the pages we released. */ | |
9c378abc | 67 | wait_queue_head_t acked; |
6b35e407 | 68 | |
3ccc9372 | 69 | /* Number of balloon pages we've told the Host we're not using. */ |
6b35e407 | 70 | unsigned int num_pages; |
3ccc9372 | 71 | /* |
e2250429 RA |
72 | * The pages we've told the Host we're not using are enqueued |
73 | * at vb_dev_info->pages list. | |
3ccc9372 MT |
74 | * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE |
75 | * to num_pages above. | |
76 | */ | |
9d1ba805 | 77 | struct balloon_dev_info vb_dev_info; |
e2250429 RA |
78 | |
79 | /* Synchronize access/update to this struct virtio_balloon elements */ | |
80 | struct mutex balloon_lock; | |
6b35e407 RR |
81 | |
82 | /* The array of pfns we tell the Host about. */ | |
83 | unsigned int num_pfns; | |
87c9403b | 84 | __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; |
9564e138 AL |
85 | |
86 | /* Memory statistics */ | |
87 | struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; | |
5a10b7db RM |
88 | |
89 | /* To register callback in oom notifier call chain */ | |
90 | struct notifier_block nb; | |
6b35e407 RR |
91 | }; |
92 | ||
93 | static struct virtio_device_id id_table[] = { | |
94 | { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, | |
95 | { 0 }, | |
96 | }; | |
97 | ||
1b4aa2fa HB |
98 | static u32 page_to_balloon_pfn(struct page *page) |
99 | { | |
100 | unsigned long pfn = page_to_pfn(page); | |
101 | ||
102 | BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); | |
103 | /* Convert pfn from Linux page size to balloon page size. */ | |
3ccc9372 MT |
104 | return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; |
105 | } | |
106 | ||
6b35e407 RR |
107 | static void balloon_ack(struct virtqueue *vq) |
108 | { | |
9c378abc | 109 | struct virtio_balloon *vb = vq->vdev->priv; |
6b35e407 | 110 | |
9c378abc | 111 | wake_up(&vb->acked); |
6b35e407 RR |
112 | } |
113 | ||
114 | static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) | |
115 | { | |
116 | struct scatterlist sg; | |
9c378abc | 117 | unsigned int len; |
6b35e407 RR |
118 | |
119 | sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); | |
120 | ||
6b35e407 | 121 | /* We should always be able to add one buffer to an empty queue. */ |
4951cc90 | 122 | virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); |
946cfe0e | 123 | virtqueue_kick(vq); |
6b35e407 RR |
124 | |
125 | /* When host has read buffer, this completes via balloon_ack */ | |
9c378abc | 126 | wait_event(vb->acked, virtqueue_get_buf(vq, &len)); |
fd0e21c3 | 127 | |
6b35e407 RR |
128 | } |
129 | ||
87c9403b MT |
130 | static void set_page_pfns(struct virtio_balloon *vb, |
131 | __virtio32 pfns[], struct page *page) | |
3ccc9372 MT |
132 | { |
133 | unsigned int i; | |
134 | ||
f9aada5f WW |
135 | /* |
136 | * Set balloon pfns pointing at this page. | |
137 | * Note that the first pfn points at start of the page. | |
138 | */ | |
3ccc9372 | 139 | for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) |
87c9403b MT |
140 | pfns[i] = cpu_to_virtio32(vb->vdev, |
141 | page_to_balloon_pfn(page) + i); | |
3ccc9372 MT |
142 | } |
143 | ||
fad7b7b2 | 144 | static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) |
6b35e407 | 145 | { |
fad7b7b2 | 146 | unsigned num_allocated_pages; |
c7cdff0e MT |
147 | unsigned num_pfns; |
148 | struct page *page; | |
149 | LIST_HEAD(pages); | |
e2250429 | 150 | |
6b35e407 RR |
151 | /* We can only do one array worth at a time. */ |
152 | num = min(num, ARRAY_SIZE(vb->pfns)); | |
153 | ||
c7cdff0e MT |
154 | for (num_pfns = 0; num_pfns < num; |
155 | num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { | |
156 | struct page *page = balloon_page_alloc(); | |
e2250429 | 157 | |
6b35e407 | 158 | if (!page) { |
800ba5ea | 159 | dev_info_ratelimited(&vb->vdev->dev, |
b7dfde95 LT |
160 | "Out of puff! Can't get %u pages\n", |
161 | VIRTIO_BALLOON_PAGES_PER_PAGE); | |
6b35e407 RR |
162 | /* Sleep for at least 1/5 of a second before retry. */ |
163 | msleep(200); | |
164 | break; | |
165 | } | |
c7cdff0e MT |
166 | |
167 | balloon_page_push(&pages, page); | |
168 | } | |
169 | ||
170 | mutex_lock(&vb->balloon_lock); | |
171 | ||
172 | vb->num_pfns = 0; | |
173 | ||
174 | while ((page = balloon_page_pop(&pages))) { | |
175 | balloon_page_enqueue(&vb->vb_dev_info, page); | |
176 | ||
87c9403b | 177 | set_page_pfns(vb, vb->pfns + vb->num_pfns, page); |
3ccc9372 | 178 | vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; |
997e1208 DL |
179 | if (!virtio_has_feature(vb->vdev, |
180 | VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) | |
181 | adjust_managed_page_count(page, -1); | |
d9e427f6 | 182 | vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; |
6b35e407 RR |
183 | } |
184 | ||
fad7b7b2 | 185 | num_allocated_pages = vb->num_pfns; |
e2250429 RA |
186 | /* Did we get any? */ |
187 | if (vb->num_pfns != 0) | |
188 | tell_host(vb, vb->inflate_vq); | |
189 | mutex_unlock(&vb->balloon_lock); | |
fad7b7b2 PM |
190 | |
191 | return num_allocated_pages; | |
6b35e407 RR |
192 | } |
193 | ||
195a8c43 LL |
194 | static void release_pages_balloon(struct virtio_balloon *vb, |
195 | struct list_head *pages) | |
6b35e407 | 196 | { |
195a8c43 | 197 | struct page *page, *next; |
6b35e407 | 198 | |
195a8c43 | 199 | list_for_each_entry_safe(page, next, pages, lru) { |
997e1208 DL |
200 | if (!virtio_has_feature(vb->vdev, |
201 | VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) | |
202 | adjust_managed_page_count(page, 1); | |
195a8c43 | 203 | list_del(&page->lru); |
d6d86c0a | 204 | put_page(page); /* balloon reference */ |
6b35e407 RR |
205 | } |
206 | } | |
207 | ||
1fd9c672 | 208 | static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) |
6b35e407 | 209 | { |
1fd9c672 | 210 | unsigned num_freed_pages; |
6b35e407 | 211 | struct page *page; |
9d1ba805 | 212 | struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; |
195a8c43 | 213 | LIST_HEAD(pages); |
6b35e407 RR |
214 | |
215 | /* We can only do one array worth at a time. */ | |
216 | num = min(num, ARRAY_SIZE(vb->pfns)); | |
217 | ||
e2250429 | 218 | mutex_lock(&vb->balloon_lock); |
37cf99e0 KN |
219 | /* We can't release more pages than taken */ |
220 | num = min(num, (size_t)vb->num_pages); | |
3ccc9372 MT |
221 | for (vb->num_pfns = 0; vb->num_pfns < num; |
222 | vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { | |
e2250429 RA |
223 | page = balloon_page_dequeue(vb_dev_info); |
224 | if (!page) | |
225 | break; | |
87c9403b | 226 | set_page_pfns(vb, vb->pfns + vb->num_pfns, page); |
195a8c43 | 227 | list_add(&page->lru, &pages); |
3ccc9372 | 228 | vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; |
6b35e407 RR |
229 | } |
230 | ||
1fd9c672 | 231 | num_freed_pages = vb->num_pfns; |
bf50e69f DH |
232 | /* |
233 | * Note that if | |
234 | * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); | |
235 | * is true, we *have* to do it in this order | |
236 | */ | |
8c6bab4f LC |
237 | if (vb->num_pfns != 0) |
238 | tell_host(vb, vb->deflate_vq); | |
195a8c43 | 239 | release_pages_balloon(vb, &pages); |
f68b992b | 240 | mutex_unlock(&vb->balloon_lock); |
1fd9c672 | 241 | return num_freed_pages; |
6b35e407 RR |
242 | } |
243 | ||
9564e138 AL |
244 | static inline void update_stat(struct virtio_balloon *vb, int idx, |
245 | u16 tag, u64 val) | |
246 | { | |
247 | BUG_ON(idx >= VIRTIO_BALLOON_S_NR); | |
df81b29c MT |
248 | vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag); |
249 | vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val); | |
9564e138 AL |
250 | } |
251 | ||
252 | #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) | |
253 | ||
9646b26e | 254 | static unsigned int update_balloon_stats(struct virtio_balloon *vb) |
9564e138 AL |
255 | { |
256 | unsigned long events[NR_VM_EVENT_ITEMS]; | |
257 | struct sysinfo i; | |
9646b26e | 258 | unsigned int idx = 0; |
5057dcd0 | 259 | long available; |
4d32029b | 260 | unsigned long caches; |
9564e138 AL |
261 | |
262 | all_vm_events(events); | |
263 | si_meminfo(&i); | |
264 | ||
5057dcd0 | 265 | available = si_mem_available(); |
4d32029b | 266 | caches = global_node_page_state(NR_FILE_PAGES); |
5057dcd0 | 267 | |
f0bb2d50 | 268 | #ifdef CONFIG_VM_EVENT_COUNTERS |
9564e138 AL |
269 | update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, |
270 | pages_to_bytes(events[PSWPIN])); | |
271 | update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, | |
272 | pages_to_bytes(events[PSWPOUT])); | |
273 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); | |
274 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); | |
6c64fe7f JH |
275 | #ifdef CONFIG_HUGETLB_PAGE |
276 | update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, | |
277 | events[HTLB_BUDDY_PGALLOC]); | |
278 | update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL, | |
279 | events[HTLB_BUDDY_PGALLOC_FAIL]); | |
280 | #endif | |
f0bb2d50 | 281 | #endif |
9564e138 AL |
282 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, |
283 | pages_to_bytes(i.freeram)); | |
284 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, | |
285 | pages_to_bytes(i.totalram)); | |
5057dcd0 IR |
286 | update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, |
287 | pages_to_bytes(available)); | |
4d32029b TG |
288 | update_stat(vb, idx++, VIRTIO_BALLOON_S_CACHES, |
289 | pages_to_bytes(caches)); | |
9646b26e LP |
290 | |
291 | return idx; | |
9564e138 AL |
292 | } |
293 | ||
294 | /* | |
295 | * While most virtqueues communicate guest-initiated requests to the hypervisor, | |
296 | * the stats queue operates in reverse. The driver initializes the virtqueue | |
297 | * with a single buffer. From that point forward, all conversations consist of | |
298 | * a hypervisor request (a call to this function) which directs us to refill | |
1f34c71a | 299 | * the virtqueue with a fresh stats buffer. Since stats collection can sleep, |
fad7b7b2 PM |
300 | * we delegate the job to a freezable workqueue that will do the actual work via |
301 | * stats_handle_request(). | |
9564e138 | 302 | */ |
1f34c71a | 303 | static void stats_request(struct virtqueue *vq) |
9564e138 | 304 | { |
9c378abc | 305 | struct virtio_balloon *vb = vq->vdev->priv; |
9564e138 | 306 | |
fad7b7b2 PM |
307 | spin_lock(&vb->stop_update_lock); |
308 | if (!vb->stop_update) | |
fd0e21c3 | 309 | queue_work(system_freezable_wq, &vb->update_balloon_stats_work); |
fad7b7b2 | 310 | spin_unlock(&vb->stop_update_lock); |
1f34c71a AL |
311 | } |
312 | ||
313 | static void stats_handle_request(struct virtio_balloon *vb) | |
314 | { | |
315 | struct virtqueue *vq; | |
316 | struct scatterlist sg; | |
9646b26e | 317 | unsigned int len, num_stats; |
9564e138 | 318 | |
9646b26e | 319 | num_stats = update_balloon_stats(vb); |
9564e138 | 320 | |
1f34c71a | 321 | vq = vb->stats_vq; |
9c378abc MT |
322 | if (!virtqueue_get_buf(vq, &len)) |
323 | return; | |
9646b26e | 324 | sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); |
4951cc90 | 325 | virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); |
946cfe0e | 326 | virtqueue_kick(vq); |
9564e138 AL |
327 | } |
328 | ||
6b35e407 RR |
329 | static void virtballoon_changed(struct virtio_device *vdev) |
330 | { | |
331 | struct virtio_balloon *vb = vdev->priv; | |
fad7b7b2 | 332 | unsigned long flags; |
6b35e407 | 333 | |
fad7b7b2 PM |
334 | spin_lock_irqsave(&vb->stop_update_lock, flags); |
335 | if (!vb->stop_update) | |
fd0e21c3 | 336 | queue_work(system_freezable_wq, &vb->update_balloon_size_work); |
fad7b7b2 | 337 | spin_unlock_irqrestore(&vb->stop_update_lock, flags); |
6b35e407 RR |
338 | } |
339 | ||
bdc1681c | 340 | static inline s64 towards_target(struct virtio_balloon *vb) |
6b35e407 | 341 | { |
1a87228f | 342 | s64 target; |
df81b29c | 343 | u32 num_pages; |
1a87228f | 344 | |
df81b29c MT |
345 | virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, |
346 | &num_pages); | |
855e0c52 | 347 | |
df81b29c MT |
348 | /* Legacy balloon config space is LE, unlike all other devices. */ |
349 | if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) | |
350 | num_pages = le32_to_cpu((__force __le32)num_pages); | |
351 | ||
352 | target = num_pages; | |
1a87228f | 353 | return target - vb->num_pages; |
6b35e407 RR |
354 | } |
355 | ||
356 | static void update_balloon_size(struct virtio_balloon *vb) | |
357 | { | |
df81b29c MT |
358 | u32 actual = vb->num_pages; |
359 | ||
360 | /* Legacy balloon config space is LE, unlike all other devices. */ | |
361 | if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) | |
362 | actual = (__force u32)cpu_to_le32(actual); | |
6b35e407 | 363 | |
3459f11a | 364 | virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, |
855e0c52 | 365 | &actual); |
6b35e407 RR |
366 | } |
367 | ||
5a10b7db RM |
368 | /* |
369 | * virtballoon_oom_notify - release pages when system is under severe | |
370 | * memory pressure (called from out_of_memory()) | |
371 | * @self : notifier block struct | |
372 | * @dummy: not used | |
373 | * @parm : returned - number of freed pages | |
374 | * | |
375 | * The balancing of memory by use of the virtio balloon should not cause | |
376 | * the termination of processes while there are pages in the balloon. | |
377 | * If virtio balloon manages to release some memory, it will make the | |
378 | * system return and retry the allocation that forced the OOM killer | |
379 | * to run. | |
380 | */ | |
381 | static int virtballoon_oom_notify(struct notifier_block *self, | |
382 | unsigned long dummy, void *parm) | |
383 | { | |
384 | struct virtio_balloon *vb; | |
385 | unsigned long *freed; | |
386 | unsigned num_freed_pages; | |
387 | ||
388 | vb = container_of(self, struct virtio_balloon, nb); | |
389 | if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) | |
390 | return NOTIFY_OK; | |
391 | ||
392 | freed = parm; | |
393 | num_freed_pages = leak_balloon(vb, oom_pages); | |
394 | update_balloon_size(vb); | |
395 | *freed += num_freed_pages; | |
396 | ||
397 | return NOTIFY_OK; | |
398 | } | |
399 | ||
fd0e21c3 | 400 | static void update_balloon_stats_func(struct work_struct *work) |
6b35e407 | 401 | { |
fd0e21c3 | 402 | struct virtio_balloon *vb; |
3d2a3774 | 403 | |
fd0e21c3 PM |
404 | vb = container_of(work, struct virtio_balloon, |
405 | update_balloon_stats_work); | |
406 | stats_handle_request(vb); | |
407 | } | |
1f74ef0f | 408 | |
fd0e21c3 | 409 | static void update_balloon_size_func(struct work_struct *work) |
6b35e407 | 410 | { |
fad7b7b2 PM |
411 | struct virtio_balloon *vb; |
412 | s64 diff; | |
3d2a3774 | 413 | |
fd0e21c3 PM |
414 | vb = container_of(work, struct virtio_balloon, |
415 | update_balloon_size_work); | |
fad7b7b2 | 416 | diff = towards_target(vb); |
1f74ef0f | 417 | |
fad7b7b2 PM |
418 | if (diff > 0) |
419 | diff -= fill_balloon(vb, diff); | |
420 | else if (diff < 0) | |
421 | diff += leak_balloon(vb, -diff); | |
422 | update_balloon_size(vb); | |
423 | ||
424 | if (diff) | |
425 | queue_work(system_freezable_wq, work); | |
6b35e407 RR |
426 | } |
427 | ||
be91c33d | 428 | static int init_vqs(struct virtio_balloon *vb) |
6b35e407 | 429 | { |
9564e138 | 430 | struct virtqueue *vqs[3]; |
1f34c71a | 431 | vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; |
f7ad26ff | 432 | static const char * const names[] = { "inflate", "deflate", "stats" }; |
9564e138 | 433 | int err, nvqs; |
6b35e407 | 434 | |
be91c33d AS |
435 | /* |
436 | * We expect two virtqueues: inflate and deflate, and | |
437 | * optionally stat. | |
438 | */ | |
9564e138 | 439 | nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; |
9b2bbdb2 | 440 | err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); |
d2a7ddda | 441 | if (err) |
be91c33d | 442 | return err; |
6b35e407 | 443 | |
d2a7ddda MT |
444 | vb->inflate_vq = vqs[0]; |
445 | vb->deflate_vq = vqs[1]; | |
9564e138 AL |
446 | if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { |
447 | struct scatterlist sg; | |
9646b26e | 448 | unsigned int num_stats; |
9564e138 AL |
449 | vb->stats_vq = vqs[2]; |
450 | ||
451 | /* | |
452 | * Prime this virtqueue with one buffer so the hypervisor can | |
4951cc90 | 453 | * use it to signal us later (it can't be broken yet!). |
9564e138 | 454 | */ |
9646b26e | 455 | num_stats = update_balloon_stats(vb); |
fc865322 | 456 | |
9646b26e | 457 | sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); |
74cf5b16 WW |
458 | err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, |
459 | GFP_KERNEL); | |
460 | if (err) { | |
461 | dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", | |
462 | __func__); | |
463 | return err; | |
464 | } | |
946cfe0e | 465 | virtqueue_kick(vb->stats_vq); |
9564e138 | 466 | } |
be91c33d AS |
467 | return 0; |
468 | } | |
469 | ||
e2250429 RA |
470 | #ifdef CONFIG_BALLOON_COMPACTION |
471 | /* | |
472 | * virtballoon_migratepage - perform the balloon page migration on behalf of | |
473 | * a compation thread. (called under page lock) | |
9d1ba805 | 474 | * @vb_dev_info: the balloon device |
e2250429 RA |
475 | * @newpage: page that will replace the isolated page after migration finishes. |
476 | * @page : the isolated (old) page that is about to be migrated to newpage. | |
477 | * @mode : compaction mode -- not used for balloon page migration. | |
478 | * | |
479 | * After a ballooned page gets isolated by compaction procedures, this is the | |
480 | * function that performs the page migration on behalf of a compaction thread | |
481 | * The page migration for virtio balloon is done in a simple swap fashion which | |
482 | * follows these two macro steps: | |
483 | * 1) insert newpage into vb->pages list and update the host about it; | |
484 | * 2) update the host about the old page removed from vb->pages list; | |
485 | * | |
486 | * This function preforms the balloon page migration task. | |
487 | * Called through balloon_mapping->a_ops->migratepage | |
488 | */ | |
9d1ba805 | 489 | static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, |
e2250429 RA |
490 | struct page *newpage, struct page *page, enum migrate_mode mode) |
491 | { | |
9d1ba805 KK |
492 | struct virtio_balloon *vb = container_of(vb_dev_info, |
493 | struct virtio_balloon, vb_dev_info); | |
e2250429 RA |
494 | unsigned long flags; |
495 | ||
e2250429 RA |
496 | /* |
497 | * In order to avoid lock contention while migrating pages concurrently | |
498 | * to leak_balloon() or fill_balloon() we just give up the balloon_lock | |
499 | * this turn, as it is easier to retry the page migration later. | |
500 | * This also prevents fill_balloon() getting stuck into a mutex | |
501 | * recursion in the case it ends up triggering memory compaction | |
502 | * while it is attempting to inflate the ballon. | |
503 | */ | |
504 | if (!mutex_trylock(&vb->balloon_lock)) | |
505 | return -EAGAIN; | |
506 | ||
d6d86c0a KK |
507 | get_page(newpage); /* balloon reference */ |
508 | ||
e2250429 RA |
509 | /* balloon's page migration 1st step -- inflate "newpage" */ |
510 | spin_lock_irqsave(&vb_dev_info->pages_lock, flags); | |
9d1ba805 | 511 | balloon_page_insert(vb_dev_info, newpage); |
e2250429 | 512 | vb_dev_info->isolated_pages--; |
09316c09 | 513 | __count_vm_event(BALLOON_MIGRATE); |
e2250429 RA |
514 | spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); |
515 | vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; | |
87c9403b | 516 | set_page_pfns(vb, vb->pfns, newpage); |
e2250429 RA |
517 | tell_host(vb, vb->inflate_vq); |
518 | ||
d6d86c0a | 519 | /* balloon's page migration 2nd step -- deflate "page" */ |
89da619b | 520 | spin_lock_irqsave(&vb_dev_info->pages_lock, flags); |
e2250429 | 521 | balloon_page_delete(page); |
89da619b | 522 | spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); |
e2250429 | 523 | vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; |
87c9403b | 524 | set_page_pfns(vb, vb->pfns, page); |
e2250429 RA |
525 | tell_host(vb, vb->deflate_vq); |
526 | ||
527 | mutex_unlock(&vb->balloon_lock); | |
528 | ||
d6d86c0a KK |
529 | put_page(page); /* balloon reference */ |
530 | ||
dd4123f3 | 531 | return MIGRATEPAGE_SUCCESS; |
e2250429 | 532 | } |
b1123ea6 MK |
533 | |
534 | static struct dentry *balloon_mount(struct file_system_type *fs_type, | |
535 | int flags, const char *dev_name, void *data) | |
536 | { | |
537 | static const struct dentry_operations ops = { | |
538 | .d_dname = simple_dname, | |
539 | }; | |
540 | ||
541 | return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops, | |
542 | BALLOON_KVM_MAGIC); | |
543 | } | |
544 | ||
545 | static struct file_system_type balloon_fs = { | |
546 | .name = "balloon-kvm", | |
547 | .mount = balloon_mount, | |
548 | .kill_sb = kill_anon_super, | |
549 | }; | |
550 | ||
e2250429 RA |
551 | #endif /* CONFIG_BALLOON_COMPACTION */ |
552 | ||
be91c33d AS |
553 | static int virtballoon_probe(struct virtio_device *vdev) |
554 | { | |
555 | struct virtio_balloon *vb; | |
556 | int err; | |
557 | ||
2d9becc1 MT |
558 | if (!vdev->config->get) { |
559 | dev_err(&vdev->dev, "%s failure: config access disabled\n", | |
560 | __func__); | |
561 | return -EINVAL; | |
562 | } | |
563 | ||
c51d8fca | 564 | vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL); |
be91c33d AS |
565 | if (!vb) { |
566 | err = -ENOMEM; | |
567 | goto out; | |
568 | } | |
569 | ||
fd0e21c3 PM |
570 | INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); |
571 | INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); | |
fad7b7b2 | 572 | spin_lock_init(&vb->stop_update_lock); |
e2250429 | 573 | mutex_init(&vb->balloon_lock); |
9c378abc | 574 | init_waitqueue_head(&vb->acked); |
be91c33d | 575 | vb->vdev = vdev; |
be91c33d | 576 | |
9d1ba805 | 577 | balloon_devinfo_init(&vb->vb_dev_info); |
e2250429 | 578 | |
be91c33d AS |
579 | err = init_vqs(vb); |
580 | if (err) | |
9d1ba805 | 581 | goto out_free_vb; |
6b35e407 | 582 | |
5a10b7db RM |
583 | vb->nb.notifier_call = virtballoon_oom_notify; |
584 | vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; | |
585 | err = register_oom_notifier(&vb->nb); | |
586 | if (err < 0) | |
b1123ea6 MK |
587 | goto out_del_vqs; |
588 | ||
589 | #ifdef CONFIG_BALLOON_COMPACTION | |
590 | balloon_mnt = kern_mount(&balloon_fs); | |
591 | if (IS_ERR(balloon_mnt)) { | |
592 | err = PTR_ERR(balloon_mnt); | |
593 | unregister_oom_notifier(&vb->nb); | |
594 | goto out_del_vqs; | |
595 | } | |
596 | ||
597 | vb->vb_dev_info.migratepage = virtballoon_migratepage; | |
598 | vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); | |
599 | if (IS_ERR(vb->vb_dev_info.inode)) { | |
600 | err = PTR_ERR(vb->vb_dev_info.inode); | |
601 | kern_unmount(balloon_mnt); | |
602 | unregister_oom_notifier(&vb->nb); | |
b1123ea6 MK |
603 | goto out_del_vqs; |
604 | } | |
605 | vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; | |
606 | #endif | |
5a10b7db | 607 | |
88660f7f MT |
608 | virtio_device_ready(vdev); |
609 | ||
8424af53 KN |
610 | if (towards_target(vb)) |
611 | virtballoon_changed(vdev); | |
6b35e407 RR |
612 | return 0; |
613 | ||
b1123ea6 | 614 | out_del_vqs: |
d2a7ddda | 615 | vdev->config->del_vqs(vdev); |
6b35e407 RR |
616 | out_free_vb: |
617 | kfree(vb); | |
618 | out: | |
619 | return err; | |
620 | } | |
621 | ||
c877bab5 | 622 | static void remove_common(struct virtio_balloon *vb) |
6b35e407 | 623 | { |
6b35e407 RR |
624 | /* There might be pages left in the balloon: free them. */ |
625 | while (vb->num_pages) | |
626 | leak_balloon(vb, vb->num_pages); | |
b8ae0eb3 | 627 | update_balloon_size(vb); |
6b35e407 RR |
628 | |
629 | /* Now we reset the device so we can clean up the queues. */ | |
c877bab5 | 630 | vb->vdev->config->reset(vb->vdev); |
6b35e407 | 631 | |
c877bab5 AS |
632 | vb->vdev->config->del_vqs(vb->vdev); |
633 | } | |
634 | ||
8590dbc7 | 635 | static void virtballoon_remove(struct virtio_device *vdev) |
c877bab5 AS |
636 | { |
637 | struct virtio_balloon *vb = vdev->priv; | |
638 | ||
5a10b7db | 639 | unregister_oom_notifier(&vb->nb); |
fad7b7b2 PM |
640 | |
641 | spin_lock_irq(&vb->stop_update_lock); | |
642 | vb->stop_update = true; | |
643 | spin_unlock_irq(&vb->stop_update_lock); | |
fd0e21c3 PM |
644 | cancel_work_sync(&vb->update_balloon_size_work); |
645 | cancel_work_sync(&vb->update_balloon_stats_work); | |
fad7b7b2 | 646 | |
c877bab5 | 647 | remove_common(vb); |
9c57b580 | 648 | #ifdef CONFIG_BALLOON_COMPACTION |
b1123ea6 MK |
649 | if (vb->vb_dev_info.inode) |
650 | iput(vb->vb_dev_info.inode); | |
9c57b580 YX |
651 | |
652 | kern_unmount(balloon_mnt); | |
653 | #endif | |
6b35e407 RR |
654 | kfree(vb); |
655 | } | |
656 | ||
89107000 | 657 | #ifdef CONFIG_PM_SLEEP |
e562966d AS |
658 | static int virtballoon_freeze(struct virtio_device *vdev) |
659 | { | |
4eb05d56 AS |
660 | struct virtio_balloon *vb = vdev->priv; |
661 | ||
e562966d | 662 | /* |
fad7b7b2 | 663 | * The workqueue is already frozen by the PM core before this |
e562966d AS |
664 | * function is called. |
665 | */ | |
c877bab5 | 666 | remove_common(vb); |
e562966d AS |
667 | return 0; |
668 | } | |
669 | ||
c45b4166 | 670 | static int virtballoon_restore(struct virtio_device *vdev) |
4eb05d56 AS |
671 | { |
672 | struct virtio_balloon *vb = vdev->priv; | |
673 | int ret; | |
674 | ||
675 | ret = init_vqs(vdev->priv); | |
676 | if (ret) | |
677 | return ret; | |
678 | ||
486d2e63 MT |
679 | virtio_device_ready(vdev); |
680 | ||
fad7b7b2 PM |
681 | if (towards_target(vb)) |
682 | virtballoon_changed(vdev); | |
4eb05d56 AS |
683 | update_balloon_size(vb); |
684 | return 0; | |
685 | } | |
e562966d AS |
686 | #endif |
687 | ||
e41b1355 MT |
688 | static int virtballoon_validate(struct virtio_device *vdev) |
689 | { | |
690 | __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); | |
691 | return 0; | |
692 | } | |
693 | ||
9564e138 AL |
694 | static unsigned int features[] = { |
695 | VIRTIO_BALLOON_F_MUST_TELL_HOST, | |
696 | VIRTIO_BALLOON_F_STATS_VQ, | |
5a10b7db | 697 | VIRTIO_BALLOON_F_DEFLATE_ON_OOM, |
9564e138 | 698 | }; |
c45a6816 | 699 | |
d817cd52 | 700 | static struct virtio_driver virtio_balloon_driver = { |
c45a6816 RR |
701 | .feature_table = features, |
702 | .feature_table_size = ARRAY_SIZE(features), | |
6b35e407 RR |
703 | .driver.name = KBUILD_MODNAME, |
704 | .driver.owner = THIS_MODULE, | |
705 | .id_table = id_table, | |
e41b1355 | 706 | .validate = virtballoon_validate, |
6b35e407 | 707 | .probe = virtballoon_probe, |
8590dbc7 | 708 | .remove = virtballoon_remove, |
6b35e407 | 709 | .config_changed = virtballoon_changed, |
89107000 | 710 | #ifdef CONFIG_PM_SLEEP |
e562966d AS |
711 | .freeze = virtballoon_freeze, |
712 | .restore = virtballoon_restore, | |
e562966d | 713 | #endif |
6b35e407 RR |
714 | }; |
715 | ||
b2a17029 | 716 | module_virtio_driver(virtio_balloon_driver); |
6b35e407 RR |
717 | MODULE_DEVICE_TABLE(virtio, id_table); |
718 | MODULE_DESCRIPTION("Virtio balloon driver"); | |
719 | MODULE_LICENSE("GPL"); |