vmw_balloon: rename VMW_BALLOON_2M_SHIFT to VMW_BALLOON_2M_ORDER
[linux-2.6-block.git] / drivers / misc / vmw_balloon.c
CommitLineData
8b4770ec 1// SPDX-License-Identifier: GPL-2.0
453dc659
DT
2/*
3 * VMware Balloon driver.
4 *
8b4770ec 5 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
453dc659 6 *
453dc659
DT
7 * This is VMware physical memory management driver for Linux. The driver
8 * acts like a "balloon" that can be inflated to reclaim physical pages by
9 * reserving them in the guest and invalidating them in the monitor,
10 * freeing up the underlying machine pages so they can be allocated to
11 * other guests. The balloon can also be deflated to allow the guest to
12 * use more physical memory. Higher level policies can control the sizes
13 * of balloons in VMs in order to manage physical memory resources.
14 */
15
16//#define DEBUG
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
f220a80f 22#include <linux/vmalloc.h>
453dc659
DT
23#include <linux/sched.h>
24#include <linux/module.h>
25#include <linux/workqueue.h>
26#include <linux/debugfs.h>
27#include <linux/seq_file.h>
48e3d668
PM
28#include <linux/vmw_vmci_defs.h>
29#include <linux/vmw_vmci_api.h>
a10a5698 30#include <asm/hypervisor.h>
453dc659
DT
31
32MODULE_AUTHOR("VMware, Inc.");
33MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
48e3d668 34MODULE_VERSION("1.5.0.0-k");
453dc659
DT
35MODULE_ALIAS("dmi:*:svnVMware*:*");
36MODULE_ALIAS("vmware_vmmemctl");
37MODULE_LICENSE("GPL");
38
/*
 * Allocation flags for 2MB balloon pages: allow pages from the HIGHMEM zone
 * (__GFP_HIGHMEM), suppress allocation failure warnings (__GFP_NOWARN) and
 * keep away from the emergency low-memory pools (__GFP_NOMEMALLOC). No
 * __GFP_RECLAIM bit is set, so huge page allocations do not wait.
 */
#define VMW_HUGE_PAGE_ALLOC_FLAGS	(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC)

/*
 * Allocation flags for 4k balloon pages: the flags used for 2MB pages plus
 * __GFP_NORETRY, i.e. only lightweight reclamation is allowed.
 */
#define VMW_PAGE_ALLOC_FLAGS		(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC|__GFP_NORETRY)
453dc659 54
55adaa49
DT
/*
 * Upper bound on the number of pages refused by the hypervisor that are kept
 * around, per page size, during one inflation cycle.
 */
#define VMW_BALLOON_MAX_REFUSED	16

/*
 * Hypervisor communication (backdoor) port definitions.
 */
#define VMW_BALLOON_HV_PORT	0x5670
#define VMW_BALLOON_HV_MAGIC	0x456c6d6f

/* Guest type reported by the GUEST_ID command. */
#define VMW_BALLOON_GUEST_ID	1	/* Linux */
64
eb79100f
XD
/* Capability bits exchanged with the hypervisor by the "start" command. */
enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

/* Every capability this driver requests from the hypervisor. */
#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

/* Allocation order of a 2MB page. */
#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)

/* Number of page sizes the balloon tracks (4k and 2MB). */
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
eb79100f 82
f220a80f
XD
/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10

/* One past the highest command number; sizes the per-command tables. */
#define VMW_BALLOON_CMD_NUM			11
453dc659
DT
109
/* Status codes returned by the hypervisor for a backdoor command. */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

/* "start" succeeded and the hypervisor reported its capabilities. */
#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/*
 * Bitmask, indexed by command number, of the commands whose successful
 * completion also updates the balloon target.
 */
#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
132
68131184
NA
133static const char * const vmballoon_cmd_names[] = {
134 [VMW_BALLOON_CMD_START] = "start",
135 [VMW_BALLOON_CMD_GET_TARGET] = "target",
136 [VMW_BALLOON_CMD_LOCK] = "lock",
137 [VMW_BALLOON_CMD_UNLOCK] = "unlock",
138 [VMW_BALLOON_CMD_GUEST_ID] = "guestType",
139 [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock",
140 [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock",
141 [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock",
142 [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock",
143 [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet"
144};
145
453dc659
DT
#ifdef CONFIG_DEBUG_FS
/*
 * Driver statistics, only kept when debugfs is available. The per-page-size
 * arrays are indexed by "is 2MB page" (0 for 4k pages, 1 for 2MB pages).
 */
struct vmballoon_stats {
	/* number of times the work function ran */
	unsigned int timer;
	/* number of doorbell notifications received */
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* Monitor operations, indexed by command number. */
	unsigned long ops[VMW_BALLOON_CMD_NUM];
	unsigned long ops_fail[VMW_BALLOON_CMD_NUM];
};

/* Bump a statistics counter; expands to nothing without debugfs. */
#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif
167
df8d0d42 168static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
f220a80f 169
365bd7ef 170struct vmballoon_page_size {
453dc659
DT
171 /* list of reserved physical pages */
172 struct list_head pages;
173
174 /* transient list of non-balloonable pages */
175 struct list_head refused_pages;
55adaa49 176 unsigned int n_refused_pages;
365bd7ef
PM
177};
178
6c948757
NA
179/**
180 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
181 *
182 * @status: the status of the operation, which is written by the hypervisor.
183 * @reserved: reserved for future use. Must be set to zero.
184 * @pfn: the physical frame number of the page to be locked or unlocked.
185 */
186struct vmballoon_batch_entry {
187 u64 status : 5;
188 u64 reserved : PAGE_SHIFT - 5;
189 u64 pfn : 52;
190} __packed;
191
365bd7ef
PM
192struct vmballoon {
193 struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
194
195 /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
196 unsigned supported_page_sizes;
453dc659
DT
197
198 /* balloon size in pages */
199 unsigned int size;
200 unsigned int target;
201
202 /* reset flag */
203 bool reset_required;
204
f220a80f
XD
205 unsigned long capabilities;
206
6c948757
NA
207 /**
208 * @batch_page: pointer to communication batch page.
209 *
210 * When batching is used, batch_page points to a page, which holds up to
211 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
212 */
213 struct vmballoon_batch_entry *batch_page;
214
f220a80f
XD
215 unsigned int batch_max_pages;
216 struct page *page;
217
453dc659
DT
218#ifdef CONFIG_DEBUG_FS
219 /* statistics */
220 struct vmballoon_stats stats;
221
222 /* debugfs file exporting statistics */
223 struct dentry *dbg_entry;
224#endif
225
226 struct sysinfo sysinfo;
227
228 struct delayed_work dwork;
48e3d668
PM
229
230 struct vmci_handle vmci_doorbell;
453dc659
DT
231};
232
233static struct vmballoon balloon;
453dc659 234
10a95d5d
NA
235static inline unsigned long
236__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
237 unsigned long arg2, unsigned long *result)
238{
239 unsigned long status, dummy1, dummy2, dummy3, local_result;
240
68131184
NA
241 STATS_INC(b->stats.ops[cmd]);
242
10a95d5d
NA
243 asm volatile ("inl %%dx" :
244 "=a"(status),
245 "=c"(dummy1),
246 "=d"(dummy2),
247 "=b"(local_result),
248 "=S"(dummy3) :
249 "0"(VMW_BALLOON_HV_MAGIC),
250 "1"(cmd),
251 "2"(VMW_BALLOON_HV_PORT),
252 "3"(arg1),
253 "4"(arg2) :
254 "memory");
255
256 /* update the result if needed */
257 if (result)
258 *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
259 local_result;
260
261 /* update target when applicable */
262 if (status == VMW_BALLOON_SUCCESS &&
263 ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
264 b->target = local_result;
265
68131184
NA
266 if (status != VMW_BALLOON_SUCCESS &&
267 status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
268 STATS_INC(b->stats.ops_fail[cmd]);
269 pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
270 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
271 status);
272 }
273
10a95d5d
NA
274 /* mark reset required accordingly */
275 if (status == VMW_BALLOON_ERROR_RESET)
276 b->reset_required = true;
277
278 return status;
279}
280
281static __always_inline unsigned long
282vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
283 unsigned long arg2)
284{
285 unsigned long dummy;
286
287 return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
288}
289
453dc659
DT
290/*
291 * Send "start" command to the host, communicating supported version
292 * of the protocol.
293 */
f220a80f 294static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
453dc659 295{
10a95d5d 296 unsigned long status, capabilities;
365bd7ef 297 bool success;
453dc659 298
10a95d5d
NA
299 status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
300 &capabilities);
f220a80f
XD
301
302 switch (status) {
303 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
304 b->capabilities = capabilities;
365bd7ef
PM
305 success = true;
306 break;
f220a80f
XD
307 case VMW_BALLOON_SUCCESS:
308 b->capabilities = VMW_BALLOON_BASIC_CMDS;
365bd7ef
PM
309 success = true;
310 break;
311 default:
312 success = false;
f220a80f 313 }
453dc659 314
5081efd1
NA
315 /*
316 * 2MB pages are only supported with batching. If batching is for some
317 * reason disabled, do not use 2MB pages, since otherwise the legacy
318 * mechanism is used with 2MB pages, causing a failure.
319 */
320 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
321 (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
365bd7ef
PM
322 b->supported_page_sizes = 2;
323 else
324 b->supported_page_sizes = 1;
325
365bd7ef 326 return success;
453dc659
DT
327}
328
453dc659
DT
329/*
330 * Communicate guest type to the host so that it can adjust ballooning
331 * algorithm to the one most appropriate for the guest. This command
332 * is normally issued after sending "start" command and is part of
333 * standard reset sequence.
334 */
335static bool vmballoon_send_guest_id(struct vmballoon *b)
336{
10a95d5d 337 unsigned long status;
453dc659 338
10a95d5d
NA
339 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
340 VMW_BALLOON_GUEST_ID, 0);
453dc659 341
10a95d5d 342 if (status == VMW_BALLOON_SUCCESS)
453dc659
DT
343 return true;
344
453dc659
DT
345 return false;
346}
347
365bd7ef
PM
348static u16 vmballoon_page_size(bool is_2m_page)
349{
350 if (is_2m_page)
25acbdd7 351 return 1 << VMW_BALLOON_2M_ORDER;
365bd7ef
PM
352
353 return 1;
354}
355
453dc659
DT
356/*
357 * Retrieve desired balloon size from the host.
358 */
10a95d5d 359static bool vmballoon_send_get_target(struct vmballoon *b)
453dc659
DT
360{
361 unsigned long status;
453dc659
DT
362 unsigned long limit;
363 u32 limit32;
364
365 /*
366 * si_meminfo() is cheap. Moreover, we want to provide dynamic
367 * max balloon size later. So let us call si_meminfo() every
368 * iteration.
369 */
370 si_meminfo(&b->sysinfo);
371 limit = b->sysinfo.totalram;
372
373 /* Ensure limit fits in 32-bits */
374 limit32 = (u32)limit;
375 if (limit != limit32)
376 return false;
377
10a95d5d
NA
378 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
379
380 if (status == VMW_BALLOON_SUCCESS)
453dc659 381 return true;
453dc659 382
453dc659
DT
383 return false;
384}
385
622074a9 386static struct page *vmballoon_alloc_page(bool is_2m_page)
365bd7ef
PM
387{
388 if (is_2m_page)
622074a9 389 return alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
25acbdd7 390 VMW_BALLOON_2M_ORDER);
365bd7ef 391
622074a9 392 return alloc_page(VMW_PAGE_ALLOC_FLAGS);
365bd7ef
PM
393}
394
395static void vmballoon_free_page(struct page *page, bool is_2m_page)
396{
397 if (is_2m_page)
25acbdd7 398 __free_pages(page, VMW_BALLOON_2M_ORDER);
365bd7ef
PM
399 else
400 __free_page(page);
401}
402
453dc659
DT
403/*
404 * Quickly release all pages allocated for the balloon. This function is
405 * called when host decides to "reset" balloon for one reason or another.
406 * Unlike normal "deflate" we do not (shall not) notify host of the pages
407 * being released.
408 */
409static void vmballoon_pop(struct vmballoon *b)
410{
411 struct page *page, *next;
365bd7ef
PM
412 unsigned is_2m_pages;
413
414 for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
415 is_2m_pages++) {
416 struct vmballoon_page_size *page_size =
417 &b->page_sizes[is_2m_pages];
418 u16 size_per_page = vmballoon_page_size(is_2m_pages);
419
420 list_for_each_entry_safe(page, next, &page_size->pages, lru) {
421 list_del(&page->lru);
422 vmballoon_free_page(page, is_2m_pages);
423 STATS_INC(b->stats.free[is_2m_pages]);
424 b->size -= size_per_page;
425 cond_resched();
426 }
453dc659 427 }
453dc659 428
b23220fe
GK
429 /* Clearing the batch_page unconditionally has no adverse effect */
430 free_page((unsigned long)b->batch_page);
431 b->batch_page = NULL;
453dc659
DT
432}
433
df8d0d42
NA
434/**
435 * vmballoon_status_page - returns the status of (un)lock operation
436 *
437 * @b: pointer to the balloon.
438 * @idx: index for the page for which the operation is performed.
439 * @p: pointer to where the page struct is returned.
440 *
441 * Following a lock or unlock operation, returns the status of the operation for
442 * an individual page. Provides the page that the operation was performed on on
443 * the @page argument.
444 *
445 * Returns: The status of a lock or unlock operation for an individual page.
453dc659 446 */
df8d0d42
NA
447static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
448 struct page **p)
453dc659 449{
df8d0d42
NA
450 if (static_branch_likely(&vmw_balloon_batching)) {
451 /* batching mode */
452 *p = pfn_to_page(b->batch_page[idx].pfn);
453 return b->batch_page[idx].status;
454 }
10a95d5d 455
df8d0d42
NA
456 /* non-batching mode */
457 *p = b->page;
453dc659 458
df8d0d42
NA
459 /*
460 * If a failure occurs, the indication will be provided in the status
461 * of the entire operation, which is considered before the individual
462 * page status. So for non-batching mode, the indication is always of
463 * success.
464 */
465 return VMW_BALLOON_SUCCESS;
466}
453dc659 467
df8d0d42
NA
468/**
469 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
470 * @b: pointer to the balloon.
471 * @num_pages: number of inflated/deflated pages.
472 * @is_2m_pages: whether the page(s) are 2M (or 4k).
473 * @lock: whether the operation is lock (or unlock).
474 *
475 * Notify the host about page(s) that were ballooned (or removed from the
476 * balloon) so that host can use it without fear that guest will need it (or
477 * stop using them since the VM does). Host may reject some pages, we need to
478 * check the return value and maybe submit a different page. The pages that are
479 * inflated/deflated are pointed by @b->page.
480 *
481 * Return: result as provided by the hypervisor.
482 */
483static unsigned long vmballoon_lock_op(struct vmballoon *b,
484 unsigned int num_pages,
485 bool is_2m_pages, bool lock)
486{
487 unsigned long cmd, pfn;
488
489 if (static_branch_likely(&vmw_balloon_batching)) {
490 if (lock)
491 cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
492 VMW_BALLOON_CMD_BATCHED_LOCK;
493 else
494 cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
495 VMW_BALLOON_CMD_BATCHED_UNLOCK;
496
497 pfn = PHYS_PFN(virt_to_phys(b->batch_page));
498 } else {
499 cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK;
500 pfn = page_to_pfn(b->page);
501
502 /* In non-batching mode, PFNs must fit in 32-bit */
503 if (unlikely(pfn != (u32)pfn))
504 return VMW_BALLOON_ERROR_PPN_INVALID;
ef0f8f11 505 }
453dc659 506
df8d0d42 507 return vmballoon_cmd(b, cmd, pfn, num_pages);
453dc659
DT
508}
509
df8d0d42
NA
510static int vmballoon_lock(struct vmballoon *b, unsigned int num_pages,
511 bool is_2m_pages)
f220a80f 512{
df8d0d42
NA
513 unsigned long batch_status;
514 int i;
365bd7ef 515 u16 size_per_page = vmballoon_page_size(is_2m_pages);
f220a80f 516
df8d0d42 517 batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, true);
f220a80f
XD
518
519 for (i = 0; i < num_pages; i++) {
df8d0d42
NA
520 unsigned long status;
521 struct page *p;
365bd7ef
PM
522 struct vmballoon_page_size *page_size =
523 &b->page_sizes[is_2m_pages];
f220a80f 524
df8d0d42
NA
525 status = vmballoon_status_page(b, i, &p);
526
527 /*
528 * Failure of the whole batch overrides a single operation
529 * results.
530 */
531 if (batch_status != VMW_BALLOON_SUCCESS)
532 status = batch_status;
f220a80f 533
df8d0d42
NA
534 if (status == VMW_BALLOON_SUCCESS) {
535 /* track allocated page */
365bd7ef 536 list_add(&p->lru, &page_size->pages);
df8d0d42
NA
537
538 /* update balloon size */
365bd7ef 539 b->size += size_per_page;
df8d0d42
NA
540 continue;
541 }
542
543 /* Error occurred */
544 STATS_INC(b->stats.refused_alloc[is_2m_pages]);
545
8fa3c61a
NA
546 /*
547 * Place page on the list of non-balloonable pages
548 * and retry allocation, unless we already accumulated
549 * too many of them, in which case take a breather.
550 */
551 list_add(&p->lru, &page_size->refused_pages);
552 page_size->n_refused_pages++;
f220a80f
XD
553 }
554
df8d0d42 555 return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
f220a80f
XD
556}
557
453dc659
DT
558/*
559 * Release the page allocated for the balloon. Note that we first notify
560 * the host so it can make sure the page will be available for the guest
561 * to use, if needed.
562 */
df8d0d42
NA
563static int vmballoon_unlock(struct vmballoon *b, unsigned int num_pages,
564 bool is_2m_pages)
453dc659 565{
df8d0d42
NA
566 int i;
567 unsigned long batch_status;
365bd7ef 568 u16 size_per_page = vmballoon_page_size(is_2m_pages);
f220a80f 569
df8d0d42 570 batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, false);
f220a80f
XD
571
572 for (i = 0; i < num_pages; i++) {
df8d0d42
NA
573 struct vmballoon_page_size *page_size;
574 unsigned long status;
575 struct page *p;
576
577 status = vmballoon_status_page(b, i, &p);
578 page_size = &b->page_sizes[is_2m_pages];
f220a80f 579
df8d0d42
NA
580 /*
581 * Failure of the whole batch overrides a single operation
582 * results.
583 */
584 if (batch_status != VMW_BALLOON_SUCCESS)
585 status = batch_status;
586
587 if (status != VMW_BALLOON_SUCCESS) {
f220a80f
XD
588 /*
589 * That page wasn't successfully unlocked by the
590 * hypervisor, re-add it to the list of pages owned by
591 * the balloon driver.
592 */
365bd7ef 593 list_add(&p->lru, &page_size->pages);
f220a80f
XD
594 } else {
595 /* deallocate page */
365bd7ef
PM
596 vmballoon_free_page(p, is_2m_pages);
597 STATS_INC(b->stats.free[is_2m_pages]);
f220a80f
XD
598
599 /* update balloon size */
365bd7ef 600 b->size -= size_per_page;
f220a80f
XD
601 }
602 }
603
df8d0d42 604 return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
f220a80f
XD
605}
606
453dc659
DT
607/*
608 * Release pages that were allocated while attempting to inflate the
609 * balloon but were refused by the host for one reason or another.
610 */
365bd7ef
PM
611static void vmballoon_release_refused_pages(struct vmballoon *b,
612 bool is_2m_pages)
453dc659
DT
613{
614 struct page *page, *next;
365bd7ef
PM
615 struct vmballoon_page_size *page_size =
616 &b->page_sizes[is_2m_pages];
453dc659 617
365bd7ef 618 list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
453dc659 619 list_del(&page->lru);
365bd7ef
PM
620 vmballoon_free_page(page, is_2m_pages);
621 STATS_INC(b->stats.refused_free[is_2m_pages]);
453dc659 622 }
55adaa49 623
365bd7ef 624 page_size->n_refused_pages = 0;
453dc659
DT
625}
626
f220a80f
XD
627static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
628{
df8d0d42
NA
629 if (static_branch_likely(&vmw_balloon_batching))
630 b->batch_page[idx] = (struct vmballoon_batch_entry)
6c948757 631 { .pfn = page_to_pfn(p) };
df8d0d42
NA
632 else
633 b->page = p;
f220a80f
XD
634}
635
453dc659
DT
636/*
637 * Inflate the balloon towards its target size. Note that we try to limit
638 * the rate of allocation to make sure we are not choking the rest of the
639 * system.
640 */
641static void vmballoon_inflate(struct vmballoon *b)
642{
f220a80f 643 unsigned int num_pages = 0;
453dc659 644 int error = 0;
365bd7ef 645 bool is_2m_pages;
453dc659
DT
646
647 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
648
649 /*
650 * First try NOSLEEP page allocations to inflate balloon.
651 *
652 * If we do not throttle nosleep allocations, we can drain all
653 * free pages in the guest quickly (if the balloon target is high).
654 * As a side-effect, draining free pages helps to inform (force)
655 * the guest to start swapping if balloon target is not met yet,
656 * which is a desired behavior. However, balloon driver can consume
657 * all available CPU cycles if too many pages are allocated in a
658 * second. Therefore, we throttle nosleep allocations even when
659 * the guest is not under memory pressure. OTOH, if we have already
660 * predicted that the guest is under memory pressure, then we
661 * slowdown page allocations considerably.
662 */
663
453dc659
DT
664 /*
665 * Start with no sleep allocation rate which may be higher
666 * than sleeping allocation rate.
667 */
ec992cc7 668 is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
453dc659 669
ec992cc7 670 pr_debug("%s - goal: %d", __func__, b->target - b->size);
453dc659 671
33d268ed 672 while (!b->reset_required &&
365bd7ef
PM
673 b->size + num_pages * vmballoon_page_size(is_2m_pages)
674 < b->target) {
4670de4d 675 struct page *page;
453dc659 676
622074a9
NA
677 STATS_INC(b->stats.alloc[is_2m_pages]);
678 page = vmballoon_alloc_page(is_2m_pages);
ef0f8f11 679 if (!page) {
365bd7ef 680 STATS_INC(b->stats.alloc_fail[is_2m_pages]);
365bd7ef 681 if (is_2m_pages) {
df8d0d42 682 vmballoon_lock(b, num_pages, true);
365bd7ef
PM
683
684 /*
685 * ignore errors from locking as we now switch
686 * to 4k pages and we might get different
687 * errors.
688 */
689
690 num_pages = 0;
691 is_2m_pages = false;
692 continue;
693 }
622074a9 694 break;
453dc659
DT
695 }
696
df8d0d42 697 vmballoon_add_page(b, num_pages++, page);
f220a80f 698 if (num_pages == b->batch_max_pages) {
8fa3c61a
NA
699 struct vmballoon_page_size *page_size =
700 &b->page_sizes[is_2m_pages];
701
df8d0d42 702 error = vmballoon_lock(b, num_pages, is_2m_pages);
10a95d5d 703
f220a80f 704 num_pages = 0;
8fa3c61a
NA
705
706 /*
707 * Stop allocating this page size if we already
708 * accumulated too many pages that the hypervisor
709 * refused.
710 */
711 if (page_size->n_refused_pages >=
712 VMW_BALLOON_MAX_REFUSED) {
713 if (!is_2m_pages)
714 break;
715
716 /*
717 * Release the refused pages as we move to 4k
718 * pages.
719 */
720 vmballoon_release_refused_pages(b, true);
721 is_2m_pages = true;
722 }
723
f220a80f
XD
724 if (error)
725 break;
726 }
ef0f8f11 727
33d268ed 728 cond_resched();
453dc659
DT
729 }
730
f220a80f 731 if (num_pages > 0)
df8d0d42 732 vmballoon_lock(b, num_pages, is_2m_pages);
f220a80f 733
365bd7ef
PM
734 vmballoon_release_refused_pages(b, true);
735 vmballoon_release_refused_pages(b, false);
453dc659
DT
736}
737
738/*
739 * Decrease the size of the balloon allowing guest to use more memory.
740 */
741static void vmballoon_deflate(struct vmballoon *b)
742{
365bd7ef 743 unsigned is_2m_pages;
453dc659 744
33d268ed 745 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
453dc659
DT
746
747 /* free pages to reach target */
365bd7ef
PM
748 for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
749 is_2m_pages++) {
750 struct page *page, *next;
751 unsigned int num_pages = 0;
752 struct vmballoon_page_size *page_size =
753 &b->page_sizes[is_2m_pages];
754
755 list_for_each_entry_safe(page, next, &page_size->pages, lru) {
756 if (b->reset_required ||
757 (b->target > 0 &&
758 b->size - num_pages
759 * vmballoon_page_size(is_2m_pages)
760 < b->target + vmballoon_page_size(true)))
761 break;
f220a80f 762
365bd7ef 763 list_del(&page->lru);
df8d0d42 764 vmballoon_add_page(b, num_pages++, page);
33d268ed 765
365bd7ef
PM
766 if (num_pages == b->batch_max_pages) {
767 int error;
453dc659 768
df8d0d42 769 error = vmballoon_unlock(b, num_pages,
10a95d5d 770 is_2m_pages);
365bd7ef
PM
771 num_pages = 0;
772 if (error)
773 return;
774 }
33d268ed 775
365bd7ef
PM
776 cond_resched();
777 }
453dc659 778
365bd7ef 779 if (num_pages > 0)
df8d0d42 780 vmballoon_unlock(b, num_pages, is_2m_pages);
365bd7ef 781 }
f220a80f
XD
782}
783
df8d0d42
NA
784/**
785 * vmballoon_deinit_batching - disables batching mode.
786 *
787 * @b: pointer to &struct vmballoon.
788 *
789 * Disables batching, by deallocating the page for communication with the
790 * hypervisor and disabling the static key to indicate that batching is off.
791 */
792static void vmballoon_deinit_batching(struct vmballoon *b)
793{
794 free_page((unsigned long)b->batch_page);
795 b->batch_page = NULL;
796 static_branch_disable(&vmw_balloon_batching);
797 b->batch_max_pages = 1;
798}
f220a80f 799
df8d0d42
NA
800/**
801 * vmballoon_init_batching - enable batching mode.
802 *
803 * @b: pointer to &struct vmballoon.
804 *
805 * Enables batching, by allocating a page for communication with the hypervisor
806 * and enabling the static_key to use batching.
807 *
808 * Return: zero on success or an appropriate error-code.
809 */
810static int vmballoon_init_batching(struct vmballoon *b)
f220a80f 811{
b23220fe 812 struct page *page;
f220a80f 813
b23220fe
GK
814 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
815 if (!page)
df8d0d42 816 return -ENOMEM;
f220a80f 817
b23220fe 818 b->batch_page = page_address(page);
df8d0d42
NA
819 b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
820
821 static_branch_enable(&vmw_balloon_batching);
822
823 return 0;
f220a80f
XD
824}
825
48e3d668
PM
826/*
827 * Receive notification and resize balloon
828 */
829static void vmballoon_doorbell(void *client_data)
830{
831 struct vmballoon *b = client_data;
832
833 STATS_INC(b->stats.doorbell);
834
835 mod_delayed_work(system_freezable_wq, &b->dwork, 0);
836}
837
838/*
839 * Clean up vmci doorbell
840 */
841static void vmballoon_vmci_cleanup(struct vmballoon *b)
842{
10a95d5d
NA
843 vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
844 VMCI_INVALID_ID, VMCI_INVALID_ID);
48e3d668 845
48e3d668
PM
846 if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
847 vmci_doorbell_destroy(b->vmci_doorbell);
848 b->vmci_doorbell = VMCI_INVALID_HANDLE;
849 }
850}
851
852/*
853 * Initialize vmci doorbell, to get notified as soon as balloon changes
854 */
855static int vmballoon_vmci_init(struct vmballoon *b)
856{
10a95d5d 857 unsigned long error;
48e3d668 858
ce664331
NA
859 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
860 return 0;
48e3d668 861
ce664331
NA
862 error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
863 VMCI_PRIVILEGE_FLAG_RESTRICTED,
864 vmballoon_doorbell, b);
48e3d668 865
ce664331
NA
866 if (error != VMCI_SUCCESS)
867 goto fail;
868
10a95d5d
NA
869 error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
870 b->vmci_doorbell.context,
871 b->vmci_doorbell.resource, NULL);
ce664331 872
ce664331
NA
873 if (error != VMW_BALLOON_SUCCESS)
874 goto fail;
48e3d668
PM
875
876 return 0;
ce664331
NA
877fail:
878 vmballoon_vmci_cleanup(b);
879 return -EIO;
48e3d668
PM
880}
881
f220a80f
XD
882/*
883 * Perform standard reset sequence by popping the balloon (in case it
884 * is not empty) and then restarting protocol. This operation normally
885 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
886 */
887static void vmballoon_reset(struct vmballoon *b)
888{
48e3d668
PM
889 int error;
890
891 vmballoon_vmci_cleanup(b);
892
f220a80f
XD
893 /* free all pages, skipping monitor unlock */
894 vmballoon_pop(b);
895
896 if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
897 return;
898
899 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
df8d0d42 900 if (vmballoon_init_batching(b)) {
f220a80f
XD
901 /*
902 * We failed to initialize batching, inform the monitor
903 * about it by sending a null capability.
904 *
905 * The guest will retry in one second.
906 */
907 vmballoon_send_start(b, 0);
908 return;
909 }
910 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
df8d0d42 911 vmballoon_deinit_batching(b);
f220a80f
XD
912 }
913
914 b->reset_required = false;
48e3d668
PM
915
916 error = vmballoon_vmci_init(b);
917 if (error)
918 pr_err("failed to initialize vmci doorbell\n");
919
f220a80f
XD
920 if (!vmballoon_send_guest_id(b))
921 pr_err("failed to send guest ID to the host\n");
453dc659
DT
922}
923
924/*
925 * Balloon work function: reset protocol, if needed, get the new size and
926 * adjust balloon as needed. Repeat in 1 sec.
927 */
928static void vmballoon_work(struct work_struct *work)
929{
930 struct delayed_work *dwork = to_delayed_work(work);
931 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
453dc659
DT
932
933 STATS_INC(b->stats.timer);
934
935 if (b->reset_required)
936 vmballoon_reset(b);
937
10a95d5d
NA
938 if (!b->reset_required && vmballoon_send_get_target(b)) {
939 unsigned long target = b->target;
453dc659 940
10a95d5d 941 /* update target, adjust size */
453dc659
DT
942 if (b->size < target)
943 vmballoon_inflate(b);
365bd7ef
PM
944 else if (target == 0 ||
945 b->size > target + vmballoon_page_size(true))
453dc659
DT
946 vmballoon_deflate(b);
947 }
948
beda94da
DT
949 /*
950 * We are using a freezable workqueue so that balloon operations are
951 * stopped while the system transitions to/from sleep/hibernation.
952 */
953 queue_delayed_work(system_freezable_wq,
954 dwork, round_jiffies_relative(HZ));
453dc659
DT
955}
956
957/*
958 * DEBUGFS Interface
959 */
960#ifdef CONFIG_DEBUG_FS
961
962static int vmballoon_debug_show(struct seq_file *f, void *offset)
963{
964 struct vmballoon *b = f->private;
965 struct vmballoon_stats *stats = &b->stats;
68131184 966 int i;
453dc659 967
b36e89da
PM
968 /* format capabilities info */
969 seq_printf(f,
970 "balloon capabilities: %#4x\n"
d7568c13
PM
971 "used capabilities: %#4lx\n"
972 "is resetting: %c\n",
973 VMW_BALLOON_CAPABILITIES, b->capabilities,
974 b->reset_required ? 'y' : 'n');
b36e89da 975
453dc659
DT
976 /* format size info */
977 seq_printf(f,
978 "target: %8d pages\n"
979 "current: %8d pages\n",
980 b->target, b->size);
981
68131184
NA
982 for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
983 if (vmballoon_cmd_names[i] == NULL)
984 continue;
985
986 seq_printf(f, "%-22s: %16lu (%lu failed)\n",
987 vmballoon_cmd_names[i], stats->ops[i],
988 stats->ops_fail[i]);
989 }
990
453dc659
DT
991 seq_printf(f,
992 "\n"
993 "timer: %8u\n"
48e3d668 994 "doorbell: %8u\n"
365bd7ef 995 "prim2mAlloc: %8u (%4u failed)\n"
622074a9 996 "prim4kAlloc: %8u (%4u failed)\n"
365bd7ef 997 "prim2mFree: %8u\n"
453dc659 998 "primFree: %8u\n"
365bd7ef 999 "err2mAlloc: %8u\n"
453dc659 1000 "errAlloc: %8u\n"
365bd7ef 1001 "err2mFree: %8u\n"
68131184 1002 "errFree: %8u\n",
453dc659 1003 stats->timer,
48e3d668 1004 stats->doorbell,
365bd7ef
PM
1005 stats->alloc[true], stats->alloc_fail[true],
1006 stats->alloc[false], stats->alloc_fail[false],
365bd7ef
PM
1007 stats->free[true],
1008 stats->free[false],
1009 stats->refused_alloc[true], stats->refused_alloc[false],
68131184 1010 stats->refused_free[true], stats->refused_free[false]);
453dc659
DT
1011
1012 return 0;
1013}
1014
1015static int vmballoon_debug_open(struct inode *inode, struct file *file)
1016{
1017 return single_open(file, vmballoon_debug_show, inode->i_private);
1018}
1019
1020static const struct file_operations vmballoon_debug_fops = {
1021 .owner = THIS_MODULE,
1022 .open = vmballoon_debug_open,
1023 .read = seq_read,
1024 .llseek = seq_lseek,
1025 .release = single_release,
1026};
1027
1028static int __init vmballoon_debugfs_init(struct vmballoon *b)
1029{
1030 int error;
1031
1032 b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1033 &vmballoon_debug_fops);
1034 if (IS_ERR(b->dbg_entry)) {
1035 error = PTR_ERR(b->dbg_entry);
1036 pr_err("failed to create debugfs entry, error: %d\n", error);
1037 return error;
1038 }
1039
1040 return 0;
1041}
1042
1043static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
1044{
1045 debugfs_remove(b->dbg_entry);
1046}
1047
1048#else
1049
/* Without CONFIG_DEBUG_FS there is nothing to create; always succeeds. */
static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}
1054
/* Without CONFIG_DEBUG_FS there is nothing to remove. */
static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}
1058
1059#endif /* CONFIG_DEBUG_FS */
1060
1061static int __init vmballoon_init(void)
1062{
1063 int error;
365bd7ef 1064 unsigned is_2m_pages;
453dc659
DT
1065 /*
1066 * Check if we are running on VMware's hypervisor and bail out
1067 * if we are not.
1068 */
03b2a320 1069 if (x86_hyper_type != X86_HYPER_VMWARE)
453dc659
DT
1070 return -ENODEV;
1071
365bd7ef
PM
1072 for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
1073 is_2m_pages++) {
1074 INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
1075 INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
1076 }
453dc659 1077
453dc659
DT
1078 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
1079
453dc659
DT
1080 error = vmballoon_debugfs_init(&balloon);
1081 if (error)
beda94da 1082 return error;
453dc659 1083
48e3d668 1084 balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
d7568c13
PM
1085 balloon.batch_page = NULL;
1086 balloon.page = NULL;
1087 balloon.reset_required = true;
1088
beda94da 1089 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
453dc659
DT
1090
1091 return 0;
453dc659 1092}
c3cc1b0f
NA
1093
1094/*
1095 * Using late_initcall() instead of module_init() allows the balloon to use the
1096 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
1097 * VMCI is probed only after the balloon is initialized. If the balloon is used
1098 * as a module, late_initcall() is equivalent to module_init().
1099 */
1100late_initcall(vmballoon_init);
453dc659
DT
1101
1102static void __exit vmballoon_exit(void)
1103{
48e3d668 1104 vmballoon_vmci_cleanup(&balloon);
453dc659 1105 cancel_delayed_work_sync(&balloon.dwork);
453dc659
DT
1106
1107 vmballoon_debugfs_exit(&balloon);
1108
1109 /*
1110 * Deallocate all reserved memory, and reset connection with monitor.
1111 * Reset connection before deallocating memory to avoid potential for
1112 * additional spurious resets from guest touching deallocated pages.
1113 */
d7568c13 1114 vmballoon_send_start(&balloon, 0);
453dc659
DT
1115 vmballoon_pop(&balloon);
1116}
1117module_exit(vmballoon_exit);