// SPDX-License-Identifier: GPL-2.0
/*
 * Resource Director Technology (RDT)
 *
 * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Reinette Chatre <reinette.chatre@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
#include <linux/perf_event.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include <asm/cacheflush.h>
#include <asm/intel-family.h>
#include <asm/resctrl.h>
#include <asm/perf_event.h>

#include "../../events/perf_event.h" /* For X86_CONFIG() */
#include "internal.h"

#define CREATE_TRACE_POINTS
#include "pseudo_lock_event.h"

/*
 * The bits needed to disable hardware prefetching vary based on the
 * platform. During initialization we will discover which bits to use.
 */
static u64 prefetch_disable_bits;

/*
 * Major number assigned to and shared by all devices exposing
 * pseudo-locked regions.
 */
static unsigned int pseudo_lock_major;
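/* Bitmask of available minor numbers: a set bit means the minor is free. */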
static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);

static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
{
	const struct rdtgroup *rdtgrp;

	rdtgrp = dev_get_drvdata(dev);
	if (mode)
		*mode = 0600;
	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
}

static const struct class pseudo_lock_class = {
	.name = "pseudo_lock",
	.devnode = pseudo_lock_devnode,
};

/**
 * get_prefetch_disable_bits - prefetch disable bits of supported platforms
 * @void: It takes no parameters.
 *
 * Capture the list of platforms that have been validated to support
 * pseudo-locking. This includes testing to ensure pseudo-locked regions
 * with low cache miss rates can be created under a variety of load
 * conditions as well as that these pseudo-locked regions can maintain
 * their low cache miss rates under a variety of load conditions for
 * significant lengths of time.
 *
 * After a platform has been validated to support pseudo-locking its
 * hardware prefetch disable bits are included here as they are documented
 * in the SDM.
 *
 * When adding a platform here also add support for its cache events to
 * measure_cycles_perf_fn().
 *
 * Return:
 * If the platform is supported, the bits to disable hardware prefetchers;
 * 0 if the platform is not supported.
 */
static u64 get_prefetch_disable_bits(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	    boot_cpu_data.x86 != 6)
		return 0;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_BROADWELL_X:
		/*
		 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
		 * as:
		 * 0    L2 Hardware Prefetcher Disable (R/W)
		 * 1    L2 Adjacent Cache Line Prefetcher Disable (R/W)
		 * 2    DCU Hardware Prefetcher Disable (R/W)
		 * 3    DCU IP Prefetcher Disable (R/W)
		 * 63:4 Reserved
		 */
		return 0xF;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		/*
		 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
		 * as:
		 * 0    L2 Hardware Prefetcher Disable (R/W)
		 * 1    Reserved
		 * 2    DCU Hardware Prefetcher Disable (R/W)
		 * 63:3 Reserved
		 */
		return 0x5;
	}

	return 0;
}

/**
 * pseudo_lock_minor_get - Obtain available minor number
 * @minor: Pointer to where new minor number will be stored
 *
 * A bitmask is used to track available minor numbers. Here the next free
 * minor number is marked as unavailable and returned.
 *
 * Return: 0 on success, <0 on failure.
 */
static int pseudo_lock_minor_get(unsigned int *minor)
{
	unsigned long first_bit;

	first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);

	if (first_bit == MINORBITS)
		return -ENOSPC;

	__clear_bit(first_bit, &pseudo_lock_minor_avail);
	*minor = first_bit;

	return 0;
}

/**
 * pseudo_lock_minor_release - Return minor number to available
 * @minor: The minor number made available
 */
static void pseudo_lock_minor_release(unsigned int minor)
{
	__set_bit(minor, &pseudo_lock_minor_avail);
}

/**
 * region_find_by_minor - Locate a pseudo-lock region by inode minor number
 * @minor: The minor number of the device representing the pseudo-locked region
 *
 * When the character device is accessed we need to determine which
 * pseudo-locked region it belongs to. This is done by matching the minor
 * number of the device to the pseudo-locked region to which it belongs.
 *
 * Minor numbers are assigned at the time a pseudo-locked region is associated
 * with a cache instance.
 *
 * Return: On success return pointer to resource group owning the pseudo-locked
 * region, NULL on failure.
 */
static struct rdtgroup *region_find_by_minor(unsigned int minor)
{
	struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
			rdtgrp_match = rdtgrp;
			break;
		}
	}
	return rdtgrp_match;
}

/**
 * struct pseudo_lock_pm_req - A power management QoS request list entry
 * @list: Entry within the @pm_reqs list for a pseudo-locked region
 * @req:  PM QoS request
 */
struct pseudo_lock_pm_req {
	struct list_head list;
	struct dev_pm_qos_request req;
};

static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req, *next;

	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
		dev_pm_qos_remove_request(&pm_req->req);
		list_del(&pm_req->list);
		kfree(pm_req);
	}
}

/**
 * pseudo_lock_cstates_constrain - Restrict cores from entering C6
 * @plr: Pseudo-locked region
 *
 * To prevent the cache from being affected by power management, entering
 * C6 has to be avoided. This is accomplished by requesting a latency
 * requirement lower than the lowest C6 exit latency of all supported
 * platforms as found in the cpuidle state tables in the intel_idle driver.
 * At this time it is possible to do so with a single latency requirement
 * for all supported platforms.
 *
 * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
 * the ACPI latencies need to be considered while keeping in mind that C2
 * may be set to map to deeper sleep states. In this case the latency
 * requirement needs to prevent entering C2 also.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req;
	int cpu;
	int ret;

	for_each_cpu(cpu, &plr->d->cpu_mask) {
		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
		if (!pm_req) {
			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
			ret = -ENOMEM;
			goto out_err;
		}
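		/*
		 * Request a resume latency of 30 usec: below the C6 exit
		 * latency of all supported platforms (see comment above)
		 * so that these CPUs will not enter C6.
		 */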
		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
					     &pm_req->req,
					     DEV_PM_QOS_RESUME_LATENCY,
					     30);
		if (ret < 0) {
			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
					    cpu);
			kfree(pm_req);
			ret = -1;
			goto out_err;
		}
		list_add(&pm_req->list, &plr->pm_reqs);
	}

	return 0;

out_err:
	pseudo_lock_cstates_relax(plr);
	return ret;
}

/**
 * pseudo_lock_region_clear - Reset pseudo-lock region data
 * @plr: pseudo-lock region
 *
 * All content of the pseudo-locked region is reset - any allocated memory
 * is freed.
 *
 * Return: void
 */
static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
{
	plr->size = 0;
	plr->line_size = 0;
	kfree(plr->kmem);
	plr->kmem = NULL;
	plr->s = NULL;
	if (plr->d)
		plr->d->plr = NULL;
	plr->d = NULL;
	plr->cbm = 0;
	plr->debugfs_dir = NULL;
}

/**
 * pseudo_lock_region_init - Initialize pseudo-lock region information
 * @plr: pseudo-lock region
 *
 * Called after the user provided a schemata to be pseudo-locked. From the
 * schemata the &struct pseudo_lock_region is on entry already initialized
 * with the resource, domain, and capacity bitmask. Here the information
 * required for pseudo-locking is deduced from this data and &struct
 * pseudo_lock_region initialized further. This information includes:
 * - size in bytes of the region to be pseudo-locked
 * - cache line size to know the stride with which data needs to be accessed
 *   to be pseudo-locked
 * - a cpu associated with the cache instance on which the pseudo-locking
 *   flow can be executed
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
	struct cpu_cacheinfo *ci;
	int ret;
	int i;

	/* Pick the first cpu we find that is associated with the cache. */
	plr->cpu = cpumask_first(&plr->d->cpu_mask);

	if (!cpu_online(plr->cpu)) {
		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
				    plr->cpu);
		ret = -ENODEV;
		goto out_region;
	}

	ci = get_cpu_cacheinfo(plr->cpu);

	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);

	for (i = 0; i < ci->num_leaves; i++) {
		if (ci->info_list[i].level == plr->s->res->cache_level) {
			plr->line_size = ci->info_list[i].coherency_line_size;
			return 0;
		}
	}

	ret = -1;
	rdt_last_cmd_puts("Unable to determine cache line size\n");
out_region:
	pseudo_lock_region_clear(plr);
	return ret;
}

/**
 * pseudo_lock_init - Initialize a pseudo-lock region
 * @rdtgrp: resource group to which new pseudo-locked region will belong
 *
 * A pseudo-locked region is associated with a resource group. When this
 * association is created the pseudo-locked region is initialized. The
 * details of the pseudo-locked region are not known at this time so only
 * allocation is done and association established.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_init(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr;

	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
	if (!plr)
		return -ENOMEM;

	init_waitqueue_head(&plr->lock_thread_wq);
	INIT_LIST_HEAD(&plr->pm_reqs);
	rdtgrp->plr = plr;
	return 0;
}

/**
 * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
 * @plr: pseudo-lock region
 *
 * Initialize the details required to set up the pseudo-locked region and
 * allocate the contiguous memory that will be pseudo-locked to the cache.
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
{
	int ret;

	ret = pseudo_lock_region_init(plr);
	if (ret < 0)
		return ret;

	/*
	 * We do not yet support contiguous regions larger than
	 * KMALLOC_MAX_SIZE.
	 */
	if (plr->size > KMALLOC_MAX_SIZE) {
		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
		ret = -E2BIG;
		goto out_region;
	}

	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
	if (!plr->kmem) {
		rdt_last_cmd_puts("Unable to allocate memory\n");
		ret = -ENOMEM;
		goto out_region;
	}

	ret = 0;
	goto out;
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * pseudo_lock_free - Free a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-locked region belonged
 *
 * The pseudo-locked region's resources have already been released, or not
 * yet created at this point. Now it can be freed and disassociated from the
 * resource group.
 *
 * Return: void
 */
static void pseudo_lock_free(struct rdtgroup *rdtgrp)
{
	pseudo_lock_region_clear(rdtgrp->plr);
	kfree(rdtgrp->plr);
	rdtgrp->plr = NULL;
}

/**
 * pseudo_lock_fn - Load kernel memory into cache
 * @_rdtgrp: resource group to which pseudo-lock region belongs
 *
 * This is the core pseudo-locking flow.
 *
 * First we ensure that the kernel memory cannot be found in the cache.
 * Then, while taking care that there will be as little interference as
 * possible, the memory to be loaded is accessed while the core is running
 * with the class of service set to the bitmask of the pseudo-locked region.
 * After this is complete no future CAT allocations will be allowed to
 * overlap with this bitmask.
 *
 * Local register variables are utilized to ensure that the memory region
 * to be locked is the only memory access made during the critical locking
 * loop.
 *
 * Return: 0. Waiter on waitqueue will be woken on completion.
 */
static int pseudo_lock_fn(void *_rdtgrp)
{
	struct rdtgroup *rdtgrp = _rdtgrp;
	struct pseudo_lock_region *plr = rdtgrp->plr;
	u32 rmid_p, closid_p;
	unsigned long i;
	u64 saved_msr;
#ifdef CONFIG_KASAN
	/*
	 * The registers used for local register variables are also used
	 * when KASAN is active. When KASAN is active we use a regular
	 * variable to ensure we always use a valid pointer, but the cost
	 * is that this variable will enter the cache through evicting the
	 * memory we are trying to lock into the cache. Thus expect lower
	 * pseudo-locking success rate when KASAN is active.
	 */
	unsigned int line_size;
	unsigned int size;
	void *mem_r;
#else
	register unsigned int line_size asm("esi");
	register unsigned int size asm("edi");
	register void *mem_r asm(_ASM_BX);
#endif /* CONFIG_KASAN */

	/*
	 * Make sure none of the allocated memory is cached. If it is we
	 * will get a cache hit in below loop from outside of pseudo-locked
	 * region.
	 * wbinvd (as opposed to clflush/clflushopt) is required to
	 * increase likelihood that allocated cache portion will be filled
	 * with associated memory.
	 */
	native_wbinvd();

	/*
	 * Always called with interrupts enabled. By disabling interrupts
	 * ensure that we will not be preempted during this critical section.
	 */
	local_irq_disable();

	/*
	 * Call wrmsr and rdmsr as directly as possible to avoid tracing
	 * clobbering local register variables or affecting cache accesses.
	 *
	 * Disable the hardware prefetcher so that when the end of the memory
	 * being pseudo-locked is reached the hardware will not read beyond
	 * the buffer and evict pseudo-locked memory read earlier from the
	 * cache.
	 */
	saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL);
	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
	closid_p = this_cpu_read(pqr_state.cur_closid);
	rmid_p = this_cpu_read(pqr_state.cur_rmid);
	mem_r = plr->kmem;
	size = plr->size;
	line_size = plr->line_size;
	/*
	 * Critical section begin: start by writing the closid associated
	 * with the capacity bitmask of the cache region being
	 * pseudo-locked followed by reading of kernel memory to load it
	 * into the cache.
	 */
	__wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
	/*
	 * Cache was flushed earlier. Now access kernel memory to read it
	 * into cache region associated with just activated plr->closid.
	 * Loop over data twice:
	 * - In first loop the cache region is shared with the page walker
	 *   as it populates the paging structure caches (including TLB).
	 * - In the second loop the paging structure caches are used and
	 *   cache region is populated with the memory being referenced.
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		/*
		 * Add a barrier to prevent speculative execution of this
		 * loop reading beyond the end of the buffer.
		 */
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	for (i = 0; i < size; i += line_size) {
		/*
		 * Add a barrier to prevent speculative execution of this
		 * loop reading beyond the end of the buffer.
		 */
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	/*
	 * Critical section end: restore closid with capacity bitmask that
	 * does not overlap with pseudo-locked region.
	 */
	__wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, closid_p);

	/* Re-enable the hardware prefetcher(s) */
	wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr);
	local_irq_enable();

	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

/**
 * rdtgroup_monitor_in_progress - Test if monitoring in progress
 * @rdtgrp: resource group being queried
 *
 * Return: 1 if monitor groups have been created for this resource
 * group, 0 otherwise.
 */
static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
{
	return !list_empty(&rdtgrp->mon.crdtgrp_list);
}

/**
 * rdtgroup_locksetup_user_restrict - Restrict user access to group
 * @rdtgrp: resource group needing access restricted
 *
 * A resource group used for cache pseudo-locking cannot have cpus or tasks
 * assigned to it. This is communicated to the user by restricting access
 * to all the files that can be used to make such changes.
 *
 * Permissions restored with rdtgroup_locksetup_user_restore()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restriction of access an attempt will be made to restore permissions but
 * the state of the mode of these files will be uncertain when a failure
 * occurs.
 */
static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
	if (ret)
		goto err_cpus;

	if (resctrl_arch_mon_capable()) {
		ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
err_cpus:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
err_tasks:
	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_user_restore - Restore user access to group
 * @rdtgrp: resource group needing access restored
 *
 * Restore all file access previously removed using
 * rdtgroup_locksetup_user_restrict()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restoration of access an attempt will be made to restrict permissions
 * again but the state of the mode of these files will be uncertain when
 * a failure occurs.
 */
static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
	if (ret)
		goto err_cpus;

	if (resctrl_arch_mon_capable()) {
		ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
err_cpus:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
err_tasks:
	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
out:
	return ret;
}

/**
 * rdtgroup_locksetup_enter - Resource group enters locksetup mode
 * @rdtgrp: resource group requested to enter locksetup mode
 *
 * A resource group enters locksetup mode to reflect that it would be used
 * to represent a pseudo-locked region and is in the process of being set
 * up to do so. A resource group used for a pseudo-locked region would
 * lose the closid associated with it so we cannot allow it to have any
 * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
 * future. Monitoring of a pseudo-locked region is not allowed either.
 *
 * The above and more restrictions on a pseudo-locked region are checked
 * for and enforced before the resource group enters the locksetup mode.
 *
 * Returns: 0 if the resource group successfully entered locksetup mode, <0
 * on failure. On failure the last_cmd_status buffer is updated with text to
 * communicate details of failure to the user.
 */
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	int ret;

	/*
	 * The default resource group can neither be removed nor lose the
	 * default closid associated with it.
	 */
	if (rdtgrp == &rdtgroup_default) {
		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
		return -EINVAL;
	}

	/*
	 * Cache Pseudo-locking not supported when CDP is enabled.
	 *
	 * Some things to consider if you would like to enable this
	 * support (using L3 CDP as example):
	 * - When CDP is enabled two separate resources are exposed,
	 *   L3DATA and L3CODE, but they are actually on the same cache.
	 *   The implication for pseudo-locking is that if a
	 *   pseudo-locked region is created on a domain of one
	 *   resource (eg. L3CODE), then a pseudo-locked region cannot
	 *   be created on that same domain of the other resource
	 *   (eg. L3DATA). This is because the creation of a
	 *   pseudo-locked region involves a call to wbinvd that will
	 *   affect all cache allocations on the particular domain.
	 * - Considering the previous, it may be possible to only
	 *   expose one of the CDP resources to pseudo-locking and
	 *   hide the other. For example, we could consider only
	 *   exposing L3DATA: since the L3 cache is unified it is
	 *   still possible to place instructions there and execute them.
	 * - If only one region is exposed to pseudo-locking we should
	 *   still keep in mind that availability of a portion of cache
	 *   for pseudo-locking should take into account both resources.
	 *   Similarly, if a pseudo-locked region is created in one
	 *   resource, the portion of cache used by it should be made
	 *   unavailable to all future allocations from both resources.
	 */
	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
		rdt_last_cmd_puts("CDP enabled\n");
		return -EINVAL;
	}

	/*
	 * Not knowing the bits to disable prefetching implies that this
	 * platform does not support Cache Pseudo-Locking.
	 */
	prefetch_disable_bits = get_prefetch_disable_bits();
	if (prefetch_disable_bits == 0) {
		rdt_last_cmd_puts("Pseudo-locking not supported\n");
		return -EINVAL;
	}

	if (rdtgroup_monitor_in_progress(rdtgrp)) {
		rdt_last_cmd_puts("Monitoring in progress\n");
		return -EINVAL;
	}

	if (rdtgroup_tasks_assigned(rdtgrp)) {
		rdt_last_cmd_puts("Tasks assigned to resource group\n");
		return -EINVAL;
	}

	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
		rdt_last_cmd_puts("CPUs assigned to resource group\n");
		return -EINVAL;
	}

	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
		return -EIO;
	}

	ret = pseudo_lock_init(rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
		goto out_release;
	}

	/*
	 * If this system is capable of monitoring, an RMID would have been
	 * allocated when the control group was created. It is not needed
	 * anymore when this group is used for pseudo-locking. It is safe
	 * to call this on platforms not capable of monitoring.
	 */
	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);

	ret = 0;
	goto out;

out_release:
	rdtgroup_locksetup_user_restore(rdtgrp);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_exit - Resource group exits locksetup mode
 * @rdtgrp: resource group
 *
 * When a resource group exits locksetup mode the earlier restrictions are
 * lifted.
 *
 * Return: 0 on success, <0 on failure
 */
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	int ret;

	if (resctrl_arch_mon_capable()) {
		ret = alloc_rmid(rdtgrp->closid);
		if (ret < 0) {
			rdt_last_cmd_puts("Out of RMIDs\n");
			return ret;
		}
		rdtgrp->mon.rmid = ret;
	}

	ret = rdtgroup_locksetup_user_restore(rdtgrp);
	if (ret) {
		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
		return ret;
	}

	pseudo_lock_free(rdtgrp);
	return 0;
}

/**
 * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
 * @d: RDT domain
 * @cbm: CBM to test
 *
 * @d represents a cache instance and @cbm a capacity bitmask that is
 * considered for it. Determine if @cbm overlaps with any existing
 * pseudo-locked region on @d.
 *
 * @cbm is unsigned long, even if only 32 bits are used, to make the
 * bitmap functions work correctly.
 *
 * Return: true if @cbm overlaps with pseudo-locked region on @d, false
 * otherwise.
 */
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
	unsigned int cbm_len;
	unsigned long cbm_b;

	if (d->plr) {
		cbm_len = d->plr->s->res->cache.cbm_len;
		cbm_b = d->plr->cbm;
		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
			return true;
	}
	return false;
}

/**
 * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
 * @d: RDT domain under test
 *
 * The setup of a pseudo-locked region affects all cache instances within
 * the hierarchy of the region. It is thus essential to know if any
 * pseudo-locked regions exist within a cache hierarchy to prevent any
 * attempts to create new pseudo-locked regions in the same hierarchy.
 *
 * Return: true if a pseudo-locked region exists in the hierarchy of @d or
 *         if it is not possible to test due to memory allocation issue,
 *         false otherwise.
 */
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
{
	cpumask_var_t cpu_with_psl;
	struct rdt_resource *r;
	struct rdt_domain *d_i;
	bool ret = false;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
		return true;

	/*
	 * First determine which cpus have pseudo-locked regions
	 * associated with them.
	 */
	for_each_alloc_capable_rdt_resource(r) {
		list_for_each_entry(d_i, &r->domains, list) {
			if (d_i->plr)
				cpumask_or(cpu_with_psl, cpu_with_psl,
					   &d_i->cpu_mask);
		}
	}

	/*
	 * Next test if new pseudo-locked region would intersect with
	 * existing region.
	 */
	if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
		ret = true;

	free_cpumask_var(cpu_with_psl);
	return ret;
}

/**
 * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
 * @_plr: pseudo-lock region to measure
 *
 * There is no deterministic way to test if a memory region is cached. One
 * way is to measure how long it takes to read the memory; the speed of
 * access is a good way to learn how close to the cpu the data was. Even
 * more, if the prefetcher is disabled and the memory is read at a stride
 * of half the cache line, then a cache miss will be easy to spot since the
 * read of the first half would be significantly slower than the read of
 * the second half.
 *
 * Return: 0. Waiter on waitqueue will be woken on completion.
 */
static int measure_cycles_lat_fn(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	u32 saved_low, saved_high;
	unsigned long i;
	u64 start, end;
	void *mem_r;

	local_irq_disable();
	/*
	 * Disable hardware prefetchers.
	 */
	rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
	mem_r = READ_ONCE(plr->kmem);
	/*
	 * Dummy execute of the time measurement to load the needed
	 * instructions into the L1 instruction cache.
	 */
	start = rdtsc_ordered();
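	/* Stride of 32 bytes: half a cache line on the supported platforms. */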
	for (i = 0; i < plr->size; i += 32) {
		start = rdtsc_ordered();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
		end = rdtsc_ordered();
		trace_pseudo_lock_mem_latency((u32)(end - start));
	}
	wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
	local_irq_enable();
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

/*
 * Create a perf_event_attr for the hit and miss perf events that will
 * be used during the performance measurement. A perf_event maintains
 * a pointer to its perf_event_attr so a unique attribute structure is
 * created for each perf_event.
 *
 * The actual configuration of the event is set right before use in order
 * to use the X86_CONFIG macro.
 */
static struct perf_event_attr perf_miss_attr = {
	.type = PERF_TYPE_RAW,
	.size = sizeof(struct perf_event_attr),
	.pinned = 1,
	.disabled = 0,
	.exclude_user = 1,
};

static struct perf_event_attr perf_hit_attr = {
	.type = PERF_TYPE_RAW,
	.size = sizeof(struct perf_event_attr),
	.pinned = 1,
	.disabled = 0,
	.exclude_user = 1,
};

struct residency_counts {
	u64 miss_before, hits_before;
	u64 miss_after, hits_after;
};

static int measure_residency_fn(struct perf_event_attr *miss_attr,
				struct perf_event_attr *hit_attr,
				struct pseudo_lock_region *plr,
				struct residency_counts *counts)
{
	u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
	struct perf_event *miss_event, *hit_event;
	int hit_pmcnum, miss_pmcnum;
	u32 saved_low, saved_high;
	unsigned int line_size;
	unsigned int size;
	unsigned long i;
	void *mem_r;
	u64 tmp;

	miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
						      NULL, NULL, NULL);
	if (IS_ERR(miss_event))
		goto out;

	hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
						     NULL, NULL, NULL);
	if (IS_ERR(hit_event))
		goto out_miss;

	local_irq_disable();
	/*
	 * Check any possible error state of events used by performing
	 * one local read.
	 */
	if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
		local_irq_enable();
		goto out_hit;
	}
	if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
		local_irq_enable();
		goto out_hit;
	}

	/*
	 * Disable hardware prefetchers.
	 */
	rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);

	/* Initialize rest of local variables */
	/*
	 * Performance event has been validated right before this with
	 * interrupts disabled - it is thus safe to read the counter index.
	 */
	miss_pmcnum = x86_perf_rdpmc_index(miss_event);
	hit_pmcnum = x86_perf_rdpmc_index(hit_event);
	line_size = READ_ONCE(plr->line_size);
	mem_r = READ_ONCE(plr->kmem);
	size = READ_ONCE(plr->size);

	/*
	 * Read counter variables twice - first to load the instructions
	 * used in L1 cache, second to capture accurate value that does not
	 * include cache misses incurred because of instruction loads.
	 */
	rdpmcl(hit_pmcnum, hits_before);
	rdpmcl(miss_pmcnum, miss_before);
	/*
	 * From SDM: Performing back-to-back fast reads are not guaranteed
	 * to be monotonic.
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	rdpmcl(hit_pmcnum, hits_before);
	rdpmcl(miss_pmcnum, miss_before);
	/*
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	for (i = 0; i < size; i += line_size) {
		/*
		 * Add a barrier to prevent speculative execution of this
		 * loop reading beyond the end of the buffer.
		 */
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	/*
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	rdpmcl(hit_pmcnum, hits_after);
	rdpmcl(miss_pmcnum, miss_after);
	/*
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	/* Re-enable hardware prefetchers */
	wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
	local_irq_enable();
out_hit:
	perf_event_release_kernel(hit_event);
out_miss:
	perf_event_release_kernel(miss_event);
out:
	/*
	 * All counts will be zero on failure.
	 */
	counts->miss_before = miss_before;
	counts->hits_before = hits_before;
	counts->miss_after = miss_after;
	counts->hits_after = hits_after;
	return 0;
}

static int measure_l2_residency(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	struct residency_counts counts = {0};

	/*
	 * Non-architectural event for the Goldmont Microarchitecture
	 * from Intel x86 Architecture Software Developer Manual (SDM):
	 * MEM_LOAD_UOPS_RETIRED D1H (event number)
	 * Umask values:
	 *     L2_HIT   02H
	 *     L2_MISS  10H
	 */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
						   .umask = 0x10);
		perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
						  .umask = 0x2);
		break;
	default:
		goto out;
	}

	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
	/*
	 * If a failure prevented the measurements from succeeding
	 * tracepoints will still be written and all counts will be zero.
	 */
	trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
			     counts.miss_after - counts.miss_before);
out:
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

static int measure_l3_residency(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	struct residency_counts counts = {0};

	/*
	 * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
	 * has two "no fix" errata associated with it: BDM35 and BDM100. On
	 * this platform the following events are used instead:
	 * LONGEST_LAT_CACHE 2EH (Documented in SDM)
	 *     REFERENCE 4FH
	 *     MISS      41H
	 */

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_BROADWELL_X:
		/* On BDW the hit event counts references, not hits */
		perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
						  .umask = 0x4f);
		perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
						   .umask = 0x41);
		break;
	default:
		goto out;
	}

	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
	/*
	 * If a failure prevented the measurements from succeeding
	 * tracepoints will still be written and all counts will be zero.
	 */

	counts.miss_after -= counts.miss_before;
	if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
		/*
		 * On BDW references and misses are counted, need to adjust.
		 * Sometimes the "hits" counter is a bit more than the
		 * references, for example, x references but x + 1 hits.
		 * To not report invalid hit values in this case we treat
		 * that as misses equal to references.
		 */
		/* First compute the number of cache references measured */
		counts.hits_after -= counts.hits_before;
		/* Next convert references to cache hits */
		counts.hits_after -= min(counts.miss_after, counts.hits_after);
	} else {
		counts.hits_after -= counts.hits_before;
	}

	trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
out:
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

/**
 * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
 * @rdtgrp: Resource group to which the pseudo-locked region belongs.
 * @sel: Selector of which measurement to perform on a pseudo-locked region.
 *
 * The measurement of latency to access a pseudo-locked region should be
 * done from a cpu that is associated with that pseudo-locked region.
 * Determine which cpu is associated with this region, start a thread on
 * that cpu to perform the measurement, and wait for that thread to complete.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int cpu;
	int ret = -1;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out;
	}

	if (!plr->d) {
		ret = -ENODEV;
		goto out;
	}

	plr->thread_done = 0;
	cpu = cpumask_first(&plr->d->cpu_mask);
	if (!cpu_online(cpu)) {
		ret = -ENODEV;
		goto out;
	}

	plr->cpu = cpu;

	if (sel == 1)
		thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else if (sel == 2)
		thread = kthread_create_on_node(measure_l2_residency, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else if (sel == 3)
		thread = kthread_create_on_node(measure_l3_residency, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else
		goto out;

	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		goto out;
	}
	kthread_bind(thread, cpu);
	wake_up_process(thread);

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0)
		goto out;

	ret = 0;

out:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return ret;
}

static ssize_t pseudo_lock_measure_trigger(struct file *file,
					   const char __user *user_buf,
					   size_t count, loff_t *ppos)
{
	struct rdtgroup *rdtgrp = file->private_data;
	size_t buf_size;
	char buf[32];
	int ret;
	int sel;

	buf_size = min(count, (sizeof(buf) - 1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	buf[buf_size] = '\0';
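	/* Selectors: 1 == latency, 2 == L2 residency, 3 == L3 residency. */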
	ret = kstrtoint(buf, 10, &sel);
	if (ret == 0) {
		if (sel != 1 && sel != 2 && sel != 3)
			return -EINVAL;
		ret = debugfs_file_get(file->f_path.dentry);
		if (ret)
			return ret;
		ret = pseudo_lock_measure_cycles(rdtgrp, sel);
		if (ret == 0)
			ret = count;
		debugfs_file_put(file->f_path.dentry);
	}

	return ret;
}

static const struct file_operations pseudo_measure_fops = {
	.write = pseudo_lock_measure_trigger,
	.open = simple_open,
	.llseek = default_llseek,
};
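
/*
 * Measurement sketch: with debugfs mounted at /sys/kernel/debug and a
 * pseudo-locked region named "newlock" (name hypothetical), a latency
 * measurement is triggered from user space with:
 *
 *	echo 1 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
 *
 * Results are emitted via the tracepoints written by the measurement
 * functions above.
 */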

/**
 * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-lock region belongs
 *
 * Called when a resource group in the pseudo-locksetup mode receives a
 * valid schemata that should be pseudo-locked. Since the resource group is
 * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
 * allocated and initialized with the essential information. If a failure
 * occurs the resource group remains in the pseudo-locksetup mode with the
 * &struct pseudo_lock_region associated with it, but cleared from all
 * information and ready for the user to re-attempt pseudo-locking by
 * writing the schemata again.
 *
 * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
 * on failure. Descriptive error will be written to last_cmd_status buffer.
 */
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int new_minor;
	struct device *dev;
	int ret;

	ret = pseudo_lock_region_alloc(plr);
	if (ret < 0)
		return ret;

	ret = pseudo_lock_cstates_constrain(plr);
	if (ret < 0) {
		ret = -EINVAL;
		goto out_region;
	}

	plr->thread_done = 0;

	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
					cpu_to_node(plr->cpu),
					"pseudo_lock/%u", plr->cpu);
	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
		goto out_cstates;
	}

	kthread_bind(thread, plr->cpu);
	wake_up_process(thread);

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0) {
		/*
		 * If the thread does not get on the CPU for whatever
		 * reason and the process which sets up the region is
		 * interrupted then this will leave the thread in runnable
		 * state and once it gets on the CPU it will dereference
		 * the cleared, but not freed, plr struct resulting in an
		 * empty pseudo-locking loop.
		 */
		rdt_last_cmd_puts("Locking thread interrupted\n");
		goto out_cstates;
	}

	ret = pseudo_lock_minor_get(&new_minor);
	if (ret < 0) {
		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
		goto out_cstates;
	}

	/*
	 * Unlock access but do not release the reference. The
	 * pseudo-locked region will still be here on return.
	 *
	 * The mutex has to be released temporarily to avoid a potential
	 * deadlock with the mm->mmap_lock which is obtained in the
	 * device_create() and debugfs_create_dir() callpath below as well as
	 * before the mmap() callback is called.
	 */
	mutex_unlock(&rdtgroup_mutex);

	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
		plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
						      debugfs_resctrl);
		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
			debugfs_create_file("pseudo_lock_measure", 0200,
					    plr->debugfs_dir, rdtgrp,
					    &pseudo_measure_fops);
	}

	dev = device_create(&pseudo_lock_class, NULL,
			    MKDEV(pseudo_lock_major, new_minor),
			    rdtgrp, "%s", rdtgrp->kn->name);

	mutex_lock(&rdtgroup_mutex);

	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		rdt_last_cmd_printf("Failed to create character device: %d\n",
				    ret);
		goto out_debugfs;
	}

	/* We released the mutex - check if group was removed while we did so */
	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out_device;
	}

	plr->minor = new_minor;

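	/*
	 * Pseudo-locking succeeded: the group's closid is no longer
	 * needed and its "cpus" and "cpus_list" files become read-only.
	 */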
	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
	closid_free(rdtgrp->closid);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);

	ret = 0;
	goto out;

out_device:
	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
out_debugfs:
	debugfs_remove_recursive(plr->debugfs_dir);
	pseudo_lock_minor_release(new_minor);
out_cstates:
	pseudo_lock_cstates_relax(plr);
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
 * @rdtgrp: resource group to which the pseudo-locked region belongs
 *
 * The removal of a pseudo-locked region can be initiated when the resource
 * group is removed via a "rmdir" from user space or the unmount of the
 * resctrl filesystem. On removal the resource group does not go back to
 * pseudo-locksetup mode before it is removed, instead it is removed
 * directly. There is thus asymmetry with the creation where the
 * &struct pseudo_lock_region is removed here while it was not created in
 * rdtgroup_pseudo_lock_create().
 *
 * Return: void
 */
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/*
		 * Default group cannot be a pseudo-locked region so we can
		 * free closid here.
		 */
		closid_free(rdtgrp->closid);
		goto free;
	}

	pseudo_lock_cstates_relax(plr);
	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
	pseudo_lock_minor_release(plr->minor);

free:
	pseudo_lock_free(rdtgrp);
}

static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = region_find_by_minor(iminor(inode));
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	filp->private_data = rdtgrp;
	atomic_inc(&rdtgrp->waitcount);
	/* Perform a non-seekable open - llseek is not supported */
	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}
	filp->private_data = NULL;
	atomic_dec(&rdtgrp->waitcount);
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
{
	/* Not supported */
	return -EINVAL;
}

static const struct vm_operations_struct pseudo_mmap_ops = {
	.mremap = pseudo_lock_dev_mremap,
};

static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long vsize = vma->vm_end - vma->vm_start;
	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
	struct pseudo_lock_region *plr;
	struct rdtgroup *rdtgrp;
	unsigned long physical;
	unsigned long psize;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	plr = rdtgrp->plr;

	if (!plr->d) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	/*
	 * Task is required to run with affinity to the cpus associated
	 * with the pseudo-locked region. If this is not the case the task
	 * may be scheduled elsewhere and invalidate entries in the
	 * pseudo-locked region.
	 */
	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	physical = __pa(plr->kmem) >> PAGE_SHIFT;
	psize = plr->size - off;

	if (off > plr->size) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	/*
	 * Ensure changes are carried directly to the memory being mapped,
	 * do not allow copy-on-write mapping.
	 */
	if (!(vma->vm_flags & VM_SHARED)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	if (vsize > psize) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	memset(plr->kmem + off, 0, vsize);

	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
			    vsize, vma->vm_page_prot)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EAGAIN;
	}
	vma->vm_ops = &pseudo_mmap_ops;
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}
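
/*
 * Sketch of the expected user-space flow (names hypothetical): the task
 * first restricts its affinity to the CPUs of the pseudo-locked cache
 * instance, then maps the region MAP_SHARED:
 *
 *	cpu_set_t cpus;		// CPUs of the region's cache instance
 *	CPU_ZERO(&cpus);
 *	CPU_SET(2, &cpus);	// example CPU
 *	sched_setaffinity(0, sizeof(cpus), &cpus);
 *	int fd = open("/dev/pseudo_lock/newlock", O_RDWR);
 *	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 *
 * A private (copy-on-write) mapping, or a task whose affinity is not a
 * subset of the region's CPUs, is rejected by pseudo_lock_dev_mmap() above.
 */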

static const struct file_operations pseudo_lock_dev_fops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read =		NULL,
	.write =	NULL,
	.open =		pseudo_lock_dev_open,
	.release =	pseudo_lock_dev_release,
	.mmap =		pseudo_lock_dev_mmap,
};

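/*
 * Register the character device used to expose pseudo-locked regions to
 * user space. Passing 0 to register_chrdev() requests a dynamically
 * allocated major number, which is returned on success.
 */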
int rdt_pseudo_lock_init(void)
{
	int ret;

	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
	if (ret < 0)
		return ret;

	pseudo_lock_major = ret;

	ret = class_register(&pseudo_lock_class);
	if (ret) {
		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
		return ret;
	}

	return 0;
}

void rdt_pseudo_lock_release(void)
{
	class_unregister(&pseudo_lock_class);
	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
	pseudo_lock_major = 0;
}