Commit | Line | Data |
---|---|---|
790bf585 FY |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Cache Allocation Technology (CAT) test | |
4 | * | |
5 | * Copyright (C) 2018 Intel Corporation | |
6 | * | |
7 | * Authors: | |
8 | * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, | |
9 | * Fenghua Yu <fenghua.yu@intel.com> | |
10 | */ | |
11 | #include "resctrl.h" | |
12 | #include <unistd.h> | |
13 | ||
205de6dd | 14 | #define RESULT_FILE_NAME "result_cat" |
790bf585 | 15 | #define NUM_OF_RUNS 5 |
790bf585 | 16 | |
790bf585 | 17 | /* |
205de6dd IJ |
18 | * Minimum difference in LLC misses between a test with n+1 bits CBM to the |
19 | * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4 | |
20 | * bits in the CBM mask, the minimum difference must be at least | |
21 | * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent. | |
22 | * | |
23 | * The relationship between number of used CBM bits and difference in LLC | |
24 | * misses is not expected to be linear. With a small number of bits, the | |
25 | * margin is smaller than with larger number of bits. For selftest purposes, | |
26 | * however, linear approach is enough because ultimately only pass/fail | |
27 | * decision has to be made and distinction between strong and stronger | |
28 | * signal is irrelevant. | |
790bf585 | 29 | */ |
205de6dd | 30 | #define MIN_DIFF_PERCENT_PER_BIT 1UL |
790bf585 | 31 | |
33403bc7 | 32 | static int show_results_info(__u64 sum_llc_val, int no_of_bits, |
205de6dd IJ |
33 | unsigned long cache_span, |
34 | unsigned long min_diff_percent, | |
35 | unsigned long num_of_runs, bool platform, | |
36 | __s64 *prev_avg_llc_val) | |
5caf1b64 | 37 | { |
33403bc7 | 38 | __u64 avg_llc_val = 0; |
205de6dd IJ |
39 | float avg_diff; |
40 | int ret = 0; | |
5caf1b64 IJ |
41 | |
42 | avg_llc_val = sum_llc_val / num_of_runs; | |
205de6dd IJ |
43 | if (*prev_avg_llc_val) { |
44 | float delta = (__s64)(avg_llc_val - *prev_avg_llc_val); | |
5caf1b64 | 45 | |
205de6dd IJ |
46 | avg_diff = delta / *prev_avg_llc_val; |
47 | ret = platform && (avg_diff * 100) < (float)min_diff_percent; | |
5caf1b64 | 48 | |
205de6dd IJ |
49 | ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n", |
50 | ret ? "Fail:" : "Pass:", (float)min_diff_percent); | |
5caf1b64 | 51 | |
205de6dd IJ |
52 | ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100); |
53 | } | |
54 | *prev_avg_llc_val = avg_llc_val; | |
5caf1b64 IJ |
55 | |
56 | show_cache_info(no_of_bits, avg_llc_val, cache_span, true); | |
57 | ||
58 | return ret; | |
59 | } | |
60 | ||
205de6dd IJ |
/*
 * Shrink the CBM by one bit: AND the mask with itself shifted right by one.
 * For a contiguous mask this clears the most significant set bit.
 */
static unsigned long next_mask(unsigned long current_mask)
{
	unsigned long shifted = current_mask >> 1;

	return shifted & current_mask;
}
66 | ||
67 | static int check_results(struct resctrl_val_param *param, const char *cache_type, | |
68 | unsigned long cache_total_size, unsigned long full_cache_mask, | |
69 | unsigned long current_mask) | |
790bf585 FY |
70 | { |
71 | char *token_array[8], temp[512]; | |
33403bc7 | 72 | __u64 sum_llc_perf_miss = 0; |
205de6dd IJ |
73 | __s64 prev_avg_llc_val = 0; |
74 | unsigned long alloc_size; | |
75 | int runs = 0; | |
76 | int fail = 0; | |
77 | int ret; | |
790bf585 FY |
78 | FILE *fp; |
79 | ||
ca2f4214 | 80 | ksft_print_msg("Checking for pass/fail\n"); |
790bf585 FY |
81 | fp = fopen(param->filename, "r"); |
82 | if (!fp) { | |
cc8ff7f5 | 83 | ksft_perror("Cannot open file"); |
790bf585 | 84 | |
c90fba60 | 85 | return -1; |
790bf585 FY |
86 | } |
87 | ||
88 | while (fgets(temp, sizeof(temp), fp)) { | |
89 | char *token = strtok(temp, ":\t"); | |
90 | int fields = 0; | |
205de6dd | 91 | int bits; |
790bf585 FY |
92 | |
93 | while (token) { | |
94 | token_array[fields++] = token; | |
95 | token = strtok(NULL, ":\t"); | |
96 | } | |
205de6dd IJ |
97 | |
98 | sum_llc_perf_miss += strtoull(token_array[3], NULL, 0); | |
790bf585 | 99 | runs++; |
205de6dd IJ |
100 | |
101 | if (runs < NUM_OF_RUNS) | |
102 | continue; | |
103 | ||
104 | if (!current_mask) { | |
105 | ksft_print_msg("Unexpected empty cache mask\n"); | |
106 | break; | |
107 | } | |
108 | ||
109 | alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask); | |
110 | ||
111 | bits = count_bits(current_mask); | |
112 | ||
113 | ret = show_results_info(sum_llc_perf_miss, bits, | |
114 | alloc_size / 64, | |
115 | MIN_DIFF_PERCENT_PER_BIT * (bits - 1), | |
116 | runs, get_vendor() == ARCH_INTEL, | |
117 | &prev_avg_llc_val); | |
118 | if (ret) | |
119 | fail = 1; | |
120 | ||
121 | runs = 0; | |
122 | sum_llc_perf_miss = 0; | |
123 | current_mask = next_mask(current_mask); | |
790bf585 FY |
124 | } |
125 | ||
126 | fclose(fp); | |
790bf585 | 127 | |
205de6dd | 128 | return fail; |
790bf585 FY |
129 | } |
130 | ||
6cd36898 | 131 | static void cat_test_cleanup(void) |
790bf585 | 132 | { |
205de6dd | 133 | remove(RESULT_FILE_NAME); |
790bf585 FY |
134 | } |
135 | ||
433f437b IJ |
136 | /* |
137 | * cat_test - Execute CAT benchmark and measure cache misses | |
ca160887 | 138 | * @test: Test information structure |
15f29882 | 139 | * @uparams: User supplied parameters |
433f437b IJ |
140 | * @param: Parameters passed to cat_test() |
141 | * @span: Buffer size for the benchmark | |
205de6dd IJ |
142 | * @current_mask: Start mask for the first iteration |
143 | * | |
144 | * Run CAT selftest by varying the allocated cache portion and comparing the | |
145 | * impact on cache misses (the result analysis is done in check_results() | |
146 | * and show_results_info(), not in this function). | |
147 | * | |
148 | * One bit is removed from the CAT allocation bit mask (in current_mask) for | |
149 | * each subsequent test which keeps reducing the size of the allocated cache | |
150 | * portion. A single test flushes the buffer, reads it to warm up the cache, | |
151 | * and reads the buffer again. The cache misses are measured during the last | |
152 | * read pass. | |
433f437b IJ |
153 | * |
154 | * Return: 0 when the test was run, < 0 on error. | |
155 | */ | |
ca160887 IJ |
156 | static int cat_test(const struct resctrl_test *test, |
157 | const struct user_params *uparams, | |
158 | struct resctrl_val_param *param, | |
15f29882 | 159 | size_t span, unsigned long current_mask) |
433f437b | 160 | { |
433f437b IJ |
161 | struct perf_event_read pe_read; |
162 | struct perf_event_attr pea; | |
6c8cb747 | 163 | cpu_set_t old_affinity; |
205de6dd IJ |
164 | unsigned char *buf; |
165 | char schemata[64]; | |
166 | int ret, i, pe_fd; | |
433f437b | 167 | pid_t bm_pid; |
433f437b IJ |
168 | |
169 | if (strcmp(param->filename, "") == 0) | |
170 | sprintf(param->filename, "stdio"); | |
171 | ||
172 | bm_pid = getpid(); | |
173 | ||
174 | /* Taskset benchmark to specified cpu */ | |
15f29882 | 175 | ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity); |
433f437b IJ |
176 | if (ret) |
177 | return ret; | |
178 | ||
179 | /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ | |
0d66ddb2 | 180 | ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); |
433f437b | 181 | if (ret) |
6c8cb747 | 182 | goto reset_affinity; |
433f437b IJ |
183 | |
184 | perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES); | |
185 | perf_event_initialize_read_format(&pe_read); | |
15f29882 | 186 | pe_fd = perf_open(&pea, bm_pid, uparams->cpu); |
6c8cb747 IJ |
187 | if (pe_fd < 0) { |
188 | ret = -1; | |
189 | goto reset_affinity; | |
190 | } | |
433f437b | 191 | |
205de6dd IJ |
192 | buf = alloc_buffer(span, 1); |
193 | if (!buf) { | |
194 | ret = -1; | |
195 | goto pe_close; | |
196 | } | |
2892731e | 197 | |
205de6dd IJ |
198 | while (current_mask) { |
199 | snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask); | |
ca160887 | 200 | ret = write_schemata("", schemata, uparams->cpu, test->resource); |
2892731e | 201 | if (ret) |
205de6dd IJ |
202 | goto free_buf; |
203 | snprintf(schemata, sizeof(schemata), "%lx", current_mask); | |
ca160887 | 204 | ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource); |
205de6dd IJ |
205 | if (ret) |
206 | goto free_buf; | |
433f437b | 207 | |
205de6dd IJ |
208 | for (i = 0; i < NUM_OF_RUNS; i++) { |
209 | mem_flush(buf, span); | |
210 | fill_cache_read(buf, span, true); | |
433f437b | 211 | |
205de6dd IJ |
212 | ret = perf_event_reset_enable(pe_fd); |
213 | if (ret) | |
214 | goto free_buf; | |
433f437b | 215 | |
205de6dd | 216 | fill_cache_read(buf, span, true); |
433f437b | 217 | |
205de6dd IJ |
218 | ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid); |
219 | if (ret) | |
220 | goto free_buf; | |
221 | } | |
222 | current_mask = next_mask(current_mask); | |
223 | } | |
224 | ||
225 | free_buf: | |
226 | free(buf); | |
433f437b IJ |
227 | pe_close: |
228 | close(pe_fd); | |
6c8cb747 IJ |
229 | reset_affinity: |
230 | taskset_restore(bm_pid, &old_affinity); | |
205de6dd | 231 | |
433f437b IJ |
232 | return ret; |
233 | } | |
234 | ||
c603ff5b | 235 | static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) |
790bf585 | 236 | { |
205de6dd | 237 | unsigned long long_mask, start_mask, full_cache_mask; |
19e94a23 | 238 | unsigned long cache_total_size = 0; |
15f29882 | 239 | int n = uparams->bits; |
205de6dd | 240 | unsigned int start; |
85b73447 | 241 | int count_of_bits; |
b1a901e0 | 242 | size_t span; |
205de6dd | 243 | int ret; |
790bf585 | 244 | |
c603ff5b | 245 | ret = get_full_cbm(test->resource, &full_cache_mask); |
b6dfac94 IJ |
246 | if (ret) |
247 | return ret; | |
248 | /* Get the largest contiguous exclusive portion of the cache */ | |
c603ff5b | 249 | ret = get_mask_no_shareable(test->resource, &long_mask); |
790bf585 FY |
250 | if (ret) |
251 | return ret; | |
252 | ||
790bf585 | 253 | /* Get L3/L2 cache size */ |
c603ff5b | 254 | ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size); |
790bf585 FY |
255 | if (ret) |
256 | return ret; | |
19e94a23 | 257 | ksft_print_msg("Cache size :%lu\n", cache_total_size); |
790bf585 | 258 | |
205de6dd | 259 | count_of_bits = count_contiguous_bits(long_mask, &start); |
790bf585 | 260 | |
09a67934 FY |
261 | if (!n) |
262 | n = count_of_bits / 2; | |
263 | ||
264 | if (n > count_of_bits - 1) { | |
ca2f4214 FY |
265 | ksft_print_msg("Invalid input value for no_of_bits n!\n"); |
266 | ksft_print_msg("Please enter value in range 1 to %d\n", | |
267 | count_of_bits - 1); | |
790bf585 FY |
268 | return -1; |
269 | } | |
205de6dd | 270 | start_mask = create_bit_mask(start, n); |
790bf585 FY |
271 | |
272 | struct resctrl_val_param param = { | |
205de6dd IJ |
273 | .ctrlgrp = "c1", |
274 | .filename = RESULT_FILE_NAME, | |
275 | .num_of_runs = 0, | |
790bf585 | 276 | }; |
205de6dd IJ |
277 | param.mask = long_mask; |
278 | span = cache_portion_size(cache_total_size, start_mask, full_cache_mask); | |
790bf585 FY |
279 | |
280 | remove(param.filename); | |
281 | ||
ca160887 | 282 | ret = cat_test(test, uparams, ¶m, span, start_mask); |
205de6dd | 283 | if (ret) |
6cd36898 | 284 | return ret; |
790bf585 | 285 | |
c603ff5b IJ |
286 | ret = check_results(¶m, test->resource, |
287 | cache_total_size, full_cache_mask, start_mask); | |
39e34ddc | 288 | return ret; |
790bf585 | 289 | } |
c603ff5b | 290 | |
48236960 BM |
291 | static bool arch_supports_noncont_cat(const struct resctrl_test *test) |
292 | { | |
48236960 BM |
293 | /* AMD always supports non-contiguous CBM. */ |
294 | if (get_vendor() == ARCH_AMD) | |
295 | return true; | |
296 | ||
7beaf1da SK |
297 | #if defined(__i386__) || defined(__x86_64__) /* arch */ |
298 | unsigned int eax, ebx, ecx, edx; | |
48236960 BM |
299 | /* Intel support for non-contiguous CBM needs to be discovered. */ |
300 | if (!strcmp(test->resource, "L3")) | |
301 | __cpuid_count(0x10, 1, eax, ebx, ecx, edx); | |
302 | else if (!strcmp(test->resource, "L2")) | |
303 | __cpuid_count(0x10, 2, eax, ebx, ecx, edx); | |
304 | else | |
305 | return false; | |
306 | ||
307 | return ((ecx >> 3) & 1); | |
7beaf1da SK |
308 | #endif /* end arch */ |
309 | ||
310 | return false; | |
48236960 BM |
311 | } |
312 | ||
ae638551 MWR |
313 | static int noncont_cat_run_test(const struct resctrl_test *test, |
314 | const struct user_params *uparams) | |
315 | { | |
316 | unsigned long full_cache_mask, cont_mask, noncont_mask; | |
48236960 | 317 | unsigned int sparse_masks; |
ae638551 MWR |
318 | int bit_center, ret; |
319 | char schemata[64]; | |
320 | ||
321 | /* Check to compare sparse_masks content to CPUID output. */ | |
322 | ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks); | |
323 | if (ret) | |
324 | return ret; | |
325 | ||
48236960 BM |
326 | if (arch_supports_noncont_cat(test) != sparse_masks) { |
327 | ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); | |
ae638551 MWR |
328 | return 1; |
329 | } | |
330 | ||
331 | /* Write checks initialization. */ | |
332 | ret = get_full_cbm(test->resource, &full_cache_mask); | |
333 | if (ret < 0) | |
334 | return ret; | |
335 | bit_center = count_bits(full_cache_mask) / 2; | |
336 | ||
337 | /* | |
338 | * The bit_center needs to be at least 3 to properly calculate the CBM | |
339 | * hole in the noncont_mask. If it's smaller return an error since the | |
340 | * cache mask is too short and that shouldn't happen. | |
341 | */ | |
342 | if (bit_center < 3) | |
343 | return -EINVAL; | |
344 | cont_mask = full_cache_mask >> bit_center; | |
345 | ||
346 | /* Contiguous mask write check. */ | |
347 | snprintf(schemata, sizeof(schemata), "%lx", cont_mask); | |
348 | ret = write_schemata("", schemata, uparams->cpu, test->resource); | |
349 | if (ret) { | |
350 | ksft_print_msg("Write of contiguous CBM failed\n"); | |
351 | return 1; | |
352 | } | |
353 | ||
354 | /* | |
355 | * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle. | |
356 | * Output is compared with support information to catch any edge case errors. | |
357 | */ | |
358 | noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask; | |
359 | snprintf(schemata, sizeof(schemata), "%lx", noncont_mask); | |
360 | ret = write_schemata("", schemata, uparams->cpu, test->resource); | |
361 | if (ret && sparse_masks) | |
362 | ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n"); | |
363 | else if (ret && !sparse_masks) | |
364 | ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n"); | |
365 | else if (!ret && !sparse_masks) | |
366 | ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n"); | |
367 | ||
368 | return !ret == !sparse_masks; | |
369 | } | |
370 | ||
371 | static bool noncont_cat_feature_check(const struct resctrl_test *test) | |
372 | { | |
373 | if (!resctrl_resource_exists(test->resource)) | |
374 | return false; | |
375 | ||
376 | return resource_info_file_exists(test->resource, "sparse_masks"); | |
377 | } | |
378 | ||
c603ff5b | 379 | struct resctrl_test l3_cat_test = { |
5339792b IJ |
380 | .name = "L3_CAT", |
381 | .group = "CAT", | |
c603ff5b IJ |
382 | .resource = "L3", |
383 | .feature_check = test_resource_feature_check, | |
384 | .run_test = cat_run_test, | |
8780bc88 | 385 | .cleanup = cat_test_cleanup, |
c603ff5b | 386 | }; |
ae638551 MWR |
387 | |
388 | struct resctrl_test l3_noncont_cat_test = { | |
389 | .name = "L3_NONCONT_CAT", | |
390 | .group = "CAT", | |
391 | .resource = "L3", | |
392 | .feature_check = noncont_cat_feature_check, | |
393 | .run_test = noncont_cat_run_test, | |
394 | }; | |
395 | ||
396 | struct resctrl_test l2_noncont_cat_test = { | |
397 | .name = "L2_NONCONT_CAT", | |
398 | .group = "CAT", | |
399 | .resource = "L2", | |
400 | .feature_check = noncont_cat_feature_check, | |
401 | .run_test = noncont_cat_run_test, | |
402 | }; |