Commit | Line | Data |
---|---|---|
2f9ade3c JA |
1 | /* |
2 | * Memory helpers | |
3 | */ | |
5921e80c | 4 | #include <fcntl.h> |
2f9ade3c | 5 | #include <unistd.h> |
2f9ade3c | 6 | #include <sys/mman.h> |
3d2d14bc | 7 | #include <sys/stat.h> |
2f9ade3c JA |
8 | |
9 | #include "fio.h" | |
a5e0ee11 O |
10 | #ifndef FIO_NO_HAVE_SHM_H |
11 | #include <sys/shm.h> | |
12 | #endif | |
2f9ade3c | 13 | |
1b79a070 | 14 | void fio_unpin_memory(struct thread_data *td) |
2f9ade3c | 15 | { |
1b79a070 JA |
16 | if (td->pinned_mem) { |
17 | dprint(FD_MEM, "unpinning %llu bytes\n", td->o.lockmem); | |
18 | if (munlock(td->pinned_mem, td->o.lockmem) < 0) | |
2f9ade3c | 19 | perror("munlock"); |
1b79a070 JA |
20 | munmap(td->pinned_mem, td->o.lockmem); |
21 | td->pinned_mem = NULL; | |
2f9ade3c JA |
22 | } |
23 | } | |
24 | ||
1b79a070 | 25 | int fio_pin_memory(struct thread_data *td) |
2f9ade3c JA |
26 | { |
27 | unsigned long long phys_mem; | |
28 | ||
1b79a070 | 29 | if (!td->o.lockmem) |
2f9ade3c JA |
30 | return 0; |
31 | ||
1b79a070 | 32 | dprint(FD_MEM, "pinning %llu bytes\n", td->o.lockmem); |
ee56ad50 | 33 | |
2f9ade3c | 34 | /* |
420b104a | 35 | * Don't allow mlock of more than real_mem-128MiB |
2f9ade3c JA |
36 | */ |
37 | phys_mem = os_phys_mem(); | |
38 | if (phys_mem) { | |
1b79a070 JA |
39 | if ((td->o.lockmem + 128 * 1024 * 1024) > phys_mem) { |
40 | td->o.lockmem = phys_mem - 128 * 1024 * 1024; | |
420b104a | 41 | log_info("fio: limiting mlocked memory to %lluMiB\n", |
1b79a070 | 42 | td->o.lockmem >> 20); |
2f9ade3c JA |
43 | } |
44 | } | |
45 | ||
1b79a070 | 46 | td->pinned_mem = mmap(NULL, td->o.lockmem, PROT_READ | PROT_WRITE, |
a55820db | 47 | MAP_PRIVATE | OS_MAP_ANON, -1, 0); |
1b79a070 | 48 | if (td->pinned_mem == MAP_FAILED) { |
2f9ade3c | 49 | perror("malloc locked mem"); |
1b79a070 | 50 | td->pinned_mem = NULL; |
2f9ade3c JA |
51 | return 1; |
52 | } | |
1b79a070 | 53 | if (mlock(td->pinned_mem, td->o.lockmem) < 0) { |
2f9ade3c | 54 | perror("mlock"); |
1b79a070 JA |
55 | munmap(td->pinned_mem, td->o.lockmem); |
56 | td->pinned_mem = NULL; | |
2f9ade3c JA |
57 | return 1; |
58 | } | |
59 | ||
60 | return 0; | |
61 | } | |
62 | ||
829a602c | 63 | static int alloc_mem_shm(struct thread_data *td, unsigned int total_mem) |
2f9ade3c | 64 | { |
91e47529 | 65 | #ifndef CONFIG_NO_SHM |
03e20d68 | 66 | int flags = IPC_CREAT | S_IRUSR | S_IWUSR; |
b6f9676e | 67 | |
a1242a20 JA |
68 | if (td->o.mem_type == MEM_SHMHUGE) { |
69 | unsigned long mask = td->o.hugepage_size - 1; | |
70 | ||
b6f9676e | 71 | flags |= SHM_HUGETLB; |
a1242a20 JA |
72 | total_mem = (total_mem + mask) & ~mask; |
73 | } | |
b6f9676e | 74 | |
829a602c JA |
75 | td->shm_id = shmget(IPC_PRIVATE, total_mem, flags); |
76 | dprint(FD_MEM, "shmget %u, %d\n", total_mem, td->shm_id); | |
b6f9676e JA |
77 | if (td->shm_id < 0) { |
78 | td_verror(td, errno, "shmget"); | |
da7d79b0 | 79 | if (geteuid() != 0 && (errno == ENOMEM || errno == EPERM)) |
b6f9676e | 80 | log_err("fio: you may need to run this job as root\n"); |
886b878a | 81 | if (td->o.mem_type == MEM_SHMHUGE) { |
5ec10eaa JA |
82 | if (errno == EINVAL) { |
83 | log_err("fio: check that you have free huge" | |
84 | " pages and that hugepage-size is" | |
85 | " correct.\n"); | |
86 | } else if (errno == ENOSYS) { | |
87 | log_err("fio: your system does not appear to" | |
88 | " support huge pages.\n"); | |
89 | } else if (errno == ENOMEM) { | |
90 | log_err("fio: no huge pages available, do you" | |
dd117eb6 | 91 | " need to allocate some? See HOWTO.\n"); |
5ec10eaa | 92 | } |
d8602dd0 | 93 | } |
5ec10eaa | 94 | |
b6f9676e JA |
95 | return 1; |
96 | } | |
2f9ade3c | 97 | |
b6f9676e | 98 | td->orig_buffer = shmat(td->shm_id, NULL, 0); |
ee56ad50 | 99 | dprint(FD_MEM, "shmat %d, %p\n", td->shm_id, td->orig_buffer); |
b6f9676e JA |
100 | if (td->orig_buffer == (void *) -1) { |
101 | td_verror(td, errno, "shmat"); | |
102 | td->orig_buffer = NULL; | |
103 | return 1; | |
104 | } | |
105 | ||
106 | return 0; | |
91e47529 JA |
107 | #else |
108 | log_err("fio: shm not supported\n"); | |
109 | return 1; | |
110 | #endif | |
b6f9676e JA |
111 | } |
112 | ||
829a602c JA |
113 | static void free_mem_shm(struct thread_data *td) |
114 | { | |
91e47529 | 115 | #ifndef CONFIG_NO_SHM |
829a602c JA |
116 | struct shmid_ds sbuf; |
117 | ||
118 | dprint(FD_MEM, "shmdt/ctl %d %p\n", td->shm_id, td->orig_buffer); | |
119 | shmdt(td->orig_buffer); | |
120 | shmctl(td->shm_id, IPC_RMID, &sbuf); | |
91e47529 | 121 | #endif |
829a602c JA |
122 | } |
123 | ||
0f805c00 | 124 | static int alloc_mem_mmap(struct thread_data *td, size_t total_mem) |
b6f9676e | 125 | { |
d9759b1e | 126 | int flags = 0; |
b6f9676e | 127 | |
4a995dda | 128 | td->mmapfd = -1; |
b6f9676e | 129 | |
d6dc02fb JA |
130 | if (td->o.mem_type == MEM_MMAPHUGE) { |
131 | unsigned long mask = td->o.hugepage_size - 1; | |
132 | ||
d9759b1e | 133 | /* TODO: make sure the file is a real hugetlbfs file */ |
836fcc0f | 134 | if (!td->o.mmapfile) |
d9759b1e | 135 | flags |= MAP_HUGETLB; |
d6dc02fb JA |
136 | total_mem = (total_mem + mask) & ~mask; |
137 | } | |
138 | ||
83ea422a | 139 | if (td->o.mmapfile) { |
f9cfc7d4 SB |
140 | if (access(td->o.mmapfile, F_OK) == 0) |
141 | td->flags |= TD_F_MMAP_KEEP; | |
142 | ||
83ea422a | 143 | td->mmapfd = open(td->o.mmapfile, O_RDWR|O_CREAT, 0644); |
b6f9676e JA |
144 | |
145 | if (td->mmapfd < 0) { | |
146 | td_verror(td, errno, "open mmap file"); | |
2f9ade3c JA |
147 | td->orig_buffer = NULL; |
148 | return 1; | |
149 | } | |
d9759b1e | 150 | if (td->o.mem_type != MEM_MMAPHUGE && |
217b0f1d | 151 | td->o.mem_type != MEM_MMAPSHARED && |
d9759b1e | 152 | ftruncate(td->mmapfd, total_mem) < 0) { |
b6f9676e | 153 | td_verror(td, errno, "truncate mmap file"); |
2f9ade3c JA |
154 | td->orig_buffer = NULL; |
155 | return 1; | |
156 | } | |
217b0f1d LG |
157 | if (td->o.mem_type == MEM_MMAPHUGE || |
158 | td->o.mem_type == MEM_MMAPSHARED) | |
d9759b1e SL |
159 | flags |= MAP_SHARED; |
160 | else | |
161 | flags |= MAP_PRIVATE; | |
b6f9676e | 162 | } else |
d9759b1e | 163 | flags |= OS_MAP_ANON | MAP_PRIVATE; |
b6f9676e | 164 | |
829a602c JA |
165 | td->orig_buffer = mmap(NULL, total_mem, PROT_READ | PROT_WRITE, flags, |
166 | td->mmapfd, 0); | |
4b91ee8f JA |
167 | dprint(FD_MEM, "mmap %llu/%d %p\n", (unsigned long long) total_mem, |
168 | td->mmapfd, td->orig_buffer); | |
b6f9676e JA |
169 | if (td->orig_buffer == MAP_FAILED) { |
170 | td_verror(td, errno, "mmap"); | |
171 | td->orig_buffer = NULL; | |
9ce94349 | 172 | if (td->mmapfd != 1 && td->mmapfd != -1) { |
b6f9676e | 173 | close(td->mmapfd); |
f9cfc7d4 | 174 | if (td->o.mmapfile && !(td->flags & TD_F_MMAP_KEEP)) |
b3493a7a | 175 | unlink(td->o.mmapfile); |
b6f9676e | 176 | } |
5ec10eaa | 177 | |
b6f9676e | 178 | return 1; |
2f9ade3c JA |
179 | } |
180 | ||
181 | return 0; | |
182 | } | |
183 | ||
0f805c00 | 184 | static void free_mem_mmap(struct thread_data *td, size_t total_mem) |
b6f9676e | 185 | { |
4b91ee8f JA |
186 | dprint(FD_MEM, "munmap %llu %p\n", (unsigned long long) total_mem, |
187 | td->orig_buffer); | |
829a602c | 188 | munmap(td->orig_buffer, td->orig_buffer_size); |
83ea422a | 189 | if (td->o.mmapfile) { |
4a995dda JA |
190 | if (td->mmapfd != -1) |
191 | close(td->mmapfd); | |
f9cfc7d4 SB |
192 | if (!(td->flags & TD_F_MMAP_KEEP)) |
193 | unlink(td->o.mmapfile); | |
83ea422a | 194 | free(td->o.mmapfile); |
829a602c JA |
195 | } |
196 | } | |
d87612ac | 197 | |
0f805c00 | 198 | static int alloc_mem_malloc(struct thread_data *td, size_t total_mem) |
829a602c JA |
199 | { |
200 | td->orig_buffer = malloc(total_mem); | |
4b91ee8f JA |
201 | dprint(FD_MEM, "malloc %llu %p\n", (unsigned long long) total_mem, |
202 | td->orig_buffer); | |
5ec10eaa | 203 | |
829a602c JA |
204 | return td->orig_buffer == NULL; |
205 | } | |
b6f9676e | 206 | |
829a602c JA |
207 | static void free_mem_malloc(struct thread_data *td) |
208 | { | |
209 | dprint(FD_MEM, "free malloc mem %p\n", td->orig_buffer); | |
210 | free(td->orig_buffer); | |
b6f9676e JA |
211 | } |
212 | ||
/*
 * Allocate the IO buffer in GPU device memory through the CUDA driver API:
 * initialize the driver, pick the device given by td->o.gpu_dev_id, create
 * a context on it and allocate total_mem bytes with cuMemAlloc().
 *
 * Returns 0 on success with td->orig_buffer pointing at the device
 * allocation, 1 on any CUDA failure, and -EINVAL when fio was built without
 * CONFIG_CUDA.
 */
static int alloc_mem_cudamalloc(struct thread_data *td, size_t total_mem)
{
#ifdef CONFIG_CUDA
	CUresult ret;
	char name[128];

	ret = cuInit(0);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed initialize cuda driver api\n");
		return 1;
	}

	ret = cuDeviceGetCount(&td->gpu_dev_cnt);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed get device count\n");
		return 1;
	}
	dprint(FD_MEM, "found %d GPU devices\n", td->gpu_dev_cnt);

	if (td->gpu_dev_cnt == 0) {
		log_err("fio: no GPU device found. "
			"Can not perform GPUDirect RDMA.\n");
		return 1;
	}

	td->gpu_dev_id = td->o.gpu_dev_id;
	ret = cuDeviceGet(&td->cu_dev, td->gpu_dev_id);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed get GPU device\n");
		return 1;
	}

	/*
	 * cuDeviceGetName() expects the CUdevice handle obtained from
	 * cuDeviceGet(), not the device ordinal.
	 */
	ret = cuDeviceGetName(name, sizeof(name), td->cu_dev);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed get device name\n");
		return 1;
	}
	dprint(FD_MEM, "dev_id = [%d], device name = [%s]\n",
						td->gpu_dev_id, name);

	ret = cuCtxCreate(&td->cu_ctx, CU_CTX_MAP_HOST, td->cu_dev);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed to create cuda context: %d\n", ret);
		return 1;
	}

	ret = cuMemAlloc(&td->dev_mem_ptr, total_mem);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: cuMemAlloc %zu bytes failed\n", total_mem);
		return 1;
	}
	td->orig_buffer = (void *) td->dev_mem_ptr;

	dprint(FD_MEM, "cudaMalloc %llu %p\n",
			(unsigned long long) total_mem, td->orig_buffer);
	return 0;
#else
	return -EINVAL;
#endif
}
273 | ||
/*
 * Release the CUDA device allocation in td->dev_mem_ptr (if any) and destroy
 * the context td->cu_ctx. Failures are logged. Compiles to a no-op without
 * CONFIG_CUDA.
 */
static void free_mem_cudamalloc(struct thread_data *td)
{
#ifdef CONFIG_CUDA
	if (td->dev_mem_ptr) {
		if (cuMemFree(td->dev_mem_ptr) != CUDA_SUCCESS)
			log_err("fio: failed to free cuda device memory\n");
		/* Never hand the same device pointer to cuMemFree() twice */
		td->dev_mem_ptr = 0;
	}

	if (cuCtxDestroy(td->cu_ctx) != CUDA_SUCCESS)
		log_err("fio: failed to destroy cuda context\n");
#endif
}
03553853 | 284 | |
b6f9676e | 285 | /* |
03e20d68 | 286 | * Set up the buffer area we need for io. |
b6f9676e JA |
287 | */ |
288 | int allocate_io_mem(struct thread_data *td) | |
289 | { | |
0f805c00 | 290 | size_t total_mem; |
b6f9676e JA |
291 | int ret = 0; |
292 | ||
9b87f09b | 293 | if (td_ioengine_flagged(td, FIO_NOIO)) |
b4c5e1ac JA |
294 | return 0; |
295 | ||
829a602c | 296 | total_mem = td->orig_buffer_size; |
d529ee19 | 297 | |
d01612f3 | 298 | if (td->o.odirect || td->o.mem_align || td->o.oatomic || |
9b87f09b | 299 | td_ioengine_flagged(td, FIO_MEMALIGN)) { |
829a602c | 300 | total_mem += page_mask; |
d529ee19 JA |
301 | if (td->o.mem_align && td->o.mem_align > page_size) |
302 | total_mem += td->o.mem_align - page_size; | |
303 | } | |
829a602c | 304 | |
4b91ee8f | 305 | dprint(FD_MEM, "Alloc %llu for buffers\n", (unsigned long long) total_mem); |
0f805c00 | 306 | |
a6cb85e2 JA |
307 | /* |
308 | * If the IO engine has hooks to allocate/free memory, use those. But | |
309 | * error out if the user explicitly asked for something else. | |
310 | */ | |
311 | if (td->io_ops->iomem_alloc) { | |
312 | if (fio_option_is_set(&td->o, mem_type)) { | |
313 | log_err("fio: option 'mem/iomem' conflicts with specified IO engine\n"); | |
314 | ret = 1; | |
315 | } else | |
316 | ret = td->io_ops->iomem_alloc(td, total_mem); | |
317 | } else if (td->o.mem_type == MEM_MALLOC) | |
829a602c | 318 | ret = alloc_mem_malloc(td, total_mem); |
b6f9676e | 319 | else if (td->o.mem_type == MEM_SHM || td->o.mem_type == MEM_SHMHUGE) |
829a602c | 320 | ret = alloc_mem_shm(td, total_mem); |
217b0f1d LG |
321 | else if (td->o.mem_type == MEM_MMAP || td->o.mem_type == MEM_MMAPHUGE || |
322 | td->o.mem_type == MEM_MMAPSHARED) | |
829a602c | 323 | ret = alloc_mem_mmap(td, total_mem); |
03553853 YR |
324 | else if (td->o.mem_type == MEM_CUDA_MALLOC) |
325 | ret = alloc_mem_cudamalloc(td, total_mem); | |
b6f9676e JA |
326 | else { |
327 | log_err("fio: bad mem type: %d\n", td->o.mem_type); | |
328 | ret = 1; | |
329 | } | |
330 | ||
3deb3101 JA |
331 | if (ret) |
332 | td_verror(td, ENOMEM, "iomem allocation"); | |
333 | ||
b6f9676e JA |
334 | return ret; |
335 | } | |
336 | ||
2f9ade3c JA |
337 | void free_io_mem(struct thread_data *td) |
338 | { | |
829a602c JA |
339 | unsigned int total_mem; |
340 | ||
341 | total_mem = td->orig_buffer_size; | |
d01612f3 | 342 | if (td->o.odirect || td->o.oatomic) |
829a602c JA |
343 | total_mem += page_mask; |
344 | ||
a6cb85e2 JA |
345 | if (td->io_ops->iomem_alloc) { |
346 | if (td->io_ops->iomem_free) | |
347 | td->io_ops->iomem_free(td); | |
348 | } else if (td->o.mem_type == MEM_MALLOC) | |
829a602c JA |
349 | free_mem_malloc(td); |
350 | else if (td->o.mem_type == MEM_SHM || td->o.mem_type == MEM_SHMHUGE) | |
351 | free_mem_shm(td); | |
217b0f1d LG |
352 | else if (td->o.mem_type == MEM_MMAP || td->o.mem_type == MEM_MMAPHUGE || |
353 | td->o.mem_type == MEM_MMAPSHARED) | |
829a602c | 354 | free_mem_mmap(td, total_mem); |
03553853 YR |
355 | else if (td->o.mem_type == MEM_CUDA_MALLOC) |
356 | free_mem_cudamalloc(td); | |
829a602c | 357 | else |
2dc1bbeb | 358 | log_err("Bad memory type %u\n", td->o.mem_type); |
2f9ade3c JA |
359 | |
360 | td->orig_buffer = NULL; | |
829a602c | 361 | td->orig_buffer_size = 0; |
2f9ade3c | 362 | } |