Enable preadv2/pwritev2 engines by default on Linux
[fio.git] / os / os-linux.h
... / ...
CommitLineData
1#ifndef FIO_OS_LINUX_H
2#define FIO_OS_LINUX_H
3
4#define FIO_OS os_linux
5
6#include <sys/ioctl.h>
7#include <sys/uio.h>
8#include <sys/syscall.h>
9#include <sys/sysmacros.h>
10#include <sys/vfs.h>
11#include <sys/mman.h>
12#include <unistd.h>
13#include <fcntl.h>
14#include <errno.h>
15#include <sched.h>
16#include <linux/unistd.h>
17#include <linux/raw.h>
18#include <linux/major.h>
19#include <byteswap.h>
20
21#include "binject.h"
22#include "../file.h"
23
24#define FIO_HAVE_CPU_AFFINITY
25#define FIO_HAVE_DISK_UTIL
26#define FIO_HAVE_SGIO
27#define FIO_HAVE_IOPRIO
28#define FIO_HAVE_IOSCHED_SWITCH
29#define FIO_HAVE_ODIRECT
30#define FIO_HAVE_HUGETLB
31#define FIO_HAVE_RAWBIND
32#define FIO_HAVE_BLKTRACE
33#define FIO_HAVE_PSHARED_MUTEX
34#define FIO_HAVE_CL_SIZE
35#define FIO_HAVE_CGROUPS
36#define FIO_HAVE_FS_STAT
37#define FIO_HAVE_TRIM
38#define FIO_HAVE_BINJECT
39#define FIO_HAVE_GETTID
40#define FIO_USE_GENERIC_INIT_RANDOM_STATE
41#define FIO_HAVE_PWRITEV2
42
43#ifdef MAP_HUGETLB
44#define FIO_HAVE_MMAP_HUGE
45#endif
46
47#define OS_MAP_ANON MAP_ANONYMOUS
48
49typedef cpu_set_t os_cpu_mask_t;
50
51typedef struct drand48_data os_random_state_t;
52
/*
 * CPU affinity wrappers around sched_setaffinity()/sched_getaffinity().
 * The call form is selected by the build-time CONFIG_3ARG_AFFINITY or
 * CONFIG_2ARG_AFFINITY macro; if neither is defined, fio_setaffinity() and
 * fio_getaffinity() are not defined by this block.
 *
 * Note the asymmetry: fio_setaffinity() takes the mask object itself (the
 * macro applies sizeof and & to it), while fio_getaffinity() takes a
 * pointer and, in the 3-argument form, always passes sizeof(cpu_set_t).
 */
#ifdef CONFIG_3ARG_AFFINITY
#define fio_setaffinity(pid, cpumask) \
	sched_setaffinity((pid), sizeof(cpumask), &(cpumask))
#define fio_getaffinity(pid, ptr) \
	sched_getaffinity((pid), sizeof(cpu_set_t), (ptr))
#elif defined(CONFIG_2ARG_AFFINITY)
#define fio_setaffinity(pid, cpumask) \
	sched_setaffinity((pid), &(cpumask))
#define fio_getaffinity(pid, ptr) \
	sched_getaffinity((pid), (ptr))
#endif

/*
 * Aliases for the CPU_* cpu-set macros. The argument order here is
 * (mask, cpu), the reverse of the underlying (cpu, mask) macros.
 */
#define fio_cpu_clear(mask, cpu) (void) CPU_CLR((cpu), (mask))
#define fio_cpu_set(mask, cpu) (void) CPU_SET((cpu), (mask))
#define fio_cpu_isset(mask, cpu) CPU_ISSET((cpu), (mask))
#define fio_cpu_count(mask) CPU_COUNT((mask))
69
70static inline int fio_cpuset_init(os_cpu_mask_t *mask)
71{
72 CPU_ZERO(mask);
73 return 0;
74}
75
76static inline int fio_cpuset_exit(os_cpu_mask_t *mask)
77{
78 return 0;
79}
80
81#define FIO_MAX_CPUS CPU_SETSIZE
82
/* I/O priority classes, as accepted by ioprio_set() in ioprio_class. */
enum {
	IOPRIO_CLASS_NONE	= 0,
	IOPRIO_CLASS_RT		= 1,
	IOPRIO_CLASS_BE		= 2,
	IOPRIO_CLASS_IDLE	= 3,
};

/*
 * IOPRIO_WHO_* selectors (presumably for the "which" argument of
 * ioprio_set() -- confirm against callers).
 */
enum {
	IOPRIO_WHO_PROCESS	= 1,
	IOPRIO_WHO_PGRP		= 2,
	IOPRIO_WHO_USER		= 3,
};

#define IOPRIO_BITS		16
/* Bit position of the class within the combined ioprio value. */
#define IOPRIO_CLASS_SHIFT	13
98
99static inline int ioprio_set(int which, int who, int ioprio_class, int ioprio)
100{
101 /*
102 * If no class is set, assume BE
103 */
104 if (!ioprio_class)
105 ioprio_class = IOPRIO_CLASS_BE;
106
107 ioprio |= ioprio_class << IOPRIO_CLASS_SHIFT;
108 return syscall(__NR_ioprio_set, which, who, ioprio);
109}
110
/*
 * Return the calling thread's id via the raw gettid syscall.
 *
 * glibc 2.30 and later declare their own gettid() in <unistd.h> (under
 * _GNU_SOURCE); a static definition with the same name then fails to
 * compile. A build that detects a libc-provided gettid() should define
 * CONFIG_HAVE_GETTID so that this fallback is skipped. Without that macro
 * the definition is emitted exactly as before.
 */
#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
	return syscall(__NR_gettid);
}
#endif
115
116#define SPLICE_DEF_SIZE (64*1024)
117
/*
 * Fallback block-device ioctl request codes. Each one is defined here only
 * if nothing included earlier has already defined it.
 */
#ifndef BLKGETSIZE64
#define BLKGETSIZE64 _IOR(0x12,114,size_t)
#endif

#ifndef BLKFLSBUF
#define BLKFLSBUF _IO(0x12,97)
#endif

#ifndef BLKDISCARD
#define BLKDISCARD _IO(0x12,119)
#endif
129
130static inline int blockdev_invalidate_cache(struct fio_file *f)
131{
132 return ioctl(f->fd, BLKFLSBUF);
133}
134
135static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
136{
137 if (!ioctl(f->fd, BLKGETSIZE64, bytes))
138 return 0;
139
140 return errno;
141}
142
/*
 * Compute physical memory as sysconf(_SC_PHYS_PAGES) multiplied by
 * sysconf(_SC_PAGESIZE).
 *
 * Returns 0 if either sysconf() query returns -1.
 */
static inline unsigned long long os_phys_mem(void)
{
	const long page_bytes = sysconf(_SC_PAGESIZE);
	const long page_count = sysconf(_SC_PHYS_PAGES);

	if (page_count != -1 && page_bytes != -1)
		return (unsigned long long) page_count *
			(unsigned long long) page_bytes;

	return 0;
}
154
155static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
156{
157 srand48_r(seed, rs);
158}
159
160static inline long os_random_long(os_random_state_t *rs)
161{
162 long val;
163
164 lrand48_r(rs, &val);
165 return val;
166}
167
/*
 * Look up the block device bound to raw device @dev.
 *
 * Opens /dev/rawctl (or /dev/raw/rawctl if that fails) and issues
 * RAW_GETBIND for the minor number of @dev. On success the bound block
 * device's major/minor are stored in *@majdev and *@mindev.
 *
 * Returns 0 on success. Returns 1 if @dev does not have major RAW_MAJOR,
 * if neither control node can be opened, or if the ioctl fails.
 */
static inline int fio_lookup_raw(dev_t dev, int *majdev, int *mindev)
{
	struct raw_config_request bind_req;
	int ctl_fd, rc;

	if (major(dev) != RAW_MAJOR)
		return 1;

	/* The control node lives in one of two well-known places. */
	ctl_fd = open("/dev/rawctl", O_RDONLY);
	if (ctl_fd < 0)
		ctl_fd = open("/dev/raw/rawctl", O_RDONLY);
	if (ctl_fd < 0)
		return 1;

	bind_req.raw_minor = minor(dev);
	rc = ioctl(ctl_fd, RAW_GETBIND, &bind_req);
	close(ctl_fd);
	if (rc < 0)
		return 1;

	*majdev = bind_req.block_major;
	*mindev = bind_req.block_minor;
	return 0;
}
197
/* FIO_O_NOATIME is O_NOATIME when available, otherwise 0 (no flag bits). */
#ifdef O_NOATIME
#define FIO_O_NOATIME O_NOATIME
#else
#define FIO_O_NOATIME 0
#endif

/*
 * OS_O_ATOMIC is O_ATOMIC when the headers define it; otherwise a
 * hard-coded octal value is used.
 */
#ifdef O_ATOMIC
#define OS_O_ATOMIC O_ATOMIC
#else
#define OS_O_ATOMIC 040000000
#endif

/*
 * FIO_MADV_FREE maps to MADV_REMOVE. It is left undefined when
 * MADV_REMOVE is not available.
 */
#ifdef MADV_REMOVE
#define FIO_MADV_FREE MADV_REMOVE
#endif
213
/*
 * Byte-swap helpers: use the compiler builtins when they exist, otherwise
 * fall back to the <byteswap.h> macros.
 *
 * Compiler builtins are not preprocessor macros, so probing them with
 * defined() is always false and the builtin branch was never taken.
 * __has_builtin() is the proper probe. It is only evaluated when the
 * compiler provides it, hence the nested #if rather than a single
 * "defined(...) && __has_builtin(...)" expression.
 */
#if defined(__has_builtin)
#if __has_builtin(__builtin_bswap16)
#define fio_swap16(x)	__builtin_bswap16(x)
#endif
#if __has_builtin(__builtin_bswap32)
#define fio_swap32(x)	__builtin_bswap32(x)
#endif
#if __has_builtin(__builtin_bswap64)
#define fio_swap64(x)	__builtin_bswap64(x)
#endif
#endif /* __has_builtin */

#ifndef fio_swap16
#define fio_swap16(x)	__bswap_16(x)
#endif
#ifndef fio_swap32
#define fio_swap32(x)	__bswap_32(x)
#endif
#ifndef fio_swap64
#define fio_swap64(x)	__bswap_64(x)
#endif
229
#define CACHE_LINE_FILE	\
	"/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"

/*
 * Read the cache line size from CACHE_LINE_FILE.
 *
 * Returns the value parsed by atoi(), or -1 if the file cannot be opened
 * or nothing could be read from it.
 */
static inline int arch_cache_line_size(void)
{
	char size[32];
	ssize_t ret;
	int fd;

	fd = open(CACHE_LINE_FILE, O_RDONLY);
	if (fd < 0)
		return -1;

	/*
	 * read() does not NUL-terminate, so keep one byte spare and
	 * terminate explicitly before handing the buffer to atoi().
	 */
	ret = read(fd, size, sizeof(size) - 1);

	close(fd);

	if (ret <= 0)
		return -1;

	size[ret] = '\0';
	return atoi(size);
}
251
/*
 * Compute f_bsize * f_bfree from statfs() for the filesystem that
 * contains @path.
 *
 * Returns -1ULL if statfs() fails.
 */
static inline unsigned long long get_fs_free_size(const char *path)
{
	struct statfs fs_info;

	if (statfs(path, &fs_info) < 0)
		return -1ULL;

	return (unsigned long long) fs_info.f_bsize *
		(unsigned long long) fs_info.f_bfree;
}
264
/*
 * Issue the BLKDISCARD ioctl on @fd, passing { @start, @len } as a
 * two-element uint64_t array.
 *
 * Returns 0 when the ioctl returns 0, otherwise the current errno value.
 */
static inline int os_trim(int fd, unsigned long long start,
			  unsigned long long len)
{
	uint64_t range[2] = { start, len };

	return ioctl(fd, BLKDISCARD, range) ? errno : 0;
}
278
#ifdef CONFIG_SCHED_IDLE
/*
 * Switch the thread identified by gettid() to the SCHED_IDLE policy with
 * a scheduling priority of 0.
 *
 * Returns the sched_setscheduler() result.
 */
static inline int fio_set_sched_idle(void)
{
	struct sched_param param = { .sched_priority = 0, };
	const int tid = gettid();

	return sched_setscheduler(tid, SCHED_IDLE, &param);
}
#endif
286
/* Fallback for headers that do not provide POSIX_FADV_STREAMID. */
#if !defined(POSIX_FADV_STREAMID)
#define POSIX_FADV_STREAMID	8
#endif

#define FIO_HAVE_STREAMID

/*
 * Fallback RWF_* flag values for headers that do not define them
 * (presumably meant for the flags argument of preadv2()/pwritev2()).
 */
#if !defined(RWF_HIPRI)
#define RWF_HIPRI	0x00000001
#endif
#if !defined(RWF_DSYNC)
#define RWF_DSYNC	0x00000002
#endif
#if !defined(RWF_SYNC)
#define RWF_SYNC	0x00000004
#endif
302
303#ifndef CONFIG_PWRITEV2
304#ifdef __NR_preadv2
/*
 * Split @offset into the (pos_l, pos_h) pair taken by the raw
 * preadv2/pwritev2 syscalls.
 *
 * The kernel rebuilds the offset as pos_l | (pos_h << BITS_PER_LONG).
 * When long is 64 bits wide pos_h therefore contributes nothing and pos_l
 * must carry the whole offset; masking it to 32 bits would silently
 * truncate offsets of 4 GiB and above. Only when long is 32 bits wide is
 * the offset split into low and high halves.
 */
static inline void make_pos_h_l(unsigned long *pos_h, unsigned long *pos_l,
				off_t offset)
{
	/* Compile-time constant condition; the dead branch is dropped. */
	if (sizeof(unsigned long) >= sizeof(uint64_t)) {
		*pos_l = (unsigned long) offset;
		*pos_h = 0;
	} else {
		*pos_l = offset & 0xffffffff;
		*pos_h = ((uint64_t) offset) >> 32;
	}
}
/*
 * preadv2() fallback that invokes the raw __NR_preadv2 syscall.
 * @offset is converted to a low/high pair by make_pos_h_l() first.
 *
 * Returns the syscall() result.
 */
static inline ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt,
			      off_t offset, unsigned int flags)
{
	unsigned long off_lo, off_hi;

	make_pos_h_l(&off_hi, &off_lo, offset);

	return syscall(__NR_preadv2, fd, iov, iovcnt, off_lo, off_hi, flags);
}
/*
 * pwritev2() fallback that invokes the raw __NR_pwritev2 syscall.
 * @offset is converted to a low/high pair by make_pos_h_l() first.
 *
 * Returns the syscall() result.
 */
static inline ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt,
			       off_t offset, unsigned int flags)
{
	unsigned long off_lo, off_hi;

	make_pos_h_l(&off_hi, &off_lo, offset);

	return syscall(__NR_pwritev2, fd, iov, iovcnt, off_lo, off_hi, flags);
}
328#else
/*
 * preadv2() stub for builds where __NR_preadv2 is not defined.
 * Ignores all arguments, sets errno to ENOSYS and returns -1.
 */
static inline ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt,
			      off_t offset, unsigned int flags)
{
	(void) fd;
	(void) iov;
	(void) iovcnt;
	(void) offset;
	(void) flags;

	errno = ENOSYS;
	return -1;
}
/*
 * pwritev2() stub for builds where __NR_preadv2 is not defined.
 * Ignores all arguments, sets errno to ENOSYS and returns -1.
 */
static inline ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt,
			       off_t offset, unsigned int flags)
{
	(void) fd;
	(void) iov;
	(void) iovcnt;
	(void) offset;
	(void) flags;

	errno = ENOSYS;
	return -1;
}
341#endif /* __NR_preadv2 */
342#endif /* CONFIG_PWRITEV2 */
343
344#endif