This (finally) provides macOS cache invalidation and is heavily based on
code originally provided by DeveloperEcosystemEngineering@apple.
Because posix_fadvise() isn't implemented on macOS,
DeveloperEcosystemEngineering demonstrated that creating a shared
mapping of a file and using msync([...], MS_INVALIDATE) on it can
be used to discard covered page cache pages instead - ingenious! This
commit uses that technique to create a macOS posix_fadvise([...],
POSIX_FADV_DONTNEED) shim.
To paraphrase commit
8300eba5 ("windowsaio: add best effort cache
invalidation") that was done for similar reasons:
This change may make default bandwidth speeds on macOS look lower
compared to older versions of fio but this matches the behaviour of fio
on other platforms with invalidation (such as Linux) because we are
trying to avoid measuring cache reuse (unless invalidate=0 is set).
The impact of invalidation is demonstrated by the bandwidths achieved by
the following jobs running on an SSD of an otherwise idle Intel Mac
laptop with 16GBytes of RAM:
./fio --stonewall --size=128M --ioengine=posixaio --filename=fio.tmp \
--iodepth=64 --bs=4k --direct=0 \
--name=create --rw=write \
--name=cached --rw=randread --loops=2 --invalidate=0 \
--name=invalidated --rw=randread --loops=2 --invalidate=1
[...]
cached: (groupid=1, jobs=1): err= 0: pid=7795: Tue Sep 2 22:34:12 2025
read: IOPS=228k, BW=889MiB/s (932MB/s)(256MiB/288msec)
[...]
invalidated: (groupid=2, jobs=1): err= 0: pid=7796: Tue Sep 2 22:34:12 2025
read: IOPS=46.8k, BW=183MiB/s (192MB/s)(256MiB/1399msec)
v2:
- Move platform specific code into its own file under os/mac/
- Don't do prior fsync() because msync([...], MS_INVALIDATE) doesn't
imply the dropping of dirty pages and will have the same effect
v3:
- Up the mmap chunk size to 16 GBytes to reduce the number of times we
mmap()/msync()/munmap() on large files
- Align offset and len to the system page size to prevent errors on jobs
like ./fio --name=n --offset=2k --size=30k
- Try and munmap() if msync() fails
- Make Rosetta comment clearer
- Drop some variables and rename some others
- Don't bother trying to restore errno after displaying an error message
because posix_fadvise() isn't defined as setting errno
Fixes: https://github.com/axboe/fio/issues/48
Suggested-by: DeveloperEcosystemEngineering <DeveloperEcosystemEngineering@apple.com>
Signed-off-by: Sitsofe Wheeler <sitsofe@yahoo.com>
endif
ifeq ($(CONFIG_TARGET_OS), Darwin)
LIBS += -lpthread -ldl
+ SOURCE += os/mac/posix.c
endif
ifneq (,$(findstring CYGWIN,$(CONFIG_TARGET_OS)))
SOURCE += os/windows/cpu-affinity.c os/windows/posix.c os/windows/dlls.c
--- /dev/null
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../../log.h"
+
+#include "posix.h"
+
+#define MMAP_CHUNK_SIZE (16LL * 1024 * 1024 * 1024)
+
+/*
+ * Ask the kernel to drop cached pages backing [offset, offset + len) of fd.
+ *
+ * The range is widened to whole pages, then handled in pieces of at most
+ * MMAP_CHUNK_SIZE bytes: each piece is mapped MAP_SHARED, invalidated with
+ * msync(MS_INVALIDATE) and unmapped again. The mapping is never read or
+ * written (hence PROT_NONE); it only exists so msync() has a range to act on.
+ *
+ * Returns 0 on success (including when the range is empty), otherwise the
+ * errno value of the first call that failed. Failures are also reported via
+ * log_err().
+ *
+ * NB: performance of discard_pages() will be slower under Rosetta.
+ */
+static int discard_pages(int fd, off_t offset, off_t len)
+{
+	const long pagesize = sysconf(_SC_PAGESIZE);
+	long offset_pad;
+
+	/* The alignment arithmetic below needs a sane, positive page size */
+	if (pagesize <= 0)
+		return EINVAL;
+
+	/* Round offset down and len up so both are page size multiples */
+	offset_pad = offset % pagesize;
+	offset -= offset_pad;
+	len += offset_pad;
+	len = (len + pagesize - 1) & -pagesize;
+
+	while (len > 0) {
+		size_t mmap_len = MIN(MMAP_CHUNK_SIZE, len);
+		int ret = 0;
+		void *addr;
+
+		addr = mmap(NULL, mmap_len, PROT_NONE, MAP_SHARED, fd, offset);
+		if (addr == MAP_FAILED) {
+			ret = errno;
+			log_err("discard_pages: failed to mmap (%s), "
+				"offset = %lld, len = %zu\n",
+				strerror(ret), (long long) offset, mmap_len);
+			return ret;
+		}
+
+		if (msync(addr, mmap_len, MS_INVALIDATE) < 0) {
+			ret = errno;
+			log_err("discard_pages: msync failed to free cache "
+				"pages (%s), offset = %lld, len = %zu\n",
+				strerror(ret), (long long) offset, mmap_len);
+		}
+
+		/*
+		 * Always unmap, even when msync() failed, so the mapping is
+		 * not leaked. An msync() error takes precedence over a
+		 * munmap() error in the return value.
+		 */
+		if (munmap(addr, mmap_len) < 0) {
+			int unmap_errno = errno;
+
+			log_err("discard_pages: munmap failed (%s), "
+				"len = %zu\n", strerror(unmap_errno),
+				mmap_len);
+			if (!ret)
+				ret = unmap_errno;
+		}
+
+		if (ret)
+			return ret;
+
+		len -= mmap_len;
+		offset += mmap_len;
+	}
+
+	return 0;
+}
+
+/*
+ * Minimal posix_fadvise() replacement.
+ *
+ * POSIX_FADV_NORMAL, POSIX_FADV_RANDOM and POSIX_FADV_SEQUENTIAL are accepted
+ * and ignored. POSIX_FADV_DONTNEED drops the cached pages of the given range
+ * via discard_pages(); following POSIX, a len of zero means "from offset to
+ * the end of the file" and a negative len is rejected.
+ *
+ * Like the real call, the result is returned directly rather than through
+ * errno: 0 on success, EINVAL for an unknown advice value or negative len,
+ * or the error number of whichever underlying call failed.
+ */
+int posix_fadvise(int fd, off_t offset, off_t len, int advice)
+{
+	struct stat st;
+
+	switch (advice) {
+	case POSIX_FADV_NORMAL:
+	case POSIX_FADV_RANDOM:
+	case POSIX_FADV_SEQUENTIAL:
+		return 0;
+	case POSIX_FADV_DONTNEED:
+		if (len < 0)
+			return EINVAL;
+
+		if (len == 0) {
+			/* Zero length: cover everything after offset */
+			if (fstat(fd, &st) < 0)
+				return errno;
+			/*
+			 * Nothing lies beyond offset (also the outcome when
+			 * fstat() reports a size of zero).
+			 */
+			if (st.st_size <= offset)
+				return 0;
+			len = st.st_size - offset;
+		}
+
+		return discard_pages(fd, offset, len);
+	default:
+		return EINVAL;
+	}
+}
--- /dev/null
+#ifndef FIO_MAC_POSIX_H
+#define FIO_MAC_POSIX_H
+/* Advice values for the posix_fadvise() shim; only DONTNEED does any work. */
+#define POSIX_FADV_NORMAL (0)
+#define POSIX_FADV_RANDOM (1)
+#define POSIX_FADV_SEQUENTIAL (2)
+#define POSIX_FADV_DONTNEED (4)
+/* Returns 0 on success, otherwise an errno value (EINVAL for unknown advice). */
+extern int posix_fadvise(int fd, off_t offset, off_t len, int advice);
+
+#endif
#include "../arch/arch.h"
#include "../file.h"
+#include "mac/posix.h"
+
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_GETTID
#define FIO_HAVE_CHARDEV_SIZE
}
#endif
+
+#define CONFIG_POSIX_FADVISE