summaryrefslogtreecommitdiff
path: root/io_u.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2017-11-30 09:18:37 -0700
committerJens Axboe <axboe@kernel.dk>2017-11-30 09:18:37 -0700
commit319b073fda497d15183d7927a26c3612d3e210ec (patch)
tree5b43552c07611d838f58dd37c38ae398505f1cef /io_u.c
parentc9a7305461ffe8f22a4295a40e9cc919175d51e7 (diff)
downloadfio-319b073fda497d15183d7927a26c3612d3e210ec.tar.gz
fio-319b073fda497d15183d7927a26c3612d3e210ec.tar.bz2
io_u: speed up small_content_scramble()
This is a hot path for write workloads, since we don't want to send the same buffers to the device again and again. The idea is to defeat basic dedupe/compression by slightly modifying the buffer for each write. small_content_scramble() does this by filling in the io_u offset into a random spot in each 512b chunk of an io buffer, and filling in the start time (sec,nsec) at the end of each 512b chunk. With this change, we still do those two things, but we generate a random cacheline within each 512b chunk, and fill the offset at the beginning of the cacheline, and the time at the end of it. This means that instead of potentially dirtying 2 cachelines for each 512b chunk in an IO buffer, we dirty just 1. The results should still be random enough that small_content_scramble() fulfils the promise to defeat basic dedupe and compression, but it is lighter to run. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_u.c')
-rw-r--r--io_u.c36
1 files changed, 22 insertions, 14 deletions
diff --git a/io_u.c b/io_u.c
index 086384a1..6bb9eabf 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1669,32 +1669,40 @@ static bool check_get_verify(struct thread_data *td, struct io_u *io_u)
*/
static void small_content_scramble(struct io_u *io_u)
{
- unsigned int i, nr_blocks = io_u->buflen / 512;
+ unsigned int i, nr_blocks = io_u->buflen >> 9;
unsigned int offset;
- uint64_t boffset;
- char *p, *end;
+ uint64_t boffset, *iptr;
+ char *p;
if (!nr_blocks)
return;
p = io_u->xfer_buf;
boffset = io_u->offset;
- io_u->buf_filled_len = 0;
+
+ if (io_u->buf_filled_len)
+ io_u->buf_filled_len = 0;
+
+ /*
+ * Generate random index between 0..7. We do chunks of 512b, if
+ * we assume a cacheline is 64 bytes, then we have 8 of those.
+ * Scramble content within the blocks in the same cacheline to
+ * speed things up.
+ */
+ offset = (io_u->start_time.tv_nsec ^ boffset) & 7;
for (i = 0; i < nr_blocks; i++) {
/*
- * Fill the byte offset into a "random" start offset of
- * the first half of the buffer.
+ * Fill offset into start of cacheline, time into end
+ * of cacheline
*/
- offset = (io_u->start_time.tv_nsec ^ boffset) & 255;
- offset &= ~(sizeof(boffset) - 1);
- memcpy(p + offset, &boffset, sizeof(boffset));
+ iptr = (void *) p + (offset << 6);
+ *iptr = boffset;
+
+ iptr = (void *) p + 64 - 2 * sizeof(uint64_t);
+ iptr[0] = io_u->start_time.tv_sec;
+ iptr[1] = io_u->start_time.tv_nsec;
- /*
- * Fill the start time into the end of the buffer
- */
- end = p + 512 - sizeof(io_u->start_time);
- memcpy(end, &io_u->start_time, sizeof(io_u->start_time));
p += 512;
boffset += 512;
}