t/dedupe: avoid div-by-zero for all identical chunks
[fio.git] / t / dedupe.c
CommitLineData
bf481692
JA
1/*
2 * Small tool to check for dedupable blocks in a file or device. Basically
3 * just scans the filename for extents of the given size, checksums them,
4 * and orders them up.
5 */
6#include <stdio.h>
7#include <stdio.h>
8#include <unistd.h>
9#include <inttypes.h>
10#include <assert.h>
11#include <sys/types.h>
12#include <sys/stat.h>
13#include <sys/ioctl.h>
14#include <linux/fs.h>
15#include <fcntl.h>
16#include <string.h>
17
18#include "../lib/rbtree.h"
19#include "../flist.h"
20#include "../log.h"
21#include "../mutex.h"
22#include "../smalloc.h"
23#include "../minmax.h"
24#include "../crc/md5.h"
25#include "../memalign.h"
26#include "../os/os.h"
5a155943
JA
27#include "../gettime.h"
28#include "../fio_time.h"
bf481692 29
fa88cd09 30#include "../lib/bloom.h"
9077ee4a 31#include "debug.h"
bf481692
JA
32
33struct worker_thread {
34 pthread_t thread;
35
4d53d6c0
JA
36 volatile int done;
37
bf481692
JA
38 int fd;
39 uint64_t cur_offset;
40 uint64_t size;
41
42 unsigned long items;
fa88cd09 43 unsigned long dupes;
bf481692
JA
44 int err;
45};
46
47struct extent {
48 struct flist_head list;
49 uint64_t offset;
50};
51
52struct chunk {
53 struct rb_node rb_node;
bf481692
JA
54 uint64_t count;
55 uint32_t hash[MD5_HASH_WORDS];
5a155943 56 struct flist_head extent_list[0];
bf481692
JA
57};
58
59struct item {
60 uint64_t offset;
61 uint32_t hash[MD5_HASH_WORDS];
62};
63
64static struct rb_root rb_root;
fa88cd09 65static struct bloom *bloom;
bf481692
JA
66static struct fio_mutex *rb_lock;
67
68static unsigned int blocksize = 4096;
69static unsigned int num_threads;
70static unsigned int chunk_size = 1048576;
71static unsigned int dump_output;
72static unsigned int odirect;
73static unsigned int collision_check;
4d53d6c0 74static unsigned int print_progress = 1;
fa88cd09 75static unsigned int use_bloom = 1;
bf481692
JA
76
77static uint64_t total_size;
78static uint64_t cur_offset;
79static struct fio_mutex *size_lock;
80
81static int dev_fd;
82
83static uint64_t get_size(int fd, struct stat *sb)
84{
85 uint64_t ret;
86
87 if (S_ISBLK(sb->st_mode)) {
88 if (ioctl(fd, BLKGETSIZE64, &ret) < 0) {
89 perror("ioctl");
90 return 0;
91 }
92 } else
93 ret = sb->st_size;
94
95 return (ret & ~((uint64_t)blocksize - 1));
96}
97
98static int get_work(uint64_t *offset, uint64_t *size)
99{
100 uint64_t this_chunk;
101 int ret = 1;
102
103 fio_mutex_down(size_lock);
104
105 if (cur_offset < total_size) {
106 *offset = cur_offset;
107 this_chunk = min((uint64_t)chunk_size, total_size - cur_offset);
108 *size = this_chunk;
109 cur_offset += this_chunk;
110 ret = 0;
111 }
112
113 fio_mutex_up(size_lock);
114 return ret;
115}
116
13793d02 117static int __read_block(int fd, void *buf, off_t offset, size_t count)
bf481692
JA
118{
119 ssize_t ret;
120
13793d02 121 ret = pread(fd, buf, count, offset);
bf481692
JA
122 if (ret < 0) {
123 perror("pread");
124 return 1;
125 } else if (!ret)
126 return 1;
13793d02 127 else if (ret != count) {
bf481692
JA
128 log_err("dedupe: short read on block\n");
129 return 1;
130 }
131
132 return 0;
133}
134
13793d02
JA
135static int read_block(int fd, void *buf, off_t offset)
136{
137 return __read_block(fd, buf, offset, blocksize);
138}
139
bf481692
JA
140static void add_item(struct chunk *c, struct item *i)
141{
87818659
JA
142 /*
143 * Save some memory and don't add extent items, if we don't
144 * use them.
145 */
146 if (dump_output || collision_check) {
147 struct extent *e;
148
149 e = malloc(sizeof(*e));
150 e->offset = i->offset;
5a155943 151 flist_add_tail(&e->list, &c->extent_list[0]);
87818659 152 }
bf481692 153
bf481692
JA
154 c->count++;
155}
156
157static int col_check(struct chunk *c, struct item *i)
158{
159 struct extent *e;
160 char *cbuf, *ibuf;
161 int ret = 1;
162
163 cbuf = fio_memalign(blocksize, blocksize);
164 ibuf = fio_memalign(blocksize, blocksize);
165
5a155943 166 e = flist_entry(c->extent_list[0].next, struct extent, list);
bf481692
JA
167 if (read_block(dev_fd, cbuf, e->offset))
168 goto out;
169
170 if (read_block(dev_fd, ibuf, i->offset))
171 goto out;
172
173 ret = memcmp(ibuf, cbuf, blocksize);
174out:
175 fio_memfree(cbuf, blocksize);
176 fio_memfree(ibuf, blocksize);
177 return ret;
178}
179
5a155943
JA
180static struct chunk *alloc_chunk(void)
181{
182 struct chunk *c;
183
184 if (collision_check || dump_output) {
185 c = malloc(sizeof(struct chunk) + sizeof(struct flist_head));
186 INIT_FLIST_HEAD(&c->extent_list[0]);
187 } else
188 c = malloc(sizeof(struct chunk));
189
190 return c;
191}
192
bf481692
JA
193static void insert_chunk(struct item *i)
194{
195 struct rb_node **p, *parent;
196 struct chunk *c;
197 int diff;
198
199 p = &rb_root.rb_node;
200 parent = NULL;
201 while (*p) {
202 parent = *p;
203
204 c = rb_entry(parent, struct chunk, rb_node);
205 diff = memcmp(i->hash, c->hash, sizeof(i->hash));
206 if (diff < 0)
207 p = &(*p)->rb_left;
208 else if (diff > 0)
209 p = &(*p)->rb_right;
210 else {
211 int ret;
212
213 if (!collision_check)
214 goto add;
215
216 fio_mutex_up(rb_lock);
217 ret = col_check(c, i);
218 fio_mutex_down(rb_lock);
219
220 if (!ret)
221 goto add;
222
223 p = &(*p)->rb_right;
224 }
225 }
226
5a155943 227 c = alloc_chunk();
bf481692 228 RB_CLEAR_NODE(&c->rb_node);
bf481692
JA
229 c->count = 0;
230 memcpy(c->hash, i->hash, sizeof(i->hash));
231 rb_link_node(&c->rb_node, parent, p);
232 rb_insert_color(&c->rb_node, &rb_root);
233add:
234 add_item(c, i);
235}
236
fa88cd09
JA
237static void insert_chunks(struct item *items, unsigned int nitems,
238 uint64_t *ndupes)
bf481692
JA
239{
240 int i;
241
242 fio_mutex_down(rb_lock);
243
fa88cd09
JA
244 for (i = 0; i < nitems; i++) {
245 if (bloom) {
246 unsigned int s;
247 int r;
248
249 s = sizeof(items[i].hash) / sizeof(uint32_t);
250 r = bloom_set(bloom, items[i].hash, s);
251 *ndupes += r;
252 } else
253 insert_chunk(&items[i]);
254 }
bf481692
JA
255
256 fio_mutex_up(rb_lock);
257}
258
259static void crc_buf(void *buf, uint32_t *hash)
260{
261 struct fio_md5_ctx ctx = { .hash = hash };
262
263 fio_md5_init(&ctx);
264 fio_md5_update(&ctx, buf, blocksize);
265 fio_md5_final(&ctx);
266}
267
13793d02
JA
268static unsigned int read_blocks(int fd, void *buf, off_t offset, size_t size)
269{
270 if (__read_block(fd, buf, offset, size))
271 return 0;
272
273 return size / blocksize;
274}
275
bf481692
JA
276static int do_work(struct worker_thread *thread, void *buf)
277{
278 unsigned int nblocks, i;
279 off_t offset;
13793d02 280 int nitems = 0;
fa88cd09 281 uint64_t ndupes = 0;
bf481692
JA
282 struct item *items;
283
bf481692 284 offset = thread->cur_offset;
13793d02
JA
285
286 nblocks = read_blocks(thread->fd, buf, offset, min(thread->size, (uint64_t)chunk_size));
287 if (!nblocks)
288 return 1;
289
bf481692
JA
290 items = malloc(sizeof(*items) * nblocks);
291
292 for (i = 0; i < nblocks; i++) {
13793d02
JA
293 void *thisptr = buf + (i * blocksize);
294
fa88cd09
JA
295 if (items)
296 items[i].offset = offset;
13793d02 297 crc_buf(thisptr, items[i].hash);
bf481692
JA
298 offset += blocksize;
299 nitems++;
300 }
301
fa88cd09
JA
302 insert_chunks(items, nitems, &ndupes);
303
bf481692 304 free(items);
fa88cd09
JA
305 thread->items += nitems;
306 thread->dupes += ndupes;
13793d02 307 return 0;
bf481692
JA
308}
309
310static void *thread_fn(void *data)
311{
312 struct worker_thread *thread = data;
313 void *buf;
314
13793d02 315 buf = fio_memalign(blocksize, chunk_size);
bf481692
JA
316
317 do {
318 if (get_work(&thread->cur_offset, &thread->size)) {
319 thread->err = 1;
320 break;
321 }
322 if (do_work(thread, buf)) {
323 thread->err = 1;
324 break;
325 }
326 } while (1);
327
4d53d6c0 328 thread->done = 1;
13793d02 329 fio_memfree(buf, chunk_size);
bf481692
JA
330 return NULL;
331}
332
5a155943
JA
333static void show_progress(struct worker_thread *threads, unsigned long total)
334{
335 unsigned long last_nitems = 0;
336 struct timeval last_tv;
337
338 fio_gettime(&last_tv, NULL);
339
340 while (print_progress) {
341 unsigned long this_items;
342 unsigned long nitems = 0;
343 uint64_t tdiff;
344 float perc;
423adcc7 345 int some_done = 0;
5a155943
JA
346 int i;
347
348 for (i = 0; i < num_threads; i++) {
349 nitems += threads[i].items;
350 some_done = threads[i].done;
351 if (some_done)
352 break;
353 }
354
355 if (some_done)
356 break;
357
358 perc = (float) nitems / (float) total;
359 perc *= 100.0;
360 this_items = nitems - last_nitems;
361 this_items *= blocksize;
362 tdiff = mtime_since_now(&last_tv);
363 if (tdiff) {
45b56027 364 this_items = (this_items * 1000) / (tdiff * 1024);
5a155943
JA
365 printf("%3.2f%% done (%luKB/sec)\r", perc, this_items);
366 last_nitems = nitems;
367 fio_gettime(&last_tv, NULL);
368 } else
369 printf("%3.2f%% done\r", perc);
370 fflush(stdout);
371 usleep(250000);
372 };
373}
374
fa88cd09
JA
375static int run_dedupe_threads(int fd, uint64_t dev_size, uint64_t *nextents,
376 uint64_t *nchunks)
bf481692
JA
377{
378 struct worker_thread *threads;
4d53d6c0 379 unsigned long nitems, total_items;
bf481692
JA
380 int i, err = 0;
381
382 total_size = dev_size;
4d53d6c0 383 total_items = dev_size / blocksize;
bf481692
JA
384 cur_offset = 0;
385 size_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
386
387 threads = malloc(num_threads * sizeof(struct worker_thread));
388 for (i = 0; i < num_threads; i++) {
389 threads[i].fd = fd;
390 threads[i].items = 0;
391 threads[i].err = 0;
4d53d6c0 392 threads[i].done = 0;
bf481692
JA
393
394 err = pthread_create(&threads[i].thread, NULL, thread_fn, &threads[i]);
395 if (err) {
396 log_err("fio: thread startup failed\n");
397 break;
398 }
399 }
400
5a155943 401 show_progress(threads, total_items);
4d53d6c0 402
bf481692 403 nitems = 0;
fa88cd09
JA
404 *nextents = 0;
405 *nchunks = 1;
bf481692
JA
406 for (i = 0; i < num_threads; i++) {
407 void *ret;
408 pthread_join(threads[i].thread, &ret);
409 nitems += threads[i].items;
fa88cd09 410 *nchunks += threads[i].dupes;
bf481692
JA
411 }
412
413 printf("Threads(%u): %lu items processed\n", num_threads, nitems);
414
fa88cd09
JA
415 *nextents = nitems;
416 *nchunks = nitems - *nchunks;
417
bf481692 418 fio_mutex_remove(size_lock);
5a155943 419 free(threads);
bf481692
JA
420 return err;
421}
422
fa88cd09
JA
423static int dedupe_check(const char *filename, uint64_t *nextents,
424 uint64_t *nchunks)
bf481692
JA
425{
426 uint64_t dev_size;
427 struct stat sb;
428 int flags;
429
430 flags = O_RDONLY;
431 if (odirect)
432 flags |= O_DIRECT;
433
434 dev_fd = open(filename, flags);
435 if (dev_fd == -1) {
436 perror("open");
437 return 1;
438 }
439
440 if (fstat(dev_fd, &sb) < 0) {
441 perror("fstat");
442 close(dev_fd);
443 return 1;
444 }
445
446 dev_size = get_size(dev_fd, &sb);
447 if (!dev_size) {
448 close(dev_fd);
449 return 1;
450 }
451
fa88cd09
JA
452 if (use_bloom) {
453 uint64_t bloom_entries;
454
bc095aab 455 bloom_entries = 8 * (dev_size / blocksize);
fa88cd09
JA
456 bloom = bloom_new(bloom_entries);
457 }
458
f7bd62dd 459 printf("Will check <%s>, size <%llu>, using %u threads\n", filename, (unsigned long long) dev_size, num_threads);
bf481692 460
fa88cd09 461 return run_dedupe_threads(dev_fd, dev_size, nextents, nchunks);
bf481692
JA
462}
463
464static void show_chunk(struct chunk *c)
465{
466 struct flist_head *n;
467 struct extent *e;
468
761c2729 469 printf("c hash %8x %8x %8x %8x, count %lu\n", c->hash[0], c->hash[1], c->hash[2], c->hash[3], (unsigned long) c->count);
5a155943 470 flist_for_each(n, &c->extent_list[0]) {
bf481692 471 e = flist_entry(n, struct extent, list);
761c2729 472 printf("\toffset %llu\n", (unsigned long long) e->offset);
bf481692
JA
473 }
474}
475
fa88cd09 476static void show_stat(uint64_t nextents, uint64_t nchunks)
bf481692 477{
9ca144fe 478 double perc, ratio;
bf481692 479
fa88cd09 480 printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
15e3ca5b
JA
481
482 if (nchunks) {
483 ratio = (double) nextents / (double) nchunks;
484 printf("De-dupe ratio: 1:%3.2f\n", ratio - 1.0);
485 } else
486 printf("De-dupe ratio: 1:infinite\n");
fa88cd09
JA
487
488 perc = 1.00 - ((double) nchunks / (double) nextents);
489 perc *= 100.0;
490 printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
491
492}
493
494static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks)
495{
496 struct rb_node *n;
497
498 *nchunks = *nextents = 0;
bf481692
JA
499
500 n = rb_first(&rb_root);
501 if (!n)
502 return;
503
504 do {
505 struct chunk *c;
506
507 c = rb_entry(n, struct chunk, rb_node);
fa88cd09
JA
508 (*nchunks)++;
509 *nextents += c->count;
bf481692
JA
510
511 if (dump_output)
512 show_chunk(c);
513
514 } while ((n = rb_next(n)) != NULL);
bf481692
JA
515}
516
517static int usage(char *argv[])
518{
519 log_err("Check for dedupable blocks on a device/file\n\n");
520 log_err("%s: [options] <device or file>\n", argv[0]);
521 log_err("\t-b\tChunk size to use\n");
522 log_err("\t-t\tNumber of threads to use\n");
523 log_err("\t-d\tFull extent/chunk debug output\n");
524 log_err("\t-o\tUse O_DIRECT\n");
525 log_err("\t-c\tFull collision check\n");
fa88cd09 526 log_err("\t-B\tUse probabilistic bloom filter\n");
4d53d6c0 527 log_err("\t-p\tPrint progress indicator\n");
bf481692
JA
528 return 1;
529}
530
531int main(int argc, char *argv[])
532{
9b36c122 533 uint64_t nextents = 0, nchunks = 0;
bf481692
JA
534 int c, ret;
535
9077ee4a
JA
536 debug_init();
537
fa88cd09 538 while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) {
bf481692
JA
539 switch (c) {
540 case 'b':
541 blocksize = atoi(optarg);
542 break;
543 case 't':
544 num_threads = atoi(optarg);
545 break;
546 case 'd':
547 dump_output = atoi(optarg);
548 break;
549 case 'o':
550 odirect = atoi(optarg);
551 break;
552 case 'c':
553 collision_check = atoi(optarg);
554 break;
4d53d6c0
JA
555 case 'p':
556 print_progress = atoi(optarg);
557 break;
fa88cd09
JA
558 case 'B':
559 use_bloom = atoi(optarg);
560 break;
bf481692
JA
561 case '?':
562 default:
563 return usage(argv);
564 }
565 }
566
fa88cd09
JA
567 if (collision_check || dump_output)
568 use_bloom = 0;
569
bf481692
JA
570 if (!num_threads)
571 num_threads = cpus_online();
572
573 if (argc == optind)
574 return usage(argv);
575
576 sinit();
577
578 rb_root = RB_ROOT;
579 rb_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
580
fa88cd09
JA
581 ret = dedupe_check(argv[optind], &nextents, &nchunks);
582
179b3ead
JA
583 if (!ret) {
584 if (!bloom)
585 iter_rb_tree(&nextents, &nchunks);
bf481692 586
179b3ead
JA
587 show_stat(nextents, nchunks);
588 }
bf481692 589
1956a9e5 590 fio_mutex_remove(rb_lock);
9b36c122
JA
591 if (bloom)
592 bloom_free(bloom);
bf481692
JA
593 scleanup();
594 return ret;
595}