Commit | Line | Data |
---|---|---|
bf481692 JA |
1 | /* |
2 | * Small tool to check for dedupable blocks in a file or device. Basically | |
3 | * just scans the filename for extents of the given size, checksums them, | |
4 | * and orders them up. | |
5 | */ | |
6 | #include <stdio.h> | |
7 | #include <stdio.h> | |
8 | #include <unistd.h> | |
9 | #include <inttypes.h> | |
10 | #include <assert.h> | |
11 | #include <sys/types.h> | |
12 | #include <sys/stat.h> | |
13 | #include <sys/ioctl.h> | |
bf481692 JA |
14 | #include <fcntl.h> |
15 | #include <string.h> | |
16 | ||
bf481692 JA |
17 | #include "../flist.h" |
18 | #include "../log.h" | |
19 | #include "../mutex.h" | |
20 | #include "../smalloc.h" | |
21 | #include "../minmax.h" | |
22 | #include "../crc/md5.h" | |
f7690c4a | 23 | #include "../lib/memalign.h" |
bf481692 | 24 | #include "../os/os.h" |
5a155943 JA |
25 | #include "../gettime.h" |
26 | #include "../fio_time.h" | |
55660f42 | 27 | #include "../lib/rbtree.h" |
bf481692 | 28 | |
fa88cd09 | 29 | #include "../lib/bloom.h" |
9077ee4a | 30 | #include "debug.h" |
bf481692 JA |
31 | |
32 | struct worker_thread { | |
33 | pthread_t thread; | |
34 | ||
4d53d6c0 JA |
35 | volatile int done; |
36 | ||
bf481692 JA |
37 | int fd; |
38 | uint64_t cur_offset; | |
39 | uint64_t size; | |
40 | ||
41 | unsigned long items; | |
fa88cd09 | 42 | unsigned long dupes; |
bf481692 JA |
43 | int err; |
44 | }; | |
45 | ||
46 | struct extent { | |
47 | struct flist_head list; | |
48 | uint64_t offset; | |
49 | }; | |
50 | ||
51 | struct chunk { | |
e96c0125 | 52 | struct fio_rb_node rb_node; |
bf481692 JA |
53 | uint64_t count; |
54 | uint32_t hash[MD5_HASH_WORDS]; | |
5a155943 | 55 | struct flist_head extent_list[0]; |
bf481692 JA |
56 | }; |
57 | ||
58 | struct item { | |
59 | uint64_t offset; | |
60 | uint32_t hash[MD5_HASH_WORDS]; | |
61 | }; | |
62 | ||
63 | static struct rb_root rb_root; | |
fa88cd09 | 64 | static struct bloom *bloom; |
bf481692 JA |
65 | static struct fio_mutex *rb_lock; |
66 | ||
67 | static unsigned int blocksize = 4096; | |
68 | static unsigned int num_threads; | |
69 | static unsigned int chunk_size = 1048576; | |
70 | static unsigned int dump_output; | |
71 | static unsigned int odirect; | |
72 | static unsigned int collision_check; | |
4d53d6c0 | 73 | static unsigned int print_progress = 1; |
fa88cd09 | 74 | static unsigned int use_bloom = 1; |
bf481692 JA |
75 | |
76 | static uint64_t total_size; | |
77 | static uint64_t cur_offset; | |
78 | static struct fio_mutex *size_lock; | |
79 | ||
d08a6886 | 80 | static struct fio_file file; |
bf481692 | 81 | |
d08a6886 | 82 | static uint64_t get_size(struct fio_file *f, struct stat *sb) |
bf481692 JA |
83 | { |
84 | uint64_t ret; | |
85 | ||
86 | if (S_ISBLK(sb->st_mode)) { | |
75adbcf5 | 87 | unsigned long long bytes = 0; |
d08a6886 JA |
88 | |
89 | if (blockdev_size(f, &bytes)) { | |
90 | log_err("dedupe: failed getting bdev size\n"); | |
bf481692 JA |
91 | return 0; |
92 | } | |
d08a6886 | 93 | ret = bytes; |
bf481692 JA |
94 | } else |
95 | ret = sb->st_size; | |
96 | ||
97 | return (ret & ~((uint64_t)blocksize - 1)); | |
98 | } | |
99 | ||
100 | static int get_work(uint64_t *offset, uint64_t *size) | |
101 | { | |
102 | uint64_t this_chunk; | |
103 | int ret = 1; | |
104 | ||
105 | fio_mutex_down(size_lock); | |
106 | ||
107 | if (cur_offset < total_size) { | |
108 | *offset = cur_offset; | |
109 | this_chunk = min((uint64_t)chunk_size, total_size - cur_offset); | |
110 | *size = this_chunk; | |
111 | cur_offset += this_chunk; | |
112 | ret = 0; | |
113 | } | |
114 | ||
115 | fio_mutex_up(size_lock); | |
116 | return ret; | |
117 | } | |
118 | ||
13793d02 | 119 | static int __read_block(int fd, void *buf, off_t offset, size_t count) |
bf481692 JA |
120 | { |
121 | ssize_t ret; | |
122 | ||
13793d02 | 123 | ret = pread(fd, buf, count, offset); |
bf481692 JA |
124 | if (ret < 0) { |
125 | perror("pread"); | |
126 | return 1; | |
127 | } else if (!ret) | |
128 | return 1; | |
13793d02 | 129 | else if (ret != count) { |
bf481692 JA |
130 | log_err("dedupe: short read on block\n"); |
131 | return 1; | |
132 | } | |
133 | ||
134 | return 0; | |
135 | } | |
136 | ||
13793d02 JA |
137 | static int read_block(int fd, void *buf, off_t offset) |
138 | { | |
139 | return __read_block(fd, buf, offset, blocksize); | |
140 | } | |
141 | ||
bf481692 JA |
142 | static void add_item(struct chunk *c, struct item *i) |
143 | { | |
87818659 JA |
144 | /* |
145 | * Save some memory and don't add extent items, if we don't | |
146 | * use them. | |
147 | */ | |
148 | if (dump_output || collision_check) { | |
149 | struct extent *e; | |
150 | ||
151 | e = malloc(sizeof(*e)); | |
152 | e->offset = i->offset; | |
5a155943 | 153 | flist_add_tail(&e->list, &c->extent_list[0]); |
87818659 | 154 | } |
bf481692 | 155 | |
bf481692 JA |
156 | c->count++; |
157 | } | |
158 | ||
159 | static int col_check(struct chunk *c, struct item *i) | |
160 | { | |
161 | struct extent *e; | |
162 | char *cbuf, *ibuf; | |
163 | int ret = 1; | |
164 | ||
165 | cbuf = fio_memalign(blocksize, blocksize); | |
166 | ibuf = fio_memalign(blocksize, blocksize); | |
167 | ||
5a155943 | 168 | e = flist_entry(c->extent_list[0].next, struct extent, list); |
d08a6886 | 169 | if (read_block(file.fd, cbuf, e->offset)) |
bf481692 JA |
170 | goto out; |
171 | ||
d08a6886 | 172 | if (read_block(file.fd, ibuf, i->offset)) |
bf481692 JA |
173 | goto out; |
174 | ||
175 | ret = memcmp(ibuf, cbuf, blocksize); | |
176 | out: | |
177 | fio_memfree(cbuf, blocksize); | |
178 | fio_memfree(ibuf, blocksize); | |
179 | return ret; | |
180 | } | |
181 | ||
5a155943 JA |
182 | static struct chunk *alloc_chunk(void) |
183 | { | |
184 | struct chunk *c; | |
185 | ||
186 | if (collision_check || dump_output) { | |
187 | c = malloc(sizeof(struct chunk) + sizeof(struct flist_head)); | |
188 | INIT_FLIST_HEAD(&c->extent_list[0]); | |
189 | } else | |
190 | c = malloc(sizeof(struct chunk)); | |
191 | ||
192 | return c; | |
193 | } | |
194 | ||
bf481692 JA |
195 | static void insert_chunk(struct item *i) |
196 | { | |
e96c0125 | 197 | struct fio_rb_node **p, *parent; |
bf481692 JA |
198 | struct chunk *c; |
199 | int diff; | |
200 | ||
201 | p = &rb_root.rb_node; | |
202 | parent = NULL; | |
203 | while (*p) { | |
204 | parent = *p; | |
205 | ||
206 | c = rb_entry(parent, struct chunk, rb_node); | |
207 | diff = memcmp(i->hash, c->hash, sizeof(i->hash)); | |
208 | if (diff < 0) | |
209 | p = &(*p)->rb_left; | |
210 | else if (diff > 0) | |
211 | p = &(*p)->rb_right; | |
212 | else { | |
213 | int ret; | |
214 | ||
215 | if (!collision_check) | |
216 | goto add; | |
217 | ||
218 | fio_mutex_up(rb_lock); | |
219 | ret = col_check(c, i); | |
220 | fio_mutex_down(rb_lock); | |
221 | ||
222 | if (!ret) | |
223 | goto add; | |
224 | ||
225 | p = &(*p)->rb_right; | |
226 | } | |
227 | } | |
228 | ||
5a155943 | 229 | c = alloc_chunk(); |
bf481692 | 230 | RB_CLEAR_NODE(&c->rb_node); |
bf481692 JA |
231 | c->count = 0; |
232 | memcpy(c->hash, i->hash, sizeof(i->hash)); | |
233 | rb_link_node(&c->rb_node, parent, p); | |
234 | rb_insert_color(&c->rb_node, &rb_root); | |
235 | add: | |
236 | add_item(c, i); | |
237 | } | |
238 | ||
fa88cd09 JA |
239 | static void insert_chunks(struct item *items, unsigned int nitems, |
240 | uint64_t *ndupes) | |
bf481692 JA |
241 | { |
242 | int i; | |
243 | ||
244 | fio_mutex_down(rb_lock); | |
245 | ||
fa88cd09 JA |
246 | for (i = 0; i < nitems; i++) { |
247 | if (bloom) { | |
248 | unsigned int s; | |
249 | int r; | |
250 | ||
251 | s = sizeof(items[i].hash) / sizeof(uint32_t); | |
252 | r = bloom_set(bloom, items[i].hash, s); | |
253 | *ndupes += r; | |
254 | } else | |
255 | insert_chunk(&items[i]); | |
256 | } | |
bf481692 JA |
257 | |
258 | fio_mutex_up(rb_lock); | |
259 | } | |
260 | ||
261 | static void crc_buf(void *buf, uint32_t *hash) | |
262 | { | |
263 | struct fio_md5_ctx ctx = { .hash = hash }; | |
264 | ||
265 | fio_md5_init(&ctx); | |
266 | fio_md5_update(&ctx, buf, blocksize); | |
267 | fio_md5_final(&ctx); | |
268 | } | |
269 | ||
13793d02 JA |
270 | static unsigned int read_blocks(int fd, void *buf, off_t offset, size_t size) |
271 | { | |
272 | if (__read_block(fd, buf, offset, size)) | |
273 | return 0; | |
274 | ||
275 | return size / blocksize; | |
276 | } | |
277 | ||
bf481692 JA |
278 | static int do_work(struct worker_thread *thread, void *buf) |
279 | { | |
280 | unsigned int nblocks, i; | |
281 | off_t offset; | |
13793d02 | 282 | int nitems = 0; |
fa88cd09 | 283 | uint64_t ndupes = 0; |
bf481692 JA |
284 | struct item *items; |
285 | ||
bf481692 | 286 | offset = thread->cur_offset; |
13793d02 JA |
287 | |
288 | nblocks = read_blocks(thread->fd, buf, offset, min(thread->size, (uint64_t)chunk_size)); | |
289 | if (!nblocks) | |
290 | return 1; | |
291 | ||
bf481692 JA |
292 | items = malloc(sizeof(*items) * nblocks); |
293 | ||
294 | for (i = 0; i < nblocks; i++) { | |
13793d02 JA |
295 | void *thisptr = buf + (i * blocksize); |
296 | ||
356c552d | 297 | items[i].offset = offset; |
13793d02 | 298 | crc_buf(thisptr, items[i].hash); |
bf481692 JA |
299 | offset += blocksize; |
300 | nitems++; | |
301 | } | |
302 | ||
fa88cd09 JA |
303 | insert_chunks(items, nitems, &ndupes); |
304 | ||
bf481692 | 305 | free(items); |
fa88cd09 JA |
306 | thread->items += nitems; |
307 | thread->dupes += ndupes; | |
13793d02 | 308 | return 0; |
bf481692 JA |
309 | } |
310 | ||
311 | static void *thread_fn(void *data) | |
312 | { | |
313 | struct worker_thread *thread = data; | |
314 | void *buf; | |
315 | ||
13793d02 | 316 | buf = fio_memalign(blocksize, chunk_size); |
bf481692 JA |
317 | |
318 | do { | |
319 | if (get_work(&thread->cur_offset, &thread->size)) { | |
320 | thread->err = 1; | |
321 | break; | |
322 | } | |
323 | if (do_work(thread, buf)) { | |
324 | thread->err = 1; | |
325 | break; | |
326 | } | |
327 | } while (1); | |
328 | ||
4d53d6c0 | 329 | thread->done = 1; |
13793d02 | 330 | fio_memfree(buf, chunk_size); |
bf481692 JA |
331 | return NULL; |
332 | } | |
333 | ||
5a155943 JA |
334 | static void show_progress(struct worker_thread *threads, unsigned long total) |
335 | { | |
336 | unsigned long last_nitems = 0; | |
8b6a404c | 337 | struct timespec last_tv; |
5a155943 JA |
338 | |
339 | fio_gettime(&last_tv, NULL); | |
340 | ||
341 | while (print_progress) { | |
342 | unsigned long this_items; | |
343 | unsigned long nitems = 0; | |
344 | uint64_t tdiff; | |
345 | float perc; | |
423adcc7 | 346 | int some_done = 0; |
5a155943 JA |
347 | int i; |
348 | ||
349 | for (i = 0; i < num_threads; i++) { | |
350 | nitems += threads[i].items; | |
351 | some_done = threads[i].done; | |
352 | if (some_done) | |
353 | break; | |
354 | } | |
355 | ||
356 | if (some_done) | |
357 | break; | |
358 | ||
359 | perc = (float) nitems / (float) total; | |
360 | perc *= 100.0; | |
361 | this_items = nitems - last_nitems; | |
362 | this_items *= blocksize; | |
363 | tdiff = mtime_since_now(&last_tv); | |
364 | if (tdiff) { | |
45b56027 | 365 | this_items = (this_items * 1000) / (tdiff * 1024); |
4870138d | 366 | printf("%3.2f%% done (%luKiB/sec)\r", perc, this_items); |
5a155943 JA |
367 | last_nitems = nitems; |
368 | fio_gettime(&last_tv, NULL); | |
369 | } else | |
370 | printf("%3.2f%% done\r", perc); | |
371 | fflush(stdout); | |
372 | usleep(250000); | |
373 | }; | |
374 | } | |
375 | ||
d08a6886 JA |
376 | static int run_dedupe_threads(struct fio_file *f, uint64_t dev_size, |
377 | uint64_t *nextents, uint64_t *nchunks) | |
bf481692 JA |
378 | { |
379 | struct worker_thread *threads; | |
4d53d6c0 | 380 | unsigned long nitems, total_items; |
bf481692 JA |
381 | int i, err = 0; |
382 | ||
383 | total_size = dev_size; | |
4d53d6c0 | 384 | total_items = dev_size / blocksize; |
bf481692 JA |
385 | cur_offset = 0; |
386 | size_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); | |
387 | ||
388 | threads = malloc(num_threads * sizeof(struct worker_thread)); | |
389 | for (i = 0; i < num_threads; i++) { | |
60a5a18b | 390 | memset(&threads[i], 0, sizeof(struct worker_thread)); |
d08a6886 | 391 | threads[i].fd = f->fd; |
bf481692 JA |
392 | |
393 | err = pthread_create(&threads[i].thread, NULL, thread_fn, &threads[i]); | |
394 | if (err) { | |
395 | log_err("fio: thread startup failed\n"); | |
396 | break; | |
397 | } | |
398 | } | |
399 | ||
5a155943 | 400 | show_progress(threads, total_items); |
4d53d6c0 | 401 | |
bf481692 | 402 | nitems = 0; |
fa88cd09 JA |
403 | *nextents = 0; |
404 | *nchunks = 1; | |
bf481692 JA |
405 | for (i = 0; i < num_threads; i++) { |
406 | void *ret; | |
407 | pthread_join(threads[i].thread, &ret); | |
408 | nitems += threads[i].items; | |
fa88cd09 | 409 | *nchunks += threads[i].dupes; |
bf481692 JA |
410 | } |
411 | ||
412 | printf("Threads(%u): %lu items processed\n", num_threads, nitems); | |
413 | ||
fa88cd09 JA |
414 | *nextents = nitems; |
415 | *nchunks = nitems - *nchunks; | |
416 | ||
bf481692 | 417 | fio_mutex_remove(size_lock); |
5a155943 | 418 | free(threads); |
bf481692 JA |
419 | return err; |
420 | } | |
421 | ||
fa88cd09 JA |
422 | static int dedupe_check(const char *filename, uint64_t *nextents, |
423 | uint64_t *nchunks) | |
bf481692 JA |
424 | { |
425 | uint64_t dev_size; | |
426 | struct stat sb; | |
427 | int flags; | |
428 | ||
429 | flags = O_RDONLY; | |
430 | if (odirect) | |
a0ae50a6 | 431 | flags |= OS_O_DIRECT; |
bf481692 | 432 | |
d08a6886 JA |
433 | memset(&file, 0, sizeof(file)); |
434 | file.file_name = strdup(filename); | |
435 | ||
436 | file.fd = open(filename, flags); | |
437 | if (file.fd == -1) { | |
bf481692 | 438 | perror("open"); |
d08a6886 | 439 | goto err; |
bf481692 JA |
440 | } |
441 | ||
d08a6886 | 442 | if (fstat(file.fd, &sb) < 0) { |
bf481692 | 443 | perror("fstat"); |
d08a6886 | 444 | goto err; |
bf481692 JA |
445 | } |
446 | ||
d08a6886 JA |
447 | dev_size = get_size(&file, &sb); |
448 | if (!dev_size) | |
449 | goto err; | |
bf481692 | 450 | |
fa88cd09 JA |
451 | if (use_bloom) { |
452 | uint64_t bloom_entries; | |
453 | ||
bc095aab | 454 | bloom_entries = 8 * (dev_size / blocksize); |
fa88cd09 JA |
455 | bloom = bloom_new(bloom_entries); |
456 | } | |
457 | ||
f7bd62dd | 458 | printf("Will check <%s>, size <%llu>, using %u threads\n", filename, (unsigned long long) dev_size, num_threads); |
bf481692 | 459 | |
d08a6886 JA |
460 | return run_dedupe_threads(&file, dev_size, nextents, nchunks); |
461 | err: | |
462 | if (file.fd != -1) | |
463 | close(file.fd); | |
464 | free(file.file_name); | |
465 | return 1; | |
bf481692 JA |
466 | } |
467 | ||
468 | static void show_chunk(struct chunk *c) | |
469 | { | |
470 | struct flist_head *n; | |
471 | struct extent *e; | |
472 | ||
761c2729 | 473 | printf("c hash %8x %8x %8x %8x, count %lu\n", c->hash[0], c->hash[1], c->hash[2], c->hash[3], (unsigned long) c->count); |
5a155943 | 474 | flist_for_each(n, &c->extent_list[0]) { |
bf481692 | 475 | e = flist_entry(n, struct extent, list); |
761c2729 | 476 | printf("\toffset %llu\n", (unsigned long long) e->offset); |
bf481692 JA |
477 | } |
478 | } | |
479 | ||
fa88cd09 | 480 | static void show_stat(uint64_t nextents, uint64_t nchunks) |
bf481692 | 481 | { |
9ca144fe | 482 | double perc, ratio; |
bf481692 | 483 | |
fa88cd09 | 484 | printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks); |
15e3ca5b JA |
485 | |
486 | if (nchunks) { | |
487 | ratio = (double) nextents / (double) nchunks; | |
488 | printf("De-dupe ratio: 1:%3.2f\n", ratio - 1.0); | |
489 | } else | |
490 | printf("De-dupe ratio: 1:infinite\n"); | |
fa88cd09 JA |
491 | |
492 | perc = 1.00 - ((double) nchunks / (double) nextents); | |
493 | perc *= 100.0; | |
494 | printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50)); | |
495 | ||
496 | } | |
497 | ||
498 | static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks) | |
499 | { | |
e96c0125 | 500 | struct fio_rb_node *n; |
fa88cd09 JA |
501 | |
502 | *nchunks = *nextents = 0; | |
bf481692 JA |
503 | |
504 | n = rb_first(&rb_root); | |
505 | if (!n) | |
506 | return; | |
507 | ||
508 | do { | |
509 | struct chunk *c; | |
510 | ||
511 | c = rb_entry(n, struct chunk, rb_node); | |
fa88cd09 JA |
512 | (*nchunks)++; |
513 | *nextents += c->count; | |
bf481692 JA |
514 | |
515 | if (dump_output) | |
516 | show_chunk(c); | |
517 | ||
518 | } while ((n = rb_next(n)) != NULL); | |
bf481692 JA |
519 | } |
520 | ||
521 | static int usage(char *argv[]) | |
522 | { | |
523 | log_err("Check for dedupable blocks on a device/file\n\n"); | |
524 | log_err("%s: [options] <device or file>\n", argv[0]); | |
525 | log_err("\t-b\tChunk size to use\n"); | |
526 | log_err("\t-t\tNumber of threads to use\n"); | |
527 | log_err("\t-d\tFull extent/chunk debug output\n"); | |
528 | log_err("\t-o\tUse O_DIRECT\n"); | |
529 | log_err("\t-c\tFull collision check\n"); | |
fa88cd09 | 530 | log_err("\t-B\tUse probabilistic bloom filter\n"); |
4d53d6c0 | 531 | log_err("\t-p\tPrint progress indicator\n"); |
bf481692 JA |
532 | return 1; |
533 | } | |
534 | ||
535 | int main(int argc, char *argv[]) | |
536 | { | |
9b36c122 | 537 | uint64_t nextents = 0, nchunks = 0; |
bf481692 JA |
538 | int c, ret; |
539 | ||
71471cb1 | 540 | arch_init(argv); |
9077ee4a JA |
541 | debug_init(); |
542 | ||
fa88cd09 | 543 | while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) { |
bf481692 JA |
544 | switch (c) { |
545 | case 'b': | |
546 | blocksize = atoi(optarg); | |
547 | break; | |
548 | case 't': | |
549 | num_threads = atoi(optarg); | |
550 | break; | |
551 | case 'd': | |
552 | dump_output = atoi(optarg); | |
553 | break; | |
554 | case 'o': | |
555 | odirect = atoi(optarg); | |
556 | break; | |
557 | case 'c': | |
558 | collision_check = atoi(optarg); | |
559 | break; | |
4d53d6c0 JA |
560 | case 'p': |
561 | print_progress = atoi(optarg); | |
562 | break; | |
fa88cd09 JA |
563 | case 'B': |
564 | use_bloom = atoi(optarg); | |
565 | break; | |
bf481692 JA |
566 | case '?': |
567 | default: | |
568 | return usage(argv); | |
569 | } | |
570 | } | |
571 | ||
fa88cd09 JA |
572 | if (collision_check || dump_output) |
573 | use_bloom = 0; | |
574 | ||
bf481692 JA |
575 | if (!num_threads) |
576 | num_threads = cpus_online(); | |
577 | ||
578 | if (argc == optind) | |
579 | return usage(argv); | |
580 | ||
581 | sinit(); | |
582 | ||
583 | rb_root = RB_ROOT; | |
584 | rb_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); | |
585 | ||
fa88cd09 JA |
586 | ret = dedupe_check(argv[optind], &nextents, &nchunks); |
587 | ||
179b3ead JA |
588 | if (!ret) { |
589 | if (!bloom) | |
590 | iter_rb_tree(&nextents, &nchunks); | |
bf481692 | 591 | |
179b3ead JA |
592 | show_stat(nextents, nchunks); |
593 | } | |
bf481692 | 594 | |
1956a9e5 | 595 | fio_mutex_remove(rb_lock); |
9b36c122 JA |
596 | if (bloom) |
597 | bloom_free(bloom); | |
bf481692 JA |
598 | scleanup(); |
599 | return ret; | |
600 | } |