* just scans the file for extents of the given size, checksums them,
* and orders them.
*/
+#include <fcntl.h>
+#include <inttypes.h>
#include <stdio.h>
-#include <stdio.h>
+#include <string.h>
#include <unistd.h>
-#include <inttypes.h>
-#include <assert.h>
-#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <fcntl.h>
-#include <string.h>
-#include "../lib/rbtree.h"
+#include "../fio.h"
#include "../flist.h"
#include "../log.h"
-#include "../mutex.h"
+#include "../fio_sem.h"
#include "../smalloc.h"
#include "../minmax.h"
#include "../crc/md5.h"
-#include "../memalign.h"
#include "../os/os.h"
#include "../gettime.h"
#include "../fio_time.h"
+#include "../lib/rbtree.h"
#include "../lib/bloom.h"
#include "debug.h"
};
struct chunk {
- struct rb_node rb_node;
+ struct fio_rb_node rb_node;
uint64_t count;
uint32_t hash[MD5_HASH_WORDS];
struct flist_head extent_list[0];
static struct rb_root rb_root;
static struct bloom *bloom;
-static struct fio_mutex *rb_lock;
+static struct fio_sem *rb_lock;
static unsigned int blocksize = 4096;
static unsigned int num_threads;
static uint64_t total_size;
static uint64_t cur_offset;
-static struct fio_mutex *size_lock;
+static struct fio_sem *size_lock;
static struct fio_file file;
uint64_t ret;
if (S_ISBLK(sb->st_mode)) {
- unsigned long long bytes;
+ unsigned long long bytes = 0;
if (blockdev_size(f, &bytes)) {
log_err("dedupe: failed getting bdev size\n");
uint64_t this_chunk;
int ret = 1;
- fio_mutex_down(size_lock);
+ fio_sem_down(size_lock);
if (cur_offset < total_size) {
*offset = cur_offset;
ret = 0;
}
- fio_mutex_up(size_lock);
+ fio_sem_up(size_lock);
return ret;
}
char *cbuf, *ibuf;
int ret = 1;
- cbuf = fio_memalign(blocksize, blocksize);
- ibuf = fio_memalign(blocksize, blocksize);
+ cbuf = fio_memalign(blocksize, blocksize, false);
+ ibuf = fio_memalign(blocksize, blocksize, false);
e = flist_entry(c->extent_list[0].next, struct extent, list);
if (read_block(file.fd, cbuf, e->offset))
ret = memcmp(ibuf, cbuf, blocksize);
out:
- fio_memfree(cbuf, blocksize);
- fio_memfree(ibuf, blocksize);
+ fio_memfree(cbuf, blocksize, false);
+ fio_memfree(ibuf, blocksize, false);
return ret;
}
static void insert_chunk(struct item *i)
{
- struct rb_node **p, *parent;
+ struct fio_rb_node **p, *parent;
struct chunk *c;
int diff;
if (!collision_check)
goto add;
- fio_mutex_up(rb_lock);
+ fio_sem_up(rb_lock);
ret = col_check(c, i);
- fio_mutex_down(rb_lock);
+ fio_sem_down(rb_lock);
if (!ret)
goto add;
{
int i;
- fio_mutex_down(rb_lock);
+ fio_sem_down(rb_lock);
for (i = 0; i < nitems; i++) {
if (bloom) {
insert_chunk(&items[i]);
}
- fio_mutex_up(rb_lock);
+ fio_sem_up(rb_lock);
}
static void crc_buf(void *buf, uint32_t *hash)
for (i = 0; i < nblocks; i++) {
void *thisptr = buf + (i * blocksize);
- if (items)
- items[i].offset = offset;
+ items[i].offset = offset;
crc_buf(thisptr, items[i].hash);
offset += blocksize;
nitems++;
struct worker_thread *thread = data;
void *buf;
- buf = fio_memalign(blocksize, chunk_size);
+ buf = fio_memalign(blocksize, chunk_size, false);
do {
if (get_work(&thread->cur_offset, &thread->size)) {
} while (1);
thread->done = 1;
- fio_memfree(buf, chunk_size);
+ fio_memfree(buf, chunk_size, false);
return NULL;
}
static void show_progress(struct worker_thread *threads, unsigned long total)
{
unsigned long last_nitems = 0;
- struct timeval last_tv;
+ struct timespec last_tv;
fio_gettime(&last_tv, NULL);
tdiff = mtime_since_now(&last_tv);
if (tdiff) {
this_items = (this_items * 1000) / (tdiff * 1024);
- printf("%3.2f%% done (%luKB/sec)\r", perc, this_items);
+ printf("%3.2f%% done (%luKiB/sec)\r", perc, this_items);
last_nitems = nitems;
fio_gettime(&last_tv, NULL);
} else
total_size = dev_size;
total_items = dev_size / blocksize;
cur_offset = 0;
- size_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
+ size_lock = fio_sem_init(FIO_SEM_UNLOCKED);
threads = malloc(num_threads * sizeof(struct worker_thread));
for (i = 0; i < num_threads; i++) {
+ memset(&threads[i], 0, sizeof(struct worker_thread));
threads[i].fd = f->fd;
- threads[i].items = 0;
- threads[i].err = 0;
- threads[i].done = 0;
err = pthread_create(&threads[i].thread, NULL, thread_fn, &threads[i]);
if (err) {
*nextents = nitems;
*nchunks = nitems - *nchunks;
- fio_mutex_remove(size_lock);
+ fio_sem_remove(size_lock);
free(threads);
return err;
}
}
}
/*
 * Print the summary statistics for a scan to stdout.
 *
 * nextents    - printed as "Extents"
 * nchunks     - printed as "Unique extents"
 * ndupextents - printed as "Duplicated extents", and only when no bloom
 *               filter is in use (the global 'bloom' is NULL); main() only
 *               fills it in via iter_rb_tree() in that same case, otherwise
 *               it stays 0
 *
 * Also prints the de-dupe ratio ("1:infinite" when nchunks is 0), the
 * "working set" percentage when ndupextents is non-zero, and a suggested
 * fio dedupe_percentage setting.
 */
-static void show_stat(uint64_t nextents, uint64_t nchunks)
+static void show_stat(uint64_t nextents, uint64_t nchunks, uint64_t ndupextents)
{
double perc, ratio;
- printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
+ printf("Extents=%lu, Unique extents=%lu", (unsigned long) nextents, (unsigned long) nchunks);
+ if (!bloom)
+ printf(" Duplicated extents=%lu", (unsigned long) ndupextents);
+ printf("\n");
/* Guard the division below: with no unique chunks the ratio is reported as infinite. */
if (nchunks) {
ratio = (double) nextents / (double) nchunks;
/* NOTE(review): the statement that consumes 'ratio' is not visible in this excerpt. */
} else
printf("De-dupe ratio: 1:infinite\n");
+ if (ndupextents)
+ printf("De-dupe working set at least: %3.2f%%\n", 100.0 * (double) ndupextents / (double) nextents);
+
/*
 * Fraction of extents that are not unique, scaled to a percentage.
 * NOTE(review): unlike the ratio above, this division is not guarded; if
 * nextents can be 0 here the result is 0.0/0.0 and the (int) conversion
 * below is not well defined - confirm callers never pass an empty scan.
 */
perc = 1.00 - ((double) nchunks / (double) nextents);
perc *= 100.0;
/*
 * Adding 0.50 before the truncating cast rounds to the nearest integer.
 * NOTE(review): the format is %u but the argument is cast to int - confirm
 * this mismatch is intended.
 */
printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
}
-static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks)
+static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks, uint64_t *ndupextents)
{
- struct rb_node *n;
+ struct fio_rb_node *n;
- *nchunks = *nextents = 0;
+ *nchunks = *nextents = *ndupextents = 0;
n = rb_first(&rb_root);
if (!n)
c = rb_entry(n, struct chunk, rb_node);
(*nchunks)++;
*nextents += c->count;
+ *ndupextents += (c->count > 1);
if (dump_output)
show_chunk(c);
int main(int argc, char *argv[])
{
- uint64_t nextents = 0, nchunks = 0;
+ uint64_t nextents = 0, nchunks = 0, ndupextents = 0;
int c, ret;
+ arch_init(argv);
debug_init();
while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) {
sinit();
rb_root = RB_ROOT;
- rb_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
+ rb_lock = fio_sem_init(FIO_SEM_UNLOCKED);
ret = dedupe_check(argv[optind], &nextents, &nchunks);
if (!ret) {
if (!bloom)
- iter_rb_tree(&nextents, &nchunks);
+ iter_rb_tree(&nextents, &nchunks, &ndupextents);
- show_stat(nextents, nchunks);
+ show_stat(nextents, nchunks, ndupextents);
}
- fio_mutex_remove(rb_lock);
+ fio_sem_remove(rb_lock);
if (bloom)
bloom_free(bloom);
scleanup();