Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unaligned Deduplication support #1747

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,11 @@ int convert_thread_options_to_cpu(struct thread_options *o,
o->compress_percentage = le32_to_cpu(top->compress_percentage);
o->compress_chunk = le32_to_cpu(top->compress_chunk);
o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage);
o->dedupe_unaligned_percentage = le32_to_cpu(top->dedupe_unaligned_percentage);
o->dedupe_mode = le32_to_cpu(top->dedupe_mode);
o->dedupe_working_set_percentage = le32_to_cpu(top->dedupe_working_set_percentage);
o->dedupe_global = le32_to_cpu(top->dedupe_global);
o->dedupe_working_set_ua_percentage = le32_to_cpu(top->dedupe_working_set_ua_percentage);
o->block_error_hist = le32_to_cpu(top->block_error_hist);
o->replay_align = le32_to_cpu(top->replay_align);
o->replay_scale = le32_to_cpu(top->replay_scale);
Expand Down Expand Up @@ -544,9 +546,11 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
top->compress_percentage = cpu_to_le32(o->compress_percentage);
top->compress_chunk = cpu_to_le32(o->compress_chunk);
top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage);
top->dedupe_unaligned_percentage = cpu_to_le32(o->dedupe_unaligned_percentage);
top->dedupe_mode = cpu_to_le32(o->dedupe_mode);
top->dedupe_working_set_percentage = cpu_to_le32(o->dedupe_working_set_percentage);
top->dedupe_global = cpu_to_le32(o->dedupe_global);
top->dedupe_working_set_ua_percentage = cpu_to_le32(o->dedupe_working_set_ua_percentage);
top->block_error_hist = cpu_to_le32(o->block_error_hist);
top->replay_align = cpu_to_le32(o->replay_align);
top->replay_scale = cpu_to_le32(o->replay_scale);
Expand Down
71 changes: 59 additions & 12 deletions dedupe.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "fio.h"
#include "dedupe.h"

/**
* initializes the global dedup workset.
Expand All @@ -18,13 +19,49 @@ int init_global_dedupe_working_set_seeds(void)
return 0;
}

int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
/**
 * Builds the working set of seeds used for unaligned dedupe.
 *
 * Does nothing unless dedupe_unaligned_percentage is non-zero and the dedupe
 * mode is DEDUPE_MODE_WORKING_SET.
 *
 * @td: thread whose unaligned working set (dedupe_working_set_states_ua /
 *      num_unique_pages_ua) is populated.
 * @global_dedup: not used by this helper; kept so both helpers share one
 *      signature.
 * @dedupe_working_set_state: running seed state owned by the caller. It is
 *      advanced in place, so the caller sees the state left after the last
 *      page.
 *
 * Returns 0 on success (including when there is nothing to do) and 1 when the
 * working set could not be allocated.
 */
static int init_dedupe_working_set_seeds_int2(struct thread_data *td, bool global_dedup, struct frand_state *dedupe_working_set_state)
{
	unsigned long long i, j, num_seed_advancements;

	if (!td->o.dedupe_unaligned_percentage || td->o.dedupe_mode != DEDUPE_MODE_WORKING_SET)
		return 0;

	num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
		min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
	/*
	 * The dedupe working set keeps seeds of unique data (generated by buf_state).
	 * Dedupe-ed pages will be generated using those seeds.
	 */
	td->num_unique_pages_ua = (td->o.size * (unsigned long long)td->o.dedupe_working_set_ua_percentage / 100) / td->o.min_bs[DDIR_WRITE];

	/*
	 * An empty working set is not an allocation failure. Handle it
	 * explicitly instead of relying on what a zero-sized allocation returns.
	 */
	if (!td->num_unique_pages_ua) {
		td->dedupe_working_set_states_ua = NULL;
		return 0;
	}

	/* calloc() rejects a count * size product that would overflow */
	td->dedupe_working_set_states_ua = calloc(td->num_unique_pages_ua,
					sizeof(*td->dedupe_working_set_states_ua));
	if (!td->dedupe_working_set_states_ua) {
		log_err("fio: could not allocate UA dedupe working set\n");
		return 1;
	}

	for (i = 0; i < td->num_unique_pages_ua; i++) {
		frand_copy(&td->dedupe_working_set_states_ua[i], dedupe_working_set_state);
		/*
		 * When compression is used the seed is advanced multiple times to
		 * generate the buffer. We want to regenerate the same buffer when
		 * deduping against this page
		 */
		for (j = 0; j < num_seed_advancements; j++)
			__get_next_seed(dedupe_working_set_state);
	}

	return 0;
}


static int init_dedupe_working_set_seeds_int1(struct thread_data *td, bool global_dedup, struct frand_state *dedupe_working_set_state)
{
unsigned long long i, j, num_seed_advancements, pages_per_seed;
unsigned int tindex;
struct thread_data *td_seed;
if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
return 0;

Expand All @@ -42,17 +79,16 @@ int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
return 1;
}

frand_copy(&dedupe_working_set_state, &td->buf_state);
frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
for (i = 1; i < td->num_unique_pages; i++) {
for (i = 0; i < td->num_unique_pages; i++) {
frand_copy(&td->dedupe_working_set_states[i], dedupe_working_set_state);
/*
* When compression is used the seed is advanced multiple times to
* generate the buffer. We want to regenerate the same buffer when
* deduping against this page
*/
for (j = 0; j < num_seed_advancements; j++)
__get_next_seed(&dedupe_working_set_state);
__get_next_seed(dedupe_working_set_state);

/*
* When global dedup is used, we rotate the seeds to allow
Expand All @@ -61,11 +97,22 @@ int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
*/
if (global_dedup && i % pages_per_seed == 0) {
td_seed = tnumber_to_td(++tindex % thread_number);
frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
frand_copy(dedupe_working_set_state, &td_seed->buf_state);
}

frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);

}

return 0;

}
/**
 * Initializes the per-thread dedupe working sets.
 *
 * A local seed state is copied from td->buf_state and handed first to the
 * aligned helper and then to the unaligned helper, so the second helper
 * continues from wherever the first one left the state.
 *
 * @td: thread whose working sets are initialized.
 * @global_dedup: forwarded unchanged to both helpers.
 *
 * Returns 0 on success, or the non-zero value of the first helper that fails,
 * so that an allocation failure is not reported to the caller as success.
 */
int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
{
	struct frand_state dedupe_working_set_state = {0};
	int ret;

	frand_copy(&dedupe_working_set_state, &td->buf_state);

	ret = init_dedupe_working_set_seeds_int1(td, global_dedup, &dedupe_working_set_state);
	if (ret)
		return ret;

	return init_dedupe_working_set_seeds_int2(td, global_dedup, &dedupe_working_set_state);
}
4 changes: 4 additions & 0 deletions fio.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ enum {
FIO_RAND_POISSON2_OFF,
FIO_RAND_POISSON3_OFF,
FIO_RAND_PRIO_CMDS,
FIO_RAND_DEDUPE_UNALIGNED,
FIO_RAND_DEDUPE_WORKING_SET_IX,
FIO_RAND_FDP_OFF,
FIO_RAND_NR_OFFS,
Expand Down Expand Up @@ -281,10 +282,13 @@ struct thread_data {
struct frand_state dedupe_state;
struct frand_state zone_state;
struct frand_state prio_state;
struct frand_state dedupe_unaligned_state;
struct frand_state dedupe_working_set_index_state;
struct frand_state *dedupe_working_set_states;
struct frand_state *dedupe_working_set_states_ua;

unsigned long long num_unique_pages;
unsigned long long num_unique_pages_ua;

struct zone_split_index **zone_state_index;
unsigned int num_write_zones;
Expand Down
1 change: 1 addition & 0 deletions init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,7 @@ void td_fill_rand_seeds(struct thread_data *td)
init_rand_seed(&td->dedupe_state, td->rand_seeds[FIO_DEDUPE_OFF], false);
init_rand_seed(&td->zone_state, td->rand_seeds[FIO_RAND_ZONE_OFF], false);
init_rand_seed(&td->prio_state, td->rand_seeds[FIO_RAND_PRIO_CMDS], false);
init_rand_seed(&td->dedupe_unaligned_state, td->rand_seeds[FIO_RAND_DEDUPE_UNALIGNED], false);
init_rand_seed(&td->dedupe_working_set_index_state, td->rand_seeds[FIO_RAND_DEDUPE_WORKING_SET_IX], use64);

init_rand_seed(&td->random_state, td->rand_seeds[FIO_RAND_BLOCK_OFF], use64);
Expand Down
60 changes: 54 additions & 6 deletions io_u.c
Original file line number Diff line number Diff line change
Expand Up @@ -2321,7 +2321,7 @@ static struct frand_state *get_buf_state(struct thread_data *td)

v = rand_between(&td->dedupe_state, 1, 100);

if (v <= td->o.dedupe_percentage)
if (v <= td->o.dedupe_percentage) {
switch (td->o.dedupe_mode) {
case DEDUPE_MODE_REPEAT:
/*
Expand All @@ -2340,7 +2340,7 @@ static struct frand_state *get_buf_state(struct thread_data *td)
log_err("unexpected dedupe mode %u\n", td->o.dedupe_mode);
assert(0);
}

}
return &td->buf_state;
}

Expand All @@ -2352,6 +2352,45 @@ static void save_buf_state(struct thread_data *td, struct frand_state *rs)
frand_copy(&td->buf_state_prev, rs);
}

/**
 * Optionally starts an unaligned-dedupe buffer.
 *
 * With probability dedupe_unaligned_percentage, a seed is picked from the
 * unaligned working set and copied into td->buf_state_ret. A random-length
 * prefix of filler data is then written at *buf, so whatever the caller
 * generates next from buf_state_ret lands at a shifted offset.
 *
 * @td: thread data (random states, options, unaligned working set).
 * @buf: in/out cursor into the I/O buffer, advanced past the filler written.
 * @min_write: upper bound for a single filler chunk.
 * @left: number of bytes available at *buf.
 *
 * Returns the number of bytes still left after the filler. This equals @left
 * when no shift was done, which happens when the percentage roll failed, the
 * unaligned working set is empty, or the buffer is too small to hold both a
 * shift and at least one byte of payload.
 */
static unsigned long long shift_buf_unaligned_dedup(struct thread_data *td,
		void **buf, unsigned long long min_write, unsigned long long left)
{
	/* NOTE(review): presumably "page size - 1" for 4 KiB pages -- confirm */
	const unsigned long long max_skip = 4095;
	unsigned long long skip, this_write, i;
	unsigned int v;

	v = rand_between(&td->dedupe_unaligned_state, 1, 100);
	if (v > td->o.dedupe_unaligned_percentage)
		return left;

	/*
	 * Without a populated working set there is nothing to dedupe against;
	 * num_unique_pages_ua - 1 would underflow and index out of bounds.
	 */
	if (!td->num_unique_pages_ua || !td->dedupe_working_set_states_ua)
		return left;

	/* need room for a shift of at least 1 byte plus 1 byte of payload */
	if (left < 2)
		return left;

	i = rand_between(&td->dedupe_working_set_index_state, 0, td->num_unique_pages_ua - 1);
	frand_copy(&td->buf_state_ret, &td->dedupe_working_set_states_ua[i]);

	/*
	 * Keep the shift strictly below the remaining size so the filler can
	 * never consume the whole buffer. For buffers larger than max_skip
	 * this is the same 1..4095 range as before.
	 */
	skip = rand_between(&td->dedupe_unaligned_state, 1, min(max_skip, left - 1));

	while (skip) {
		this_write = min_not_zero(min(min_write, skip),
					(unsigned long long) td->o.compress_chunk);
		/*
		 * A zero min_write can yield a chunk of 0 (no progress) or one
		 * larger than the shift (overrun); clamp both to the shift.
		 */
		if (!this_write || this_write > skip)
			this_write = skip;

		fill_random_buf_percentage(&td->dedupe_unaligned_state, *buf, td->o.compress_percentage,
			this_write, this_write,
			td->o.buffer_pattern,
			td->o.buffer_pattern_bytes);

		/* arithmetic on void * is a GNU extension; step through char * */
		*buf = (char *) *buf + this_write;
		left -= this_write;
		skip -= this_write;
	}

	return left;
}

void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_write,
unsigned long long max_bs)
{
Expand All @@ -2360,12 +2399,24 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr
if (o->mem_type == MEM_CUDA_MALLOC)
return;

if (o->compress_percentage || o->dedupe_percentage) {
if (o->compress_percentage || o->dedupe_percentage || o->dedupe_unaligned_percentage) {
unsigned int perc = td->o.compress_percentage;
struct frand_state *rs = NULL;
unsigned long long left = max_bs;
unsigned long long this_write;

rs = get_buf_state(td);

// if aligned didnt work try ua
if (o->dedupe_unaligned_percentage && rs != &td->buf_state_ret) {
unsigned long long shift = left - shift_buf_unaligned_dedup(td, &buf, min_write, left);
left -= shift;

// if we did ua ddp lets use the seed it generated from the ua working set
if (shift){
rs = &td->buf_state_ret;
}
}
do {
/*
* Buffers are either entirely dedupe-able or not.
Expand All @@ -2374,9 +2425,6 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr
* means we should retrack the steps we took for compression
* as well.
*/
if (!rs)
rs = get_buf_state(td);

min_write = min(min_write, left);

this_write = min_not_zero(min_write,
Expand Down
25 changes: 25 additions & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -4821,6 +4821,18 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BUF,
},
{
.name = "dedupe_unaligned_percentage",
.lname = "Dedupe unaligned percentage",
.type = FIO_OPT_INT,
.off1 = offsetof(struct thread_options, dedupe_unaligned_percentage),
.maxval = 100,
.minval = 0,
.help = "Percentage of buffers that are unaligned dedupable (calculated from dedup percentage)",
.interval = 1,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BUF,
},
{
.name = "dedupe_mode",
.lname = "Dedupe mode",
Expand All @@ -4842,6 +4854,19 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
},
},
},
{
.name = "dedupe_working_set_ua_percentage",
.lname = "Dedupe working set unaligned percentage",
.help = "Dedupe working set unaligned size in percentages from file or device size used to generate dedupe patterns from",
.type = FIO_OPT_INT,
.off1 = offsetof(struct thread_options, dedupe_working_set_ua_percentage),
.parent = "dedupe_unaligned_percentage",
.def = "5",
.maxval = 100,
.minval = 0,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BUF,
},
{
.name = "dedupe_working_set_percentage",
.lname = "Dedupe working set percentage",
Expand Down
4 changes: 4 additions & 0 deletions thread_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,9 @@ struct thread_options {
unsigned int compress_percentage;
unsigned int compress_chunk;
unsigned int dedupe_percentage;
unsigned int dedupe_unaligned_percentage;
unsigned int dedupe_mode;
unsigned int dedupe_working_set_ua_percentage;
unsigned int dedupe_working_set_percentage;
unsigned int dedupe_global;
unsigned int time_based;
Expand Down Expand Up @@ -582,9 +584,11 @@ struct thread_options_pack {
uint32_t compress_percentage;
uint32_t compress_chunk;
uint32_t dedupe_percentage;
uint32_t dedupe_unaligned_percentage;
uint32_t dedupe_mode;
uint32_t dedupe_working_set_percentage;
uint32_t dedupe_global;
uint32_t dedupe_working_set_ua_percentage;
uint32_t time_based;
uint32_t disable_lat;
uint32_t disable_clat;
Expand Down