21
21
#include < chopper/sketch/check_filenames.hpp>
22
22
#include < chopper/sketch/output.hpp>
23
23
#include < chopper/sketch/read_data_file.hpp>
24
+ #include < chopper/sketch/sketch_file.hpp>
24
25
25
26
#include < hibf/sketch/compute_sketches.hpp>
26
27
27
28
namespace chopper
28
29
{
29
30
31
+ void validate_configuration (sharg::parser & parser,
32
+ chopper::configuration & config,
33
+ chopper::configuration const & sketch_config)
34
+ {
35
+ if (parser.is_option_set (" sketch-bits" ))
36
+ throw sharg::parser_error{" You cannot set --sketch-bits when using a sketch file as input." };
37
+
38
+ if (parser.is_option_set (" kmer" ) && config.k != sketch_config.k )
39
+ {
40
+ std::cerr << sharg::detail::to_string (
41
+ " [WARNING] Given k-mer size (" ,
42
+ config.k ,
43
+ " ) differs from k-mer size in the sketch file (" ,
44
+ config.k ,
45
+ " ). The results may be suboptimal. If this was a conscious decision, you can ignore this warning.\n " );
46
+ }
47
+ }
48
+
30
49
int chopper_layout (chopper::configuration & config, sharg::parser & parser)
31
50
{
32
51
parser.parse ();
@@ -36,41 +55,70 @@ int chopper_layout(chopper::configuration & config, sharg::parser & parser)
36
55
else if (config.k > config.window_size )
37
56
throw sharg::parser_error{" The k-mer size cannot be bigger than the window size." };
38
57
58
+ auto has_sketch_file_extension = [](std::filesystem::path const & path)
59
+ {
60
+ return path.string ().ends_with (" .sketch" ) || path.string ().ends_with (" .sketches" );
61
+ };
62
+
39
63
config.disable_sketch_output = !parser.is_option_set (" output-sketches-to" );
64
+ if (!config.disable_sketch_output && !has_sketch_file_extension (config.sketch_directory ))
65
+ throw sharg::parser_error{" The sketch output file must have the extension \" .sketch\" or \" .sketches\" ." };
66
+
67
+ bool const input_is_a_sketch_file = has_sketch_file_extension (config.data_file );
40
68
41
69
int exit_code{};
42
70
43
71
std::vector<std::vector<std::string>> filenames{};
72
+ std::vector<seqan::hibf::sketch::hyperloglog> sketches{};
44
73
45
- chopper::sketch::read_data_file (config, filenames);
74
+ if (input_is_a_sketch_file)
75
+ {
76
+ chopper::sketch::sketch_file sin {};
77
+
78
+ { // Deserialization is guaranteed to be complete when going out of scope.
79
+ std::ifstream is{config.data_file };
80
+ cereal::BinaryInputArchive iarchive{is};
81
+ iarchive (sin );
82
+ }
46
83
47
- std::vector<seqan::hibf::sketch::hyperloglog> sketches;
84
+ filenames = std::move (sin .filenames ); // No need to call check_filenames because the files are not read.
85
+ sketches = std::move (sin .hll_sketches );
86
+ validate_configuration (parser, config, sin .chopper_config );
87
+ }
88
+ else
89
+ {
90
+ chopper::sketch::read_data_file (config, filenames);
48
91
49
- if (filenames.empty ())
50
- throw sharg::parser_error{
51
- sharg::detail::to_string (" The file " , config.data_file .string (), " appears to be empty." )};
92
+ if (filenames.empty ())
93
+ throw sharg::parser_error{
94
+ sharg::detail::to_string (" The file " , config.data_file .string (), " appears to be empty." )};
52
95
53
- chopper::sketch::check_filenames (filenames, config);
96
+ // Files need to exist because they will be read for sketching.
97
+ chopper::sketch::check_filenames (filenames, config);
98
+ }
54
99
55
100
config.hibf_config .input_fn =
56
101
chopper::input_functor{filenames, config.precomputed_files , config.k , config.window_size };
57
102
config.hibf_config .number_of_user_bins = filenames.size ();
58
103
config.hibf_config .validate_and_set_defaults ();
59
104
60
- config.compute_sketches_timer .start ();
61
- seqan::hibf::sketch::compute_sketches (config.hibf_config , sketches);
62
- config.compute_sketches_timer .stop ();
105
+ if (!input_is_a_sketch_file)
106
+ {
107
+ config.compute_sketches_timer .start ();
108
+ seqan::hibf::sketch::compute_sketches (config.hibf_config , sketches);
109
+ config.compute_sketches_timer .stop ();
110
+ }
63
111
64
112
exit_code |= chopper::layout::execute (config, filenames, sketches);
65
113
66
114
if (!config.disable_sketch_output )
67
115
{
68
- if (! std::filesystem::exists (config. sketch_directory ))
69
- std::filesystem::create_directory (config. sketch_directory );
70
-
71
- assert (filenames. size () == sketches. size ()) ;
72
- for ( size_t i = 0 ; i < filenames. size (); ++i)
73
- chopper::sketch::write_sketch_file (filenames[i][ 0 ], sketches[i], config );
116
+ chopper::sketch::sketch_file sout{. chopper_config = config,
117
+ . filenames = std::move (filenames),
118
+ . hll_sketches = std::move (sketches)};
119
+ std::ofstream os{config. sketch_directory , std::ios::binary} ;
120
+ cereal::BinaryOutputArchive oarchive{os};
121
+ oarchive (sout );
74
122
}
75
123
76
124
if (!config.output_timings .empty ())
0 commit comments