|
| 1 | +use std::fs; |
| 2 | +use std::io::{self, prelude::*}; |
| 3 | +use std::path; |
| 4 | +use std::process; |
| 5 | + |
| 6 | +use bytesize::ByteSize; |
| 7 | +use clap::ArgEnum; |
| 8 | +use env_logger; |
| 9 | +use log; |
| 10 | + |
| 11 | +use ext_sort::buffer::mem::MemoryLimitedBufferBuilder; |
| 12 | +use ext_sort::{ExternalSorter, ExternalSorterBuilder}; |
| 13 | + |
| 14 | +fn main() { |
| 15 | + let arg_parser = build_arg_parser(); |
| 16 | + |
| 17 | + let log_level: LogLevel = arg_parser.value_of_t_or_exit("log_level"); |
| 18 | + init_logger(log_level); |
| 19 | + |
| 20 | + let order: Order = arg_parser.value_of_t_or_exit("sort"); |
| 21 | + let tmp_dir: Option<&str> = arg_parser.value_of("tmp_dir"); |
| 22 | + let chunk_size = arg_parser.value_of("chunk_size").expect("value is required"); |
| 23 | + let threads: Option<usize> = arg_parser |
| 24 | + .is_present("threads") |
| 25 | + .then(|| arg_parser.value_of_t_or_exit("threads")); |
| 26 | + |
| 27 | + let input = arg_parser.value_of("input").expect("value is required"); |
| 28 | + let input_stream = match fs::File::open(input) { |
| 29 | + Ok(file) => io::BufReader::new(file), |
| 30 | + Err(err) => { |
| 31 | + log::error!("input file opening error: {}", err); |
| 32 | + process::exit(1); |
| 33 | + } |
| 34 | + }; |
| 35 | + |
| 36 | + let output = arg_parser.value_of("output").expect("value is required"); |
| 37 | + let mut output_stream = match fs::File::create(output) { |
| 38 | + Ok(file) => io::BufWriter::new(file), |
| 39 | + Err(err) => { |
| 40 | + log::error!("output file creation error: {}", err); |
| 41 | + process::exit(1); |
| 42 | + } |
| 43 | + }; |
| 44 | + |
| 45 | + let mut sorter_builder = ExternalSorterBuilder::new(); |
| 46 | + if let Some(threads) = threads { |
| 47 | + sorter_builder = sorter_builder.with_threads_number(threads); |
| 48 | + } |
| 49 | + |
| 50 | + if let Some(tmp_dir) = tmp_dir { |
| 51 | + sorter_builder = sorter_builder.with_tmp_dir(path::Path::new(tmp_dir)); |
| 52 | + } |
| 53 | + |
| 54 | + sorter_builder = sorter_builder.with_buffer(MemoryLimitedBufferBuilder::new( |
| 55 | + chunk_size.parse::<ByteSize>().expect("value is pre-validated").as_u64(), |
| 56 | + )); |
| 57 | + |
| 58 | + let sorter: ExternalSorter<String, io::Error, _> = match sorter_builder.build() { |
| 59 | + Ok(sorter) => sorter, |
| 60 | + Err(err) => { |
| 61 | + log::error!("sorter initialization error: {}", err); |
| 62 | + process::exit(1); |
| 63 | + } |
| 64 | + }; |
| 65 | + |
| 66 | + let compare = |a: &String, b: &String| { |
| 67 | + if order == Order::Asc { |
| 68 | + a.cmp(&b) |
| 69 | + } else { |
| 70 | + a.cmp(&b).reverse() |
| 71 | + } |
| 72 | + }; |
| 73 | + |
| 74 | + let sorted_stream = match sorter.sort_by(input_stream.lines(), compare) { |
| 75 | + Ok(sorted_stream) => sorted_stream, |
| 76 | + Err(err) => { |
| 77 | + log::error!("data sorting error: {}", err); |
| 78 | + process::exit(1); |
| 79 | + } |
| 80 | + }; |
| 81 | + |
| 82 | + for line in sorted_stream { |
| 83 | + let line = match line { |
| 84 | + Ok(line) => line, |
| 85 | + Err(err) => { |
| 86 | + log::error!("sorting stream error: {}", err); |
| 87 | + process::exit(1); |
| 88 | + } |
| 89 | + }; |
| 90 | + if let Err(err) = output_stream.write_all(format!("{}\n", line).as_bytes()) { |
| 91 | + log::error!("data saving error: {}", err); |
| 92 | + process::exit(1); |
| 93 | + }; |
| 94 | + } |
| 95 | + |
| 96 | + if let Err(err) = output_stream.flush() { |
| 97 | + log::error!("data flushing error: {}", err); |
| 98 | + process::exit(1); |
| 99 | + } |
| 100 | +} |
| 101 | + |
| 102 | +#[derive(Copy, Clone, clap::ArgEnum)] |
| 103 | +enum LogLevel { |
| 104 | + Off, |
| 105 | + Error, |
| 106 | + Warn, |
| 107 | + Info, |
| 108 | + Debug, |
| 109 | + Trace, |
| 110 | +} |
| 111 | + |
| 112 | +impl LogLevel { |
| 113 | + pub fn possible_values() -> impl Iterator<Item = clap::PossibleValue<'static>> { |
| 114 | + Self::value_variants().iter().filter_map(|v| v.to_possible_value()) |
| 115 | + } |
| 116 | +} |
| 117 | + |
| 118 | +impl std::str::FromStr for LogLevel { |
| 119 | + type Err = String; |
| 120 | + |
| 121 | + fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 122 | + <LogLevel as clap::ArgEnum>::from_str(s, false) |
| 123 | + } |
| 124 | +} |
| 125 | + |
| 126 | +#[derive(Copy, Clone, PartialEq, clap::ArgEnum)] |
| 127 | +enum Order { |
| 128 | + Asc, |
| 129 | + Desc, |
| 130 | +} |
| 131 | + |
| 132 | +impl Order { |
| 133 | + pub fn possible_values() -> impl Iterator<Item = clap::PossibleValue<'static>> { |
| 134 | + Order::value_variants().iter().filter_map(|v| v.to_possible_value()) |
| 135 | + } |
| 136 | +} |
| 137 | + |
| 138 | +impl std::str::FromStr for Order { |
| 139 | + type Err = String; |
| 140 | + |
| 141 | + fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 142 | + <Order as clap::ArgEnum>::from_str(s, false) |
| 143 | + } |
| 144 | +} |
| 145 | + |
| 146 | +fn build_arg_parser() -> clap::ArgMatches { |
| 147 | + clap::App::new("ext-sort") |
| 148 | + .author("Dmitry P. <[email protected]>") |
| 149 | + .about("external sorter") |
| 150 | + .arg( |
| 151 | + clap::Arg::new("input") |
| 152 | + .short('i') |
| 153 | + .long("input") |
| 154 | + .help("file to be sorted") |
| 155 | + .required(true) |
| 156 | + .takes_value(true), |
| 157 | + ) |
| 158 | + .arg( |
| 159 | + clap::Arg::new("output") |
| 160 | + .short('o') |
| 161 | + .long("output") |
| 162 | + .help("result file") |
| 163 | + .required(true) |
| 164 | + .takes_value(true), |
| 165 | + ) |
| 166 | + .arg( |
| 167 | + clap::Arg::new("sort") |
| 168 | + .short('s') |
| 169 | + .long("sort") |
| 170 | + .help("sorting order") |
| 171 | + .takes_value(true) |
| 172 | + .default_value("asc") |
| 173 | + .possible_values(Order::possible_values()), |
| 174 | + ) |
| 175 | + .arg( |
| 176 | + clap::Arg::new("log_level") |
| 177 | + .short('l') |
| 178 | + .long("loglevel") |
| 179 | + .help("logging level") |
| 180 | + .takes_value(true) |
| 181 | + .default_value("info") |
| 182 | + .possible_values(LogLevel::possible_values()), |
| 183 | + ) |
| 184 | + .arg( |
| 185 | + clap::Arg::new("threads") |
| 186 | + .short('t') |
| 187 | + .long("threads") |
| 188 | + .help("number of threads to use for parallel sorting") |
| 189 | + .takes_value(true), |
| 190 | + ) |
| 191 | + .arg( |
| 192 | + clap::Arg::new("tmp_dir") |
| 193 | + .short('d') |
| 194 | + .long("tmp-dir") |
| 195 | + .help("directory to be used to store temporary data") |
| 196 | + .takes_value(true), |
| 197 | + ) |
| 198 | + .arg( |
| 199 | + clap::Arg::new("chunk_size") |
| 200 | + .short('c') |
| 201 | + .long("chunk-size") |
| 202 | + .help("chunk size") |
| 203 | + .required(true) |
| 204 | + .takes_value(true) |
| 205 | + .validator(|v| match v.parse::<ByteSize>() { |
| 206 | + Ok(_) => Ok(()), |
| 207 | + Err(err) => Err(format!("Chunk size format incorrect: {}", err)), |
| 208 | + }), |
| 209 | + ) |
| 210 | + .get_matches() |
| 211 | +} |
| 212 | + |
| 213 | +fn init_logger(log_level: LogLevel) { |
| 214 | + env_logger::Builder::new() |
| 215 | + .filter_level(match log_level { |
| 216 | + LogLevel::Off => log::LevelFilter::Off, |
| 217 | + LogLevel::Error => log::LevelFilter::Error, |
| 218 | + LogLevel::Warn => log::LevelFilter::Warn, |
| 219 | + LogLevel::Info => log::LevelFilter::Info, |
| 220 | + LogLevel::Debug => log::LevelFilter::Debug, |
| 221 | + LogLevel::Trace => log::LevelFilter::Trace, |
| 222 | + }) |
| 223 | + .format_timestamp_millis() |
| 224 | + .init(); |
| 225 | +} |
0 commit comments