Skip to content

Commit 2362cad

Browse files
authored
Merge pull request #6 from dapper91/dev
- ExternalChunkError Display trait bug fixed. - binary crate added. - sorting custom comparator feature implemented.
2 parents 41960de + ac54edf commit 2362cad

File tree

6 files changed

+371
-37
lines changed

6 files changed

+371
-37
lines changed

Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ext-sort"
3-
version = "0.1.1"
3+
version = "0.1.2"
44
edition = "2021"
55
license = "Unlicense"
66
description = "rust external sort algorithm implementation"
@@ -15,6 +15,7 @@ keywords = ["algorithms", "sort", "sorting", "external-sort", "external"]
1515

1616
[dependencies]
1717
bytesize = { version = "^1.1", optional = true }
18+
clap = { version = "^3.0", features = ["derive"], optional = true }
1819
deepsize = { version = "^0.2", optional = true }
1920
env_logger = { version = "^0.9", optional = true}
2021
log = "^0.4"
@@ -30,6 +31,10 @@ rand = "^0.8"
3031
[features]
3132
memory-limit = ["deepsize"]
3233

34+
[[bin]]
35+
name = "ext-sort"
36+
required-features = ["bytesize", "clap", "env_logger"]
37+
3338
[[example]]
3439
name = "quickstart"
3540
required-features = ["bytesize", "env_logger"]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ Activate `memory-limit` feature of the ext-sort crate on Cargo.toml:
4646

4747
```toml
4848
[dependencies]
49-
ext-sort = { version = "^0.1.1", features = ["memory-limit"] }
49+
ext-sort = { version = "^0.1.2", features = ["memory-limit"] }
5050
```
5151

5252
``` rust

src/chunk.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ impl<S: Error> Error for ExternalChunkError<S> {}
2222

2323
impl<S: Error> Display for ExternalChunkError<S> {
2424
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25-
write!(f, "{}", self)
25+
match self {
26+
ExternalChunkError::IO(err) => write!(f, "{}", err),
27+
ExternalChunkError::SerializationError(err) => write!(f, "{}", err),
28+
}
2629
}
2730
}
2831

src/main.rs

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
use std::fs;
2+
use std::io::{self, prelude::*};
3+
use std::path;
4+
use std::process;
5+
6+
use bytesize::ByteSize;
7+
use clap::ArgEnum;
8+
use env_logger;
9+
use log;
10+
11+
use ext_sort::buffer::mem::MemoryLimitedBufferBuilder;
12+
use ext_sort::{ExternalSorter, ExternalSorterBuilder};
13+
14+
fn main() {
15+
let arg_parser = build_arg_parser();
16+
17+
let log_level: LogLevel = arg_parser.value_of_t_or_exit("log_level");
18+
init_logger(log_level);
19+
20+
let order: Order = arg_parser.value_of_t_or_exit("sort");
21+
let tmp_dir: Option<&str> = arg_parser.value_of("tmp_dir");
22+
let chunk_size = arg_parser.value_of("chunk_size").expect("value is required");
23+
let threads: Option<usize> = arg_parser
24+
.is_present("threads")
25+
.then(|| arg_parser.value_of_t_or_exit("threads"));
26+
27+
let input = arg_parser.value_of("input").expect("value is required");
28+
let input_stream = match fs::File::open(input) {
29+
Ok(file) => io::BufReader::new(file),
30+
Err(err) => {
31+
log::error!("input file opening error: {}", err);
32+
process::exit(1);
33+
}
34+
};
35+
36+
let output = arg_parser.value_of("output").expect("value is required");
37+
let mut output_stream = match fs::File::create(output) {
38+
Ok(file) => io::BufWriter::new(file),
39+
Err(err) => {
40+
log::error!("output file creation error: {}", err);
41+
process::exit(1);
42+
}
43+
};
44+
45+
let mut sorter_builder = ExternalSorterBuilder::new();
46+
if let Some(threads) = threads {
47+
sorter_builder = sorter_builder.with_threads_number(threads);
48+
}
49+
50+
if let Some(tmp_dir) = tmp_dir {
51+
sorter_builder = sorter_builder.with_tmp_dir(path::Path::new(tmp_dir));
52+
}
53+
54+
sorter_builder = sorter_builder.with_buffer(MemoryLimitedBufferBuilder::new(
55+
chunk_size.parse::<ByteSize>().expect("value is pre-validated").as_u64(),
56+
));
57+
58+
let sorter: ExternalSorter<String, io::Error, _> = match sorter_builder.build() {
59+
Ok(sorter) => sorter,
60+
Err(err) => {
61+
log::error!("sorter initialization error: {}", err);
62+
process::exit(1);
63+
}
64+
};
65+
66+
let compare = |a: &String, b: &String| {
67+
if order == Order::Asc {
68+
a.cmp(&b)
69+
} else {
70+
a.cmp(&b).reverse()
71+
}
72+
};
73+
74+
let sorted_stream = match sorter.sort_by(input_stream.lines(), compare) {
75+
Ok(sorted_stream) => sorted_stream,
76+
Err(err) => {
77+
log::error!("data sorting error: {}", err);
78+
process::exit(1);
79+
}
80+
};
81+
82+
for line in sorted_stream {
83+
let line = match line {
84+
Ok(line) => line,
85+
Err(err) => {
86+
log::error!("sorting stream error: {}", err);
87+
process::exit(1);
88+
}
89+
};
90+
if let Err(err) = output_stream.write_all(format!("{}\n", line).as_bytes()) {
91+
log::error!("data saving error: {}", err);
92+
process::exit(1);
93+
};
94+
}
95+
96+
if let Err(err) = output_stream.flush() {
97+
log::error!("data flushing error: {}", err);
98+
process::exit(1);
99+
}
100+
}
101+
102+
#[derive(Copy, Clone, clap::ArgEnum)]
103+
enum LogLevel {
104+
Off,
105+
Error,
106+
Warn,
107+
Info,
108+
Debug,
109+
Trace,
110+
}
111+
112+
impl LogLevel {
113+
pub fn possible_values() -> impl Iterator<Item = clap::PossibleValue<'static>> {
114+
Self::value_variants().iter().filter_map(|v| v.to_possible_value())
115+
}
116+
}
117+
118+
impl std::str::FromStr for LogLevel {
119+
type Err = String;
120+
121+
fn from_str(s: &str) -> Result<Self, Self::Err> {
122+
<LogLevel as clap::ArgEnum>::from_str(s, false)
123+
}
124+
}
125+
126+
#[derive(Copy, Clone, PartialEq, clap::ArgEnum)]
127+
enum Order {
128+
Asc,
129+
Desc,
130+
}
131+
132+
impl Order {
133+
pub fn possible_values() -> impl Iterator<Item = clap::PossibleValue<'static>> {
134+
Order::value_variants().iter().filter_map(|v| v.to_possible_value())
135+
}
136+
}
137+
138+
impl std::str::FromStr for Order {
139+
type Err = String;
140+
141+
fn from_str(s: &str) -> Result<Self, Self::Err> {
142+
<Order as clap::ArgEnum>::from_str(s, false)
143+
}
144+
}
145+
146+
fn build_arg_parser() -> clap::ArgMatches {
147+
clap::App::new("ext-sort")
148+
.author("Dmitry P. <[email protected]>")
149+
.about("external sorter")
150+
.arg(
151+
clap::Arg::new("input")
152+
.short('i')
153+
.long("input")
154+
.help("file to be sorted")
155+
.required(true)
156+
.takes_value(true),
157+
)
158+
.arg(
159+
clap::Arg::new("output")
160+
.short('o')
161+
.long("output")
162+
.help("result file")
163+
.required(true)
164+
.takes_value(true),
165+
)
166+
.arg(
167+
clap::Arg::new("sort")
168+
.short('s')
169+
.long("sort")
170+
.help("sorting order")
171+
.takes_value(true)
172+
.default_value("asc")
173+
.possible_values(Order::possible_values()),
174+
)
175+
.arg(
176+
clap::Arg::new("log_level")
177+
.short('l')
178+
.long("loglevel")
179+
.help("logging level")
180+
.takes_value(true)
181+
.default_value("info")
182+
.possible_values(LogLevel::possible_values()),
183+
)
184+
.arg(
185+
clap::Arg::new("threads")
186+
.short('t')
187+
.long("threads")
188+
.help("number of threads to use for parallel sorting")
189+
.takes_value(true),
190+
)
191+
.arg(
192+
clap::Arg::new("tmp_dir")
193+
.short('d')
194+
.long("tmp-dir")
195+
.help("directory to be used to store temporary data")
196+
.takes_value(true),
197+
)
198+
.arg(
199+
clap::Arg::new("chunk_size")
200+
.short('c')
201+
.long("chunk-size")
202+
.help("chunk size")
203+
.required(true)
204+
.takes_value(true)
205+
.validator(|v| match v.parse::<ByteSize>() {
206+
Ok(_) => Ok(()),
207+
Err(err) => Err(format!("Chunk size format incorrect: {}", err)),
208+
}),
209+
)
210+
.get_matches()
211+
}
212+
213+
fn init_logger(log_level: LogLevel) {
214+
env_logger::Builder::new()
215+
.filter_level(match log_level {
216+
LogLevel::Off => log::LevelFilter::Off,
217+
LogLevel::Error => log::LevelFilter::Error,
218+
LogLevel::Warn => log::LevelFilter::Warn,
219+
LogLevel::Info => log::LevelFilter::Info,
220+
LogLevel::Debug => log::LevelFilter::Debug,
221+
LogLevel::Trace => log::LevelFilter::Trace,
222+
})
223+
.format_timestamp_millis()
224+
.init();
225+
}

0 commit comments

Comments
 (0)