Skip to content

Commit d836d7e

Browse files
authored
Merge pull request #37 from snipsco/release/0.7.1
Release/0.7.1
2 parents a1e79b7 + 3c0e564 commit d836d7e

File tree

5 files changed

+201
-60
lines changed

5 files changed

+201
-60
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Changelog
22
All notable changes to this project will be documented in this file.
33

4+
## [0.7.1]
5+
### Added
6+
- Add a license file to the gazetteer entity parser [#36](https://github.com/snipsco/gazetteer-entity-parser/pull/36)
7+
48
## [0.7.0] - 2019-04-16
59
### Added
610
- Add API to prepend entity values [#31](https://github.com/snipsco/gazetteer-entity-parser/pull/31)
@@ -19,6 +23,7 @@ All notable changes to this project will be documented in this file.
1923
### Changed
2024
- Clearer `ParserBuilder`'s API
2125

26+
[0.7.1]: https://github.com/snipsco/gazetteer-entity-parser/compare/0.7.0...0.7.1
2227
[0.7.0]: https://github.com/snipsco/gazetteer-entity-parser/compare/0.6.0...0.7.0
2328
[0.6.0]: https://github.com/snipsco/gazetteer-entity-parser/compare/0.5.1...0.6.0
2429
[0.5.1]: https://github.com/snipsco/gazetteer-entity-parser/compare/0.5.0...0.5.1

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "gazetteer-entity-parser"
3-
version = "0.7.0"
3+
version = "0.7.1"
44
authors = ["Alaa Saade <[email protected]>"]
55

66
[profile.bench]

examples/interactive_parsing_cli.rs

Lines changed: 84 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,93 @@
11
extern crate clap;
2-
extern crate serde_json;
32
extern crate gazetteer_entity_parser;
3+
extern crate serde_json;
44

5-
use clap::{Arg, App};
6-
use std::io;
75
use std::io::Write;
8-
use gazetteer_entity_parser::Parser;
6+
use std::{fs, io};
7+
8+
use clap::{App, Arg};
9+
10+
use gazetteer_entity_parser::{Gazetteer, Parser, ParserBuilder};
911

1012
fn main() {
11-
let matches = App::new("gazetteer-entity-parser-demo")
13+
let mut app = App::new("gazetteer-entity-parser-demo")
1214
.about("Interactive CLI for parsing gazetteer entities")
13-
.arg(Arg::with_name("PARSER_DIR")
14-
.required(true)
15-
.takes_value(true)
16-
.index(1)
17-
.help("path to the parser directory"))
18-
.get_matches();
19-
20-
let parser_dir = matches.value_of("PARSER_DIR").unwrap();
21-
println!("\nLoading the parser...");
22-
let parser = Parser::from_folder(parser_dir).unwrap();
23-
loop {
24-
print!("> ");
25-
io::stdout().flush().unwrap();
26-
let mut query = String::new();
27-
io::stdin().read_line(&mut query).unwrap();
28-
let result = parser.run(query.trim()).unwrap();
29-
println!("{:?}", result);
15+
.arg(
16+
Arg::with_name("parser")
17+
.short("p")
18+
.long("--parser")
19+
.takes_value(true)
20+
.help("path to the parser directory"),
21+
)
22+
.arg(
23+
Arg::with_name("gazetteer")
24+
.short("g")
25+
.long("--gazetteer")
26+
.takes_value(true)
27+
.help("path to the json gazetteer file"),
28+
)
29+
.arg(
30+
Arg::with_name("opt_nb_stop_words")
31+
.short("n")
32+
.long("--nb-stop-words")
33+
.takes_value(true)
34+
.help("number of stop words to use"),
35+
)
36+
.arg(
37+
Arg::with_name("opt_tokens_ratio")
38+
.short("r")
39+
.long("--ratio")
40+
.takes_value(true)
41+
.help("minimum tokens ratio for the parser"),
42+
);
43+
let matches = app.clone().get_matches();
44+
45+
let opt_nb_stop_words = matches
46+
.value_of("opt_nb_stop_words")
47+
.map(|nb_str| nb_str.to_string().parse::<usize>().unwrap());
48+
49+
let opt_tokens_ratio = matches
50+
.value_of("opt_tokens_ratio")
51+
.map(|ratio_str| ratio_str.to_string().parse::<f32>().unwrap());
52+
53+
if let Some(parser) = matches
54+
.value_of("parser")
55+
.map(|parser_dir| {
56+
println!("\nLoading the parser...");
57+
let mut parser = Parser::from_folder(parser_dir).unwrap();
58+
if let Some(ratio) = opt_tokens_ratio {
59+
parser.set_threshold(ratio);
60+
};
61+
if let Some(nb_stop_words) = opt_nb_stop_words {
62+
parser.set_stop_words(nb_stop_words, None);
63+
};
64+
parser
65+
})
66+
.or_else(|| {
67+
matches.value_of("gazetteer").map(|gazetteer_path| {
68+
println!("\nLoading the gazetteer...");
69+
let gazetteer_file = fs::File::open(&gazetteer_path).unwrap();
70+
let gazetteer: Gazetteer = serde_json::from_reader(gazetteer_file).unwrap();
71+
72+
println!("\nBuilding the parser...");
73+
ParserBuilder::default()
74+
.gazetteer(gazetteer)
75+
.n_stop_words(opt_nb_stop_words.unwrap_or(0))
76+
.minimum_tokens_ratio(opt_tokens_ratio.unwrap_or(1.0))
77+
.build()
78+
.unwrap()
79+
})
80+
})
81+
{
82+
loop {
83+
print!("> ");
84+
io::stdout().flush().unwrap();
85+
let mut query = String::new();
86+
io::stdin().read_line(&mut query).unwrap();
87+
let result = parser.run(query.trim()).unwrap();
88+
println!("{:?}", result);
89+
}
90+
} else {
91+
app.print_long_help().unwrap();
3092
}
3193
}

src/parser.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ pub struct Parser {
4343
injected_values: HashSet<String>,
4444
// Parsing threshold giving the minimal fraction of tokens necessary to parse a value
4545
threshold: f32,
46+
// License information associated with the parser's data
47+
#[serde(default)]
48+
license_info: Option<LicenseInfo>,
49+
}
50+
51+
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
52+
pub struct LicenseInfo {
53+
pub filename: String,
54+
pub content: String,
4655
}
4756

4857
#[derive(Serialize, Deserialize)]
@@ -239,6 +248,11 @@ impl Parser {
239248
.collect();
240249
}
241250

251+
/// Set the license info that is written next to the parser when it is dumped (pass `None` to clear it)
252+
pub fn set_license_info<T: Into<Option<LicenseInfo>>>(&mut self, license_info: T) {
253+
self.license_info = license_info.into();
254+
}
255+
242256
/// Get the set of stop words
243257
pub fn get_stop_words(&self) -> HashSet<String> {
244258
self.stop_words
@@ -370,6 +384,13 @@ impl Parser {
370384

371385
self.serialize(&mut Serializer::new(&mut writer))
372386
.with_context(|_| format_err!("Error when serializing the parser"))?;
387+
388+
if let Some(license_info) = &self.license_info {
389+
let license_path = folder_name.as_ref().join(&license_info.filename);
390+
fs::write(license_path, &license_info.content)
391+
.with_context(|_| format_err!("Error when writing the license"))?
392+
}
393+
373394
Ok(())
374395
}
375396

@@ -813,6 +834,16 @@ mod tests {
813834
use parser_builder::ParserBuilder;
814835
use std::time::Instant;
815836

837+
fn get_license_info() -> LicenseInfo {
838+
let license_content = "Some content here".to_string();
839+
let license_filename = "LICENSE".to_string();
840+
let license_info = LicenseInfo {
841+
filename: license_filename,
842+
content: license_content,
843+
};
844+
license_info
845+
}
846+
816847
#[test]
817848
fn test_serialization_deserialization() {
818849
let tdir = tempdir().unwrap();
@@ -829,14 +860,28 @@ mod tests {
829860
resolved_value: "The Rolling Stones".to_string(),
830861
raw_value: "the stones".to_string(),
831862
});
863+
864+
let license_info = get_license_info();
865+
832866
let parser = ParserBuilder::default()
833867
.minimum_tokens_ratio(0.5)
834868
.gazetteer(gazetteer)
835869
.n_stop_words(2)
836870
.additional_stop_words(vec!["hello".to_string()])
871+
.license_info(license_info)
837872
.build()
838873
.unwrap();
839-
parser.dump(tdir.as_ref().join("parser")).unwrap();
874+
875+
let serialized_parser_path = tdir.as_ref().join("parser");
876+
let license_path = serialized_parser_path.join("LICENSE");
877+
parser.dump(serialized_parser_path).unwrap();
878+
879+
assert!(license_path.exists());
880+
881+
let expected_content = "Some content here".to_string();
882+
let content = fs::read_to_string(license_path).unwrap();
883+
assert_eq!(content, expected_content);
884+
840885
let reloaded_parser = Parser::from_folder(tdir.as_ref().join("parser")).unwrap();
841886

842887
assert_eq!(parser, reloaded_parser);

0 commit comments

Comments
 (0)