Skip to content

Commit c640af8

Browse files
committed
support regex matching against specific keys
Signed-off-by: Jean Mertz <[email protected]>
1 parent bb787fd commit c640af8

File tree

2 files changed

+97
-43
lines changed

2 files changed

+97
-43
lines changed

aw-query/src/datatype.rs

Lines changed: 67 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::fmt;
44
use super::functions;
55
use super::QueryError;
66
use aw_models::Event;
7-
use aw_transform::classify::{RegexRule, Rule};
7+
use aw_transform::classify::{KeyValueRule, RegexRule, Rule};
88

99
use serde::{Serialize, Serializer};
1010
use serde_json::value::Value;
@@ -297,50 +297,74 @@ impl TryFrom<&DataType> for Rule {
297297
))
298298
}
299299
};
300-
if rtype == "none" {
301-
Ok(Self::None)
302-
} else if rtype == "regex" {
303-
let regex_val = match obj.get("regex") {
304-
Some(regex_val) => regex_val,
305-
None => {
306-
return Err(QueryError::InvalidFunctionParameters(
307-
"regex rule is missing the 'regex' field".to_string(),
308-
))
309-
}
310-
};
311-
let regex_str = match regex_val {
312-
DataType::String(s) => s,
313-
_ => {
314-
return Err(QueryError::InvalidFunctionParameters(
315-
"the regex field of the regex rule is not a string".to_string(),
316-
))
317-
}
318-
};
319-
let ignore_case_val = match obj.get("ignore_case") {
320-
Some(case_val) => case_val,
321-
None => &DataType::Bool(false),
322-
};
323-
let ignore_case = match ignore_case_val {
324-
DataType::Bool(b) => b,
325-
_ => {
300+
301+
match rtype.as_str() {
302+
"none" => Ok(Self::None),
303+
"regex" => parse_regex_rule(obj),
304+
"keyvalue" => {
305+
let Some(rules) = obj.get("rules") else {
326306
return Err(QueryError::InvalidFunctionParameters(
327-
"the ignore_case field of the regex rule is not a bool".to_string(),
328-
))
329-
}
330-
};
331-
let regex_rule = match RegexRule::new(regex_str, *ignore_case) {
332-
Ok(regex_rule) => regex_rule,
333-
Err(err) => {
334-
return Err(QueryError::RegexCompileError(format!(
335-
"Failed to compile regex string '{regex_str}': '{err:?}"
336-
)))
337-
}
338-
};
339-
Ok(Self::Regex(regex_rule))
340-
} else {
341-
Err(QueryError::InvalidFunctionParameters(format!(
307+
"keyval rule is missing the 'rules' field".to_string(),
308+
));
309+
};
310+
311+
let rules = match rules {
312+
DataType::Dict(rules) => rules
313+
.iter()
314+
.map(|(k, v)| Rule::try_from(v).map(|v| (k.to_owned(), v)))
315+
.collect::<Result<HashMap<_, _>, _>>()?,
316+
_ => {
317+
return Err(QueryError::InvalidFunctionParameters(
318+
"the rules field of the keyval rule is not a dict".to_string(),
319+
))
320+
}
321+
};
322+
323+
Ok(Rule::KeyValue(KeyValueRule::new(rules)))
324+
}
325+
_ => Err(QueryError::InvalidFunctionParameters(format!(
342326
"Unknown rule type '{rtype}'"
343-
)))
327+
))),
344328
}
345329
}
346330
}
331+
332+
/// Builds a regex [`Rule`] from the fields of a rule dict.
///
/// Reads the required `regex` entry, which must be a string, and the optional
/// `ignore_case` entry, which must be a bool and is treated as `false` when
/// absent. The pattern is compiled through `RegexRule::new`.
///
/// # Errors
///
/// Returns `QueryError::InvalidFunctionParameters` when `regex` is missing or
/// is not a string, or when `ignore_case` is present but is not a bool.
/// Returns `QueryError::RegexCompileError` when `RegexRule::new` fails.
fn parse_regex_rule(obj: &HashMap<String, DataType>) -> Result<Rule, QueryError> {
333+
// The `regex` field is mandatory.
let regex_val = match obj.get("regex") {
334+
Some(regex_val) => regex_val,
335+
None => {
336+
return Err(QueryError::InvalidFunctionParameters(
337+
"regex rule is missing the 'regex' field".to_string(),
338+
))
339+
}
340+
};
341+
// Only a string value is accepted as the pattern.
let regex_str = match regex_val {
342+
DataType::String(s) => s,
343+
_ => {
344+
return Err(QueryError::InvalidFunctionParameters(
345+
"the regex field of the regex rule is not a string".to_string(),
346+
))
347+
}
348+
};
349+
// `ignore_case` is optional; a missing entry behaves like `false`.
let ignore_case_val = match obj.get("ignore_case") {
350+
Some(case_val) => case_val,
351+
None => &DataType::Bool(false),
352+
};
353+
let ignore_case = match ignore_case_val {
354+
DataType::Bool(b) => b,
355+
_ => {
356+
return Err(QueryError::InvalidFunctionParameters(
357+
"the ignore_case field of the regex rule is not a bool".to_string(),
358+
))
359+
}
360+
};
361+
// Compile the pattern; a compile failure is reported with the offending
// pattern and the debug form of the underlying error.
let regex_rule = match RegexRule::new(regex_str, *ignore_case) {
362+
Ok(regex_rule) => regex_rule,
363+
Err(err) => {
364+
// NOTE(review): the message below opens a single quote before `{err:?}`
// and never closes it.
return Err(QueryError::RegexCompileError(format!(
365+
"Failed to compile regex string '{regex_str}': '{err:?}"
366+
)))
367+
}
368+
};
369+
Ok(Rule::Regex(regex_rule))
370+
}

aw-transform/src/classify.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::collections::HashMap;
2+
13
/// Transforms for classifying (tagging and categorizing) events.
24
///
35
/// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py
@@ -7,13 +9,15 @@ use fancy_regex::Regex;
79
pub enum Rule {
810
/// Never matches any event (see the `RuleTrait` impl for `Rule`).
None,
911
/// Matching is delegated to the wrapped `RegexRule`.
Regex(RegexRule),
12+
/// Matching is delegated to the wrapped `KeyValueRule`, which holds one
/// sub-rule per event data key.
KeyValue(KeyValueRule),
1013
}
1114

1215
// Dispatches `matches` to the concrete rule held by each `Rule` variant.
impl RuleTrait for Rule {
1316
/// Returns whether this rule matches `event`.
///
/// `Rule::None` always yields `false`; the other variants forward the call
/// to the rule they wrap.
fn matches(&self, event: &Event) -> bool {
1417
match self {
1518
Rule::None => false,
1619
Rule::Regex(rule) => rule.matches(event),
20+
Rule::KeyValue(rule) => rule.matches(event),
1721
}
1822
}
1923
}
@@ -62,6 +66,32 @@ impl From<Regex> for Rule {
6266
}
6367
}
6468

69+
/// A rule composed of per-key sub-rules.
///
/// Each entry associates an event data key with the [`Rule`] that is applied
/// for that key.
pub struct KeyValueRule {
70+
/// Sub-rules, keyed by the event data key each one applies to.
rules: HashMap<String, Rule>,
71+
}
72+
73+
impl KeyValueRule {
74+
/// Creates a rule from a map of event data key to sub-rule.
///
/// The map is stored as given; no validation is performed here.
pub fn new(rules: HashMap<String, Rule>) -> Self {
75+
Self { rules }
76+
}
77+
}
78+
79+
impl RuleTrait for KeyValueRule {
80+
/// Returns `true` only when every `(key, rule)` entry is satisfied by `event`.
///
/// An entry is satisfied when `event.data` contains `key` and the sub-rule
/// matches a copy of the event whose `data` has been reduced to that key.
///
/// NOTE(review): with an empty rule map `Iterator::all` is vacuously `true`,
/// so such a rule matches every event — confirm this is intended.
fn matches(&self, event: &Event) -> bool {
81+
self.rules.iter().all(|(key, rule)| {
82+
event
83+
.data
84+
.get(key)
85+
// The looked-up value itself is ignored; the closure only runs when
// the key is present, so a missing key makes this entry fail.
.filter(|_| {
86+
// NOTE(review): the whole event is cloned once per present key, then
// every other data entry is dropped so the sub-rule sees only `key`.
let mut ev = event.clone();
87+
ev.data.retain(|k, _| k == key);
88+
rule.matches(&ev)
89+
})
90+
.is_some()
91+
})
92+
}
93+
}
94+
6595
/// Categorizes a list of events
6696
///
6797
/// An event can only have one category, although the category may have a hierarchy,

0 commit comments

Comments
 (0)