Skip to content

Commit 783bc21

Browse files
authored
Merge pull request apache#328 from b41sh/regexp_match
Add support for PostgreSQL regex match
2 parents e548d38 + d312837 commit 783bc21

File tree

4 files changed

+102
-2
lines changed

4 files changed

+102
-2
lines changed

src/ast/operator.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ pub enum BinaryOperator {
8181
PGBitwiseXor,
8282
PGBitwiseShiftLeft,
8383
PGBitwiseShiftRight,
84+
PGRegexMatch,
85+
PGRegexIMatch,
86+
PGRegexNotMatch,
87+
PGRegexNotIMatch,
8488
}
8589

8690
impl fmt::Display for BinaryOperator {
@@ -111,6 +115,10 @@ impl fmt::Display for BinaryOperator {
111115
BinaryOperator::PGBitwiseXor => "#",
112116
BinaryOperator::PGBitwiseShiftLeft => "<<",
113117
BinaryOperator::PGBitwiseShiftRight => ">>",
118+
BinaryOperator::PGRegexMatch => "~",
119+
BinaryOperator::PGRegexIMatch => "~*",
120+
BinaryOperator::PGRegexNotMatch => "!~",
121+
BinaryOperator::PGRegexNotIMatch => "!~*",
114122
})
115123
}
116124
}

src/parser.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,10 @@ impl<'a> Parser<'a> {
862862
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
863863
Some(BinaryOperator::PGBitwiseXor)
864864
}
865+
Token::Tilde => Some(BinaryOperator::PGRegexMatch),
866+
Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
867+
Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
868+
Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch),
865869
Token::Word(w) => match w.keyword {
866870
Keyword::AND => Some(BinaryOperator::And),
867871
Keyword::OR => Some(BinaryOperator::Or),
@@ -1020,6 +1024,10 @@ impl<'a> Parser<'a> {
10201024
| Token::Gt
10211025
| Token::GtEq
10221026
| Token::DoubleEq
1027+
| Token::Tilde
1028+
| Token::TildeAsterisk
1029+
| Token::ExclamationMarkTilde
1030+
| Token::ExclamationMarkTildeAsterisk
10231031
| Token::Spaceship => Ok(20),
10241032
Token::Pipe => Ok(21),
10251033
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),

src/tokenizer.rs

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,14 @@ pub enum Token {
109109
RArrow,
110110
/// Sharp `#` used for PostgreSQL Bitwise XOR operator
111111
Sharp,
112-
/// Tilde `~` used for PostgreSQL Bitwise NOT operator
112+
/// Tilde `~`, used as the PostgreSQL bitwise NOT operator or the case-sensitive regular expression match operator
113113
Tilde,
114+
/// `~*`, the case-insensitive regular expression match operator in PostgreSQL
115+
TildeAsterisk,
116+
/// `!~`, the case-sensitive regular expression non-match operator in PostgreSQL
117+
ExclamationMarkTilde,
118+
/// `!~*`, the case-insensitive regular expression non-match operator in PostgreSQL
119+
ExclamationMarkTildeAsterisk,
114120
/// `<<`, a bitwise shift left operator in PostgreSQL
115121
ShiftLeft,
116122
/// `>>`, a bitwise shift right operator in PostgreSQL
@@ -172,6 +178,9 @@ impl fmt::Display for Token {
172178
Token::ExclamationMark => f.write_str("!"),
173179
Token::DoubleExclamationMark => f.write_str("!!"),
174180
Token::Tilde => f.write_str("~"),
181+
Token::TildeAsterisk => f.write_str("~*"),
182+
Token::ExclamationMarkTilde => f.write_str("!~"),
183+
Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
175184
Token::AtSign => f.write_str("@"),
176185
Token::ShiftLeft => f.write_str("<<"),
177186
Token::ShiftRight => f.write_str(">>"),
@@ -489,6 +498,14 @@ impl<'a> Tokenizer<'a> {
489498
match chars.peek() {
490499
Some('=') => self.consume_and_return(chars, Token::Neq),
491500
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
501+
Some('~') => {
502+
chars.next();
503+
match chars.peek() {
504+
Some('*') => self
505+
.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
506+
_ => Ok(Some(Token::ExclamationMarkTilde)),
507+
}
508+
}
492509
_ => Ok(Some(Token::ExclamationMark)),
493510
}
494511
}
@@ -538,7 +555,13 @@ impl<'a> Tokenizer<'a> {
538555
comment,
539556
})))
540557
}
541-
'~' => self.consume_and_return(chars, Token::Tilde),
558+
'~' => {
559+
chars.next(); // consume
560+
match chars.peek() {
561+
Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
562+
_ => Ok(Some(Token::Tilde)),
563+
}
564+
}
542565
'#' => self.consume_and_return(chars, Token::Sharp),
543566
'@' => self.consume_and_return(chars, Token::AtSign),
544567
other => self.consume_and_return(chars, Token::Char(other)),
@@ -1114,6 +1137,45 @@ mod tests {
11141137
compare(expected, tokens);
11151138
}
11161139

1140+
// Checks that the tokenizer produces a dedicated token for each of the four
// regex match operators `~`, `~*`, `!~` and `!~*`, rather than splitting the
// multi-character forms into separate tokens.
#[test]
1141+
fn tokenize_pg_regex_match() {
1142+
// One projection per operator, in the order `~`, `~*`, `!~`, `!~*`.
let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
1143+
// The generic dialect is used here, not the PostgreSQL one.
let dialect = GenericDialect {};
1144+
let mut tokenizer = Tokenizer::new(&dialect, sql);
1145+
let tokens = tokenizer.tokenize().unwrap();
1146+
// Full expected token stream, including every whitespace token.
let expected = vec![
1147+
Token::make_keyword("SELECT"),
1148+
Token::Whitespace(Whitespace::Space),
1149+
// col ~ '^a'
Token::make_word("col", None),
1150+
Token::Whitespace(Whitespace::Space),
1151+
Token::Tilde,
1152+
Token::Whitespace(Whitespace::Space),
1153+
Token::SingleQuotedString("^a".into()),
1154+
Token::Comma,
1155+
Token::Whitespace(Whitespace::Space),
1156+
// col ~* '^a'
Token::make_word("col", None),
1157+
Token::Whitespace(Whitespace::Space),
1158+
Token::TildeAsterisk,
1159+
Token::Whitespace(Whitespace::Space),
1160+
Token::SingleQuotedString("^a".into()),
1161+
Token::Comma,
1162+
Token::Whitespace(Whitespace::Space),
1163+
// col !~ '^a'
Token::make_word("col", None),
1164+
Token::Whitespace(Whitespace::Space),
1165+
Token::ExclamationMarkTilde,
1166+
Token::Whitespace(Whitespace::Space),
1167+
Token::SingleQuotedString("^a".into()),
1168+
Token::Comma,
1169+
Token::Whitespace(Whitespace::Space),
1170+
// col !~* '^a'
Token::make_word("col", None),
1171+
Token::Whitespace(Whitespace::Space),
1172+
Token::ExclamationMarkTildeAsterisk,
1173+
Token::Whitespace(Whitespace::Space),
1174+
Token::SingleQuotedString("^a".into()),
1175+
];
1176+
// `compare` is the helper defined right after this test in the same module.
compare(expected, tokens);
1177+
}
1178+
11171179
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
11181180
//println!("------------------------------");
11191181
//println!("tokens = {:?}", actual);

tests/sqlparser_postgres.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,28 @@ fn parse_pg_postfix_factorial() {
647647
}
648648
}
649649

650+
// Checks that each regex match operator (`~`, `~*`, `!~`, `!~*`) is parsed
// under the PostgreSQL dialect into a binary expression carrying the
// corresponding `BinaryOperator` variant.
#[test]
651+
fn parse_pg_regex_match_ops() {
652+
// Pairs of (operator text as written in SQL, expected AST operator).
let pg_regex_match_ops = &[
653+
("~", BinaryOperator::PGRegexMatch),
654+
("~*", BinaryOperator::PGRegexIMatch),
655+
("!~", BinaryOperator::PGRegexNotMatch),
656+
("!~*", BinaryOperator::PGRegexNotIMatch),
657+
];
658+
659+
for (str_op, op) in pg_regex_match_ops {
660+
// NOTE(review): `verified_only_select` presumably also checks that the
// statement round-trips through serialization — confirm against the
// `TestedDialects` helper, which is not visible here.
let select = pg().verified_only_select(&format!("SELECT 'abc' {} '^a'", &str_op));
661+
// The only projection must be `'abc' <op> '^a'` with string literals on
// both sides of the operator.
assert_eq!(
662+
SelectItem::UnnamedExpr(Expr::BinaryOp {
663+
left: Box::new(Expr::Value(Value::SingleQuotedString("abc".into()))),
664+
op: op.clone(),
665+
right: Box::new(Expr::Value(Value::SingleQuotedString("^a".into()))),
666+
}),
667+
select.projection[0]
668+
);
669+
}
670+
}
671+
650672
fn pg() -> TestedDialects {
651673
TestedDialects {
652674
dialects: vec![Box::new(PostgreSqlDialect {})],

0 commit comments

Comments
 (0)