Skip to content

Commit 80759a4

Browse files
committed
support regex match
1 parent e5991f3 commit 80759a4

File tree

4 files changed

+102
-2
lines changed

4 files changed

+102
-2
lines changed

src/ast/operator.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ pub enum BinaryOperator {
8080
PGBitwiseXor,
8181
PGBitwiseShiftLeft,
8282
PGBitwiseShiftRight,
83+
PGRegexMatch,
84+
PGRegexIMatch,
85+
PGRegexNotMatch,
86+
PGRegexNotIMatch,
8387
}
8488

8589
impl fmt::Display for BinaryOperator {
@@ -110,6 +114,10 @@ impl fmt::Display for BinaryOperator {
110114
BinaryOperator::PGBitwiseXor => "#",
111115
BinaryOperator::PGBitwiseShiftLeft => "<<",
112116
BinaryOperator::PGBitwiseShiftRight => ">>",
117+
BinaryOperator::PGRegexMatch => "~",
118+
BinaryOperator::PGRegexIMatch => "~*",
119+
BinaryOperator::PGRegexNotMatch => "!~",
120+
BinaryOperator::PGRegexNotIMatch => "!~*",
113121
})
114122
}
115123
}

src/parser.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,10 @@ impl<'a> Parser<'a> {
835835
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
836836
Some(BinaryOperator::PGBitwiseXor)
837837
}
838+
Token::Tilde => Some(BinaryOperator::PGRegexMatch),
839+
Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
840+
Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
841+
Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch),
838842
Token::Word(w) => match w.keyword {
839843
Keyword::AND => Some(BinaryOperator::And),
840844
Keyword::OR => Some(BinaryOperator::Or),
@@ -993,6 +997,10 @@ impl<'a> Parser<'a> {
993997
| Token::Gt
994998
| Token::GtEq
995999
| Token::DoubleEq
1000+
| Token::Tilde
1001+
| Token::TildeAsterisk
1002+
| Token::ExclamationMarkTilde
1003+
| Token::ExclamationMarkTildeAsterisk
9961004
| Token::Spaceship => Ok(20),
9971005
Token::Pipe => Ok(21),
9981006
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),

src/tokenizer.rs

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,14 @@ pub enum Token {
108108
RArrow,
109109
/// Sharp `#` used for PostgreSQL Bitwise XOR operator
110110
Sharp,
111-
/// Tilde `~` used for PostgreSQL Bitwise NOT operator
111+
/// Tilde `~` used for PostgreSQL Bitwise NOT operator or case sensitive regular expression match operator
112112
Tilde,
113+
/// `~*`, a case insensitive regular expression match operator in PostgreSQL
114+
TildeAsterisk,
115+
/// `!~`, a case sensitive regular expression "does not match" operator in PostgreSQL
116+
ExclamationMarkTilde,
117+
/// `!~*`, a case insensitive regular expression "does not match" operator in PostgreSQL
118+
ExclamationMarkTildeAsterisk,
113119
/// `<<`, a bitwise shift left operator in PostgreSQL
114120
ShiftLeft,
115121
/// `>>`, a bitwise shift right operator in PostgreSQL
@@ -171,6 +177,9 @@ impl fmt::Display for Token {
171177
Token::ExclamationMark => f.write_str("!"),
172178
Token::DoubleExclamationMark => f.write_str("!!"),
173179
Token::Tilde => f.write_str("~"),
180+
Token::TildeAsterisk => f.write_str("~*"),
181+
Token::ExclamationMarkTilde => f.write_str("!~"),
182+
Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
174183
Token::AtSign => f.write_str("@"),
175184
Token::ShiftLeft => f.write_str("<<"),
176185
Token::ShiftRight => f.write_str(">>"),
@@ -486,6 +495,14 @@ impl<'a> Tokenizer<'a> {
486495
match chars.peek() {
487496
Some('=') => self.consume_and_return(chars, Token::Neq),
488497
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
498+
Some('~') => {
499+
chars.next();
500+
match chars.peek() {
501+
Some('*') => self
502+
.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
503+
_ => Ok(Some(Token::ExclamationMarkTilde)),
504+
}
505+
}
489506
_ => Ok(Some(Token::ExclamationMark)),
490507
}
491508
}
@@ -535,7 +552,13 @@ impl<'a> Tokenizer<'a> {
535552
comment,
536553
})))
537554
}
538-
'~' => self.consume_and_return(chars, Token::Tilde),
555+
'~' => {
556+
chars.next(); // consume
557+
match chars.peek() {
558+
Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
559+
_ => Ok(Some(Token::Tilde)),
560+
}
561+
}
539562
'#' => self.consume_and_return(chars, Token::Sharp),
540563
'@' => self.consume_and_return(chars, Token::AtSign),
541564
other => self.consume_and_return(chars, Token::Char(other)),
@@ -1111,6 +1134,45 @@ mod tests {
11111134
compare(expected, tokens);
11121135
}
11131136

1137+
#[test]
1138+
fn tokenize_pg_regex_match() {
1139+
let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
1140+
let dialect = GenericDialect {};
1141+
let mut tokenizer = Tokenizer::new(&dialect, sql);
1142+
let tokens = tokenizer.tokenize().unwrap();
1143+
let expected = vec![
1144+
Token::make_keyword("SELECT"),
1145+
Token::Whitespace(Whitespace::Space),
1146+
Token::make_word("col", None),
1147+
Token::Whitespace(Whitespace::Space),
1148+
Token::Tilde,
1149+
Token::Whitespace(Whitespace::Space),
1150+
Token::SingleQuotedString("^a".into()),
1151+
Token::Comma,
1152+
Token::Whitespace(Whitespace::Space),
1153+
Token::make_word("col", None),
1154+
Token::Whitespace(Whitespace::Space),
1155+
Token::TildeAsterisk,
1156+
Token::Whitespace(Whitespace::Space),
1157+
Token::SingleQuotedString("^a".into()),
1158+
Token::Comma,
1159+
Token::Whitespace(Whitespace::Space),
1160+
Token::make_word("col", None),
1161+
Token::Whitespace(Whitespace::Space),
1162+
Token::ExclamationMarkTilde,
1163+
Token::Whitespace(Whitespace::Space),
1164+
Token::SingleQuotedString("^a".into()),
1165+
Token::Comma,
1166+
Token::Whitespace(Whitespace::Space),
1167+
Token::make_word("col", None),
1168+
Token::Whitespace(Whitespace::Space),
1169+
Token::ExclamationMarkTildeAsterisk,
1170+
Token::Whitespace(Whitespace::Space),
1171+
Token::SingleQuotedString("^a".into()),
1172+
];
1173+
compare(expected, tokens);
1174+
}
1175+
11141176
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
11151177
//println!("------------------------------");
11161178
//println!("tokens = {:?}", actual);

tests/sqlparser_postgres.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,28 @@ fn parse_pg_postfix_factorial() {
647647
}
648648
}
649649

650+
#[test]
651+
fn parse_pg_regex_match_ops() {
652+
let pg_regex_match_ops = &[
653+
("~", BinaryOperator::PGRegexMatch),
654+
("~*", BinaryOperator::PGRegexIMatch),
655+
("!~", BinaryOperator::PGRegexNotMatch),
656+
("!~*", BinaryOperator::PGRegexNotIMatch),
657+
];
658+
659+
for (str_op, op) in pg_regex_match_ops {
660+
let select = pg().verified_only_select(&format!("SELECT 'abc' {} '^a'", &str_op));
661+
assert_eq!(
662+
SelectItem::UnnamedExpr(Expr::BinaryOp {
663+
left: Box::new(Expr::Value(Value::SingleQuotedString("abc".into()))),
664+
op: op.clone(),
665+
right: Box::new(Expr::Value(Value::SingleQuotedString("^a".into()))),
666+
}),
667+
select.projection[0]
668+
);
669+
}
670+
}
671+
650672
fn pg() -> TestedDialects {
651673
TestedDialects {
652674
dialects: vec![Box::new(PostgreSqlDialect {})],

0 commit comments

Comments
 (0)