Skip to content

Commit 70a6833

Browse files
committed
Add RE2 regex backend
1 parent 7b31d4c commit 70a6833

File tree

4 files changed

+178
-2
lines changed

4 files changed

+178
-2
lines changed

Diff for: src/Makefile.am

+2-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ UTILS = \
255255

256256
REGEX = \
257257
regex/regex.cc \
258-
regex/backend/pcre.cc
258+
regex/backend/pcre.cc \
259+
regex/backend/re2.cc
259260

260261

261262
COLLECTION = \

Diff for: src/regex/backend/re2.cc

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* ModSecurity, http://www.modsecurity.org/
3+
* Copyright (c) 2019
4+
*
5+
* You may not use this file except in compliance with
6+
* the License. You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* If any of the files related to licensing are missing or if you have any
11+
* other questions related to licensing please contact Trustwave Holdings, Inc.
12+
* directly using the email address security@modsecurity.org.
13+
*
14+
*/
15+
#include <iostream>
16+
#include <fstream>
17+
#include <string>
18+
#include <list>
19+
20+
#include "src/regex/backend/re2.h"
21+
#include "src/regex/regex_match.h"
22+
23+
namespace modsecurity {
24+
namespace regex {
25+
namespace backend {
26+
27+
#ifdef WITH_RE2
28+
29+
// Builds the RE2::Options value used for every pattern compiled by this
// backend. The only setting changed from a default-constructed RE2::Options
// is dot_nl, which is switched on.
static RE2::Options get_re2_options() {
    RE2::Options options;
    options.set_dot_nl(true);
    return options;
}
36+
37+
38+
// Stores the pattern text and compiles it with the options returned by
// get_re2_options(). An empty pattern_ is replaced by ".*" before it is
// stored and compiled.
//
// `re` is built from the `pattern` member rather than from the argument, so
// this relies on `pattern` being declared before `re` in the class.
// NOTE(review): the compilation result is not checked here.
Re2::Re2(const std::string& pattern_)
    : pattern(pattern_.empty() ? std::string(".*") : pattern_),
      re(pattern, get_re2_options()) {
}
43+
44+
std::list<RegexMatch> Re2::searchAll(const std::string& s) const {
45+
std::list<RegexMatch> retList;
46+
47+
re2::StringPiece subject(s);
48+
49+
size_t offset = 0;
50+
while (offset <= s.size()) {
51+
int ngroups = re.NumberOfCapturingGroups() + 1;
52+
re2::StringPiece submatches[ngroups];
53+
54+
if (!re.Match(subject, offset, s.size(), RE2::UNANCHORED,
55+
&submatches[0], ngroups)) {
56+
break;
57+
}
58+
59+
for (int i = 0; i < ngroups; i++) {
60+
// N.B. StringPiece::as_string returns value, not reference
61+
auto match_string = submatches[i].as_string();
62+
auto start = &submatches[i][0] - &subject[0];
63+
retList.push_front(RegexMatch(std::move(match_string), start));
64+
}
65+
66+
offset = (&submatches[0][0] - &subject[0]) + submatches[0].length();
67+
if (submatches[0].size() == 0) {
68+
offset++;
69+
}
70+
}
71+
72+
return retList;
73+
}
74+
75+
bool Re2::searchOneMatch(const std::string& s, std::vector<RegexMatchCapture>& captures) const {
76+
re2::StringPiece subject(s);
77+
int ngroups = re.NumberOfCapturingGroups() + 1;
78+
re2::StringPiece submatches[ngroups];
79+
80+
if (re.Match(subject, 0, s.size(), RE2::UNANCHORED, &submatches[0], ngroups)) {
81+
for (int i = 0; i < ngroups; i++) {
82+
auto len = submatches[i].length();
83+
auto start = len != 0 ? &submatches[i][0] - &subject[0] : 0;
84+
captures.push_back(RegexMatchCapture(i, start, len));
85+
}
86+
return true;
87+
} else {
88+
return false;
89+
}
90+
}
91+
92+
int Re2::search(const std::string& s, RegexMatch *match) const {
93+
re2::StringPiece subject(s);
94+
re2::StringPiece submatches[1];
95+
if (re.Match(subject, 0, s.size(), RE2::UNANCHORED, &submatches[0], 1)) {
96+
// N.B. StringPiece::as_string returns value, not reference
97+
auto match_string = submatches[0].as_string();
98+
auto start = &submatches[0][0] - &subject[0];
99+
*match = RegexMatch(std::move(match_string), start);
100+
return 1;
101+
} else {
102+
return 0;
103+
}
104+
}
105+
106+
int Re2::search(const std::string& s) const {
107+
re2::StringPiece subject(s);
108+
return re.Match(subject, 0, s.size(), RE2::UNANCHORED, NULL, 0);
109+
}
110+
#endif
111+
112+
} // namespace backend
113+
} // namespace regex
114+
} // namespace modsecurity
115+

Diff for: src/regex/backend/re2.h

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* ModSecurity, http://www.modsecurity.org/
3+
* Copyright (c) 2019
4+
*
5+
* You may not use this file except in compliance with
6+
* the License. You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* If any of the files related to licensing are missing or if you have any
11+
* other questions related to licensing please contact Trustwave Holdings, Inc.
12+
* directly using the email address security@modsecurity.org.
13+
*
14+
*/
15+
16+
#ifdef WITH_RE2
17+
#include <re2/re2.h>
18+
#endif
19+
20+
#include <string>
21+
#include <vector>
22+
#include <list>
23+
24+
#include "src/regex/regex_match.h"
25+
26+
#ifndef SRC_REGEX_BACKEND_RE2_H_
27+
#define SRC_REGEX_BACKEND_RE2_H_
28+
29+
namespace modsecurity {
30+
namespace regex {
31+
namespace backend {
32+
33+
#ifdef WITH_RE2
34+
35+
class Re2 {
36+
public:
37+
explicit Re2(const std::string& pattern_);
38+
39+
// RE2 class is not copyable, so neither is this
40+
Re2(const Re2&) = delete;
41+
Re2& operator=(const Re2&) = delete;
42+
43+
std::list<RegexMatch> searchAll(const std::string& s) const;
44+
bool searchOneMatch(const std::string& s, std::vector<RegexMatchCapture>& captures) const;
45+
int search(const std::string &s, RegexMatch *m) const;
46+
int search(const std::string &s) const;
47+
48+
const std::string pattern;
49+
private:
50+
const RE2 re;
51+
};
52+
53+
#endif
54+
55+
} // namespace backend
56+
} // namespace regex
57+
} // namespace modsecurity
58+
59+
#endif // SRC_REGEX_BACKEND_RE2_H_

Diff for: src/regex/regex.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <vector>
2323

2424
#include "src/regex/backend/pcre.h"
25+
#include "src/regex/backend/re2.h"
2526
#include "src/regex/regex_match.h"
2627

2728
#ifndef SRC_REGEX_REGEX_H_
@@ -34,7 +35,7 @@ namespace regex {
3435
// Compile-time choice of the regex engine behind `selectedBackend`.
// PCRE wins when both WITH_PCRE and WITH_RE2 are defined; the build fails
// if neither is. Both macros are tested for mere definition (as the backend
// headers do), so they work whether or not they carry a value.
#ifdef WITH_PCRE
using selectedBackend = backend::Pcre;
#elif defined(WITH_RE2)
using selectedBackend = backend::Re2;
#else
#error "no regex backend selected"
#endif

0 commit comments

Comments
 (0)