Skip to content

Commit e0688aa

Browse files
committed
compile --rule-file pattern only once / extracted regular expressions code to separate file
1 parent 0eb4dd8 commit e0688aa

File tree

11 files changed

+766
-389
lines changed

11 files changed

+766
-389
lines changed

Makefile

Lines changed: 137 additions & 129 deletions
Large diffs are not rendered by default.

cli/cmdlineparser.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,8 @@
5757
#include <utility>
5858

5959
#ifdef HAVE_RULES
60+
#include "regex.h"
61+
6062
// xml is used for rules
6163
#include "xml.h"
6264
#endif
@@ -1258,6 +1260,13 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
12581260
return Result::Fail;
12591261
}
12601262

1263+
Regex regex;
1264+
const std::string regex_err = regex.compile(rule.pattern);
1265+
if (!regex_err.empty()) {
1266+
mLogger.printError("failed to compile rule pattern '" + rule.pattern + "' (" + regex_err + ").");
1267+
return Result::Fail;
1268+
}
1269+
rule.regex = std::move(regex);
12611270
mSettings.rules.emplace_back(std::move(rule));
12621271
#else
12631272
mLogger.printError("Option --rule cannot be used as Cppcheck has not been built with rules support.");
@@ -1335,6 +1344,12 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
13351344
return Result::Fail;
13361345
}
13371346

1347+
const std::string regex_err = rule.regex.compile(rule.pattern);
1348+
if (!regex_err.empty()) {
1349+
mLogger.printError("unable to load rule-file '" + ruleFile + "' - pattern '" + rule.pattern + "' failed to compile (" + regex_err + ").");
1350+
return Result::Fail;
1351+
}
1352+
13381353
if (rule.severity == Severity::none) {
13391354
mLogger.printError("unable to load rule-file '" + ruleFile + "' - a rule has an invalid severity.");
13401355
return Result::Fail;

lib/cppcheck.cpp

Lines changed: 17 additions & 212 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,10 @@
4343
#include "valueflow.h"
4444
#include "version.h"
4545

46+
#ifdef HAVE_RULES
47+
#include "regex.h"
48+
#endif
49+
4650
#include <algorithm>
4751
#include <cassert>
4852
#include <cstdio>
@@ -65,17 +69,9 @@
6569
#include <vector>
6670

6771
#include "json.h"
68-
69-
#include <simplecpp.h>
70-
7172
#include "xml.h"
7273

73-
#ifdef HAVE_RULES
74-
#ifdef _WIN32
75-
#define PCRE_STATIC
76-
#endif
77-
#include <pcre.h>
78-
#endif
74+
#include <simplecpp.h>
7975

8076
class SymbolDatabase;
8177

@@ -1433,135 +1429,6 @@ bool CppCheck::hasRule(const std::string &tokenlist) const
14331429
});
14341430
}
14351431

1436-
static const char * pcreErrorCodeToString(const int pcreExecRet)
1437-
{
1438-
switch (pcreExecRet) {
1439-
case PCRE_ERROR_NULL:
1440-
return "Either code or subject was passed as NULL, or ovector was NULL "
1441-
"and ovecsize was not zero (PCRE_ERROR_NULL)";
1442-
case PCRE_ERROR_BADOPTION:
1443-
return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)";
1444-
case PCRE_ERROR_BADMAGIC:
1445-
return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, "
1446-
"to catch the case when it is passed a junk pointer and to detect when a "
1447-
"pattern that was compiled in an environment of one endianness is run in "
1448-
"an environment with the other endianness. This is the error that PCRE "
1449-
"gives when the magic number is not present (PCRE_ERROR_BADMAGIC)";
1450-
case PCRE_ERROR_UNKNOWN_NODE:
1451-
return "While running the pattern match, an unknown item was encountered in the "
1452-
"compiled pattern. This error could be caused by a bug in PCRE or by "
1453-
"overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)";
1454-
case PCRE_ERROR_NOMEMORY:
1455-
return "If a pattern contains back references, but the ovector that is passed "
1456-
"to pcre_exec() is not big enough to remember the referenced substrings, "
1457-
"PCRE gets a block of memory at the start of matching to use for this purpose. "
1458-
"If the call via pcre_malloc() fails, this error is given. The memory is "
1459-
"automatically freed at the end of matching. This error is also given if "
1460-
"pcre_stack_malloc() fails in pcre_exec(). "
1461-
"This can happen only when PCRE has been compiled with "
1462-
"--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)";
1463-
case PCRE_ERROR_NOSUBSTRING:
1464-
return "This error is used by the pcre_copy_substring(), pcre_get_substring(), "
1465-
"and pcre_get_substring_list() functions (see below). "
1466-
"It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)";
1467-
case PCRE_ERROR_MATCHLIMIT:
1468-
return "The backtracking limit, as specified by the match_limit field in a pcre_extra "
1469-
"structure (or defaulted) was reached. "
1470-
"See the description above (PCRE_ERROR_MATCHLIMIT)";
1471-
case PCRE_ERROR_CALLOUT:
1472-
return "This error is never generated by pcre_exec() itself. "
1473-
"It is provided for use by callout functions that want to yield a distinctive "
1474-
"error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)";
1475-
case PCRE_ERROR_BADUTF8:
1476-
return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, "
1477-
"and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector "
1478-
"(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 "
1479-
"character is placed in the first element, and a reason code is placed in the "
1480-
"second element. The reason codes are listed in the following section. For "
1481-
"backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated "
1482-
"UTF-8 character at the end of the subject (reason codes 1 to 5), "
1483-
"PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8";
1484-
case PCRE_ERROR_BADUTF8_OFFSET:
1485-
return "The UTF-8 byte sequence that was passed as a subject was checked and found to "
1486-
"be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of "
1487-
"startoffset did not point to the beginning of a UTF-8 character or the end of "
1488-
"the subject (PCRE_ERROR_BADUTF8_OFFSET)";
1489-
case PCRE_ERROR_PARTIAL:
1490-
return "The subject string did not match, but it did match partially. See the "
1491-
"pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)";
1492-
case PCRE_ERROR_BADPARTIAL:
1493-
return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL "
1494-
"option was used with a compiled pattern containing items that were not supported "
1495-
"for partial matching. From release 8.00 onwards, there are no restrictions on "
1496-
"partial matching (PCRE_ERROR_BADPARTIAL)";
1497-
case PCRE_ERROR_INTERNAL:
1498-
return "An unexpected internal error has occurred. This error could be caused by a bug "
1499-
"in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)";
1500-
case PCRE_ERROR_BADCOUNT:
1501-
return "This error is given if the value of the ovecsize argument is negative "
1502-
"(PCRE_ERROR_BADCOUNT)";
1503-
case PCRE_ERROR_RECURSIONLIMIT:
1504-
return "The internal recursion limit, as specified by the match_limit_recursion "
1505-
"field in a pcre_extra structure (or defaulted) was reached. "
1506-
"See the description above (PCRE_ERROR_RECURSIONLIMIT)";
1507-
case PCRE_ERROR_DFA_UITEM:
1508-
return "PCRE_ERROR_DFA_UITEM";
1509-
case PCRE_ERROR_DFA_UCOND:
1510-
return "PCRE_ERROR_DFA_UCOND";
1511-
case PCRE_ERROR_DFA_WSSIZE:
1512-
return "PCRE_ERROR_DFA_WSSIZE";
1513-
case PCRE_ERROR_DFA_RECURSE:
1514-
return "PCRE_ERROR_DFA_RECURSE";
1515-
case PCRE_ERROR_NULLWSLIMIT:
1516-
return "PCRE_ERROR_NULLWSLIMIT";
1517-
case PCRE_ERROR_BADNEWLINE:
1518-
return "An invalid combination of PCRE_NEWLINE_xxx options was "
1519-
"given (PCRE_ERROR_BADNEWLINE)";
1520-
case PCRE_ERROR_BADOFFSET:
1521-
return "The value of startoffset was negative or greater than the length "
1522-
"of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)";
1523-
case PCRE_ERROR_SHORTUTF8:
1524-
return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject "
1525-
"string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. "
1526-
"Information about the failure is returned as for PCRE_ERROR_BADUTF8. "
1527-
"It is in fact sufficient to detect this case, but this special error code for "
1528-
"PCRE_PARTIAL_HARD precedes the implementation of returned information; "
1529-
"it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)";
1530-
case PCRE_ERROR_RECURSELOOP:
1531-
return "This error is returned when pcre_exec() detects a recursion loop "
1532-
"within the pattern. Specifically, it means that either the whole pattern "
1533-
"or a subpattern has been called recursively for the second time at the same "
1534-
"position in the subject string. Some simple patterns that might do this "
1535-
"are detected and faulted at compile time, but more complicated cases, "
1536-
"in particular mutual recursions between two different subpatterns, "
1537-
"cannot be detected until run time (PCRE_ERROR_RECURSELOOP)";
1538-
case PCRE_ERROR_JIT_STACKLIMIT:
1539-
return "This error is returned when a pattern that was successfully studied "
1540-
"using a JIT compile option is being matched, but the memory available "
1541-
"for the just-in-time processing stack is not large enough. See the pcrejit "
1542-
"documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)";
1543-
case PCRE_ERROR_BADMODE:
1544-
return "This error is given if a pattern that was compiled by the 8-bit library "
1545-
"is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)";
1546-
case PCRE_ERROR_BADENDIANNESS:
1547-
return "This error is given if a pattern that was compiled and saved is reloaded on a "
1548-
"host with different endianness. The utility function pcre_pattern_to_host_byte_order() "
1549-
"can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)";
1550-
case PCRE_ERROR_DFA_BADRESTART:
1551-
return "PCRE_ERROR_DFA_BADRESTART";
1552-
#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
1553-
case PCRE_ERROR_BADLENGTH:
1554-
return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)";
1555-
case PCRE_ERROR_JIT_BADOPTION:
1556-
return "This error is returned when a pattern that was successfully studied using a JIT compile "
1557-
"option is being matched, but the matching mode (partial or complete match) does not correspond "
1558-
"to any JIT compilation mode. When the JIT fast path function is used, this error may be "
1559-
"also given for invalid options. See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
1560-
#endif
1561-
}
1562-
return "";
1563-
}
1564-
15651432
void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
15661433
{
15671434
// There is no rule to execute
@@ -1583,73 +1450,7 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
15831450
mErrorLogger.reportOut("Processing rule: " + rule.pattern, Color::FgGreen);
15841451
}
15851452

1586-
const char *pcreCompileErrorStr = nullptr;
1587-
int erroffset = 0;
1588-
pcre * const re = pcre_compile(rule.pattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
1589-
if (!re) {
1590-
if (pcreCompileErrorStr) {
1591-
const std::string msg = "pcre_compile failed: " + std::string(pcreCompileErrorStr);
1592-
const ErrorMessage errmsg({},
1593-
"",
1594-
Severity::error,
1595-
msg,
1596-
"pcre_compile",
1597-
Certainty::normal);
1598-
1599-
mErrorLogger.reportErr(errmsg);
1600-
}
1601-
continue;
1602-
}
1603-
1604-
// Optimize the regex, but only if PCRE_CONFIG_JIT is available
1605-
#ifdef PCRE_CONFIG_JIT
1606-
const char *pcreStudyErrorStr = nullptr;
1607-
pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
1608-
// pcre_study() returns NULL for both errors and when it can not optimize the regex.
1609-
// The last argument is how one checks for errors.
1610-
// It is NULL if everything works, and points to an error string otherwise.
1611-
if (pcreStudyErrorStr) {
1612-
const std::string msg = "pcre_study failed: " + std::string(pcreStudyErrorStr);
1613-
const ErrorMessage errmsg({},
1614-
"",
1615-
Severity::error,
1616-
msg,
1617-
"pcre_study",
1618-
Certainty::normal);
1619-
1620-
mErrorLogger.reportErr(errmsg);
1621-
// pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
1622-
pcre_free(re);
1623-
continue;
1624-
}
1625-
#else
1626-
const pcre_extra * const pcreExtra = nullptr;
1627-
#endif
1628-
1629-
int pos = 0;
1630-
int ovector[30]= {0};
1631-
while (pos < static_cast<int>(str.size())) {
1632-
const int pcreExecRet = pcre_exec(re, pcreExtra, str.c_str(), static_cast<int>(str.size()), pos, 0, ovector, 30);
1633-
if (pcreExecRet < 0) {
1634-
const std::string errorMessage = pcreErrorCodeToString(pcreExecRet);
1635-
if (!errorMessage.empty()) {
1636-
const ErrorMessage errmsg({},
1637-
"",
1638-
Severity::error,
1639-
std::string("pcre_exec failed: ") + errorMessage,
1640-
"pcre_exec",
1641-
Certainty::normal);
1642-
1643-
mErrorLogger.reportErr(errmsg);
1644-
}
1645-
break;
1646-
}
1647-
const auto pos1 = static_cast<unsigned int>(ovector[0]);
1648-
const auto pos2 = static_cast<unsigned int>(ovector[1]);
1649-
1650-
// jump to the end of the match for the next pcre_exec
1651-
pos = static_cast<int>(pos2);
1652-
1453+
auto f = [&](int pos1, int pos2) {
16531454
// determine location..
16541455
int fileIndex = 0;
16551456
int line = 0;
@@ -1678,15 +1479,19 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
16781479

16791480
// Report error
16801481
mErrorLogger.reportErr(errmsg);
1681-
}
1482+
};
16821483

1683-
pcre_free(re);
1684-
#ifdef PCRE_CONFIG_JIT
1685-
// Free up the EXTRA PCRE value (may be NULL at this point)
1686-
if (pcreExtra) {
1687-
pcre_free_study(pcreExtra);
1484+
const std::string err = rule.regex.match(str, f);
1485+
if (!err.empty()) {
1486+
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
1487+
emptyString,
1488+
Severity::error,
1489+
err,
1490+
"pcre_exec",
1491+
Certainty::normal);
1492+
1493+
mErrorLogger.reportErr(errmsg);
16881494
}
1689-
#endif
16901495
}
16911496
}
16921497
#endif

lib/cppcheck.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@
7979
<ClCompile Include="platform.cpp" />
8080
<ClCompile Include="preprocessor.cpp" />
8181
<ClCompile Include="programmemory.cpp" />
82+
<ClCompile Include="regex.cpp" />
8283
<ClCompile Include="reverseanalyzer.cpp" />
8384
<ClCompile Include="settings.cpp" />
8485
<ClCompile Include="standards.cpp" />
@@ -155,6 +156,7 @@
155156
<ClInclude Include="precompiled.h" />
156157
<ClInclude Include="preprocessor.h" />
157158
<ClInclude Include="programmemory.h" />
159+
<ClInclude Include="regex.h" />
158160
<ClInclude Include="reverseanalyzer.h" />
159161
<ClInclude Include="settings.h" />
160162
<ClInclude Include="smallvector.h" />

0 commit comments

Comments
 (0)