Skip to content

Commit 2263d30

Browse files
authored
refs #10692 - added command-line option --cpp-header-probe to probe headers and extension-less files for Emacs C++ marker (#6324)
1 parent e853d86 commit 2263d30

14 files changed

+298
-52
lines changed

cli/cmdlineparser.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
544544
}
545545
}
546546

547+
else if (std::strcmp(argv[i], "--cpp-header-probe") == 0) {
548+
mSettings.cppHeaderProbe = true;
549+
}
550+
547551
// Show --debug output after the first simplifications
548552
else if (std::strcmp(argv[i], "--debug") == 0 ||
549553
std::strcmp(argv[i], "--debug-normal") == 0)
@@ -887,6 +891,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
887891
return Result::Fail;
888892
}
889893

894+
else if (std::strcmp(argv[i], "--no-cpp-header-probe") == 0) {
895+
mSettings.cppHeaderProbe = false;
896+
}
897+
890898
// Write results in file
891899
else if (std::strncmp(argv[i], "--output-file=", 14) == 0)
892900
mSettings.outputFile = Path::simplifyPath(Path::fromNativeSeparators(argv[i] + 14));

lib/cppcheck.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ static void createDumpFile(const Settings& settings,
182182
case Standards::Language::None:
183183
{
184184
// TODO: error out on unknown language?
185-
const Standards::Language lang = Path::identify(filename);
185+
const Standards::Language lang = Path::identify(filename, settings.cppHeaderProbe);
186186
if (lang == Standards::Language::CPP)
187187
language = " language=\"cpp\"";
188188
else if (lang == Standards::Language::C)
@@ -420,7 +420,7 @@ unsigned int CppCheck::checkClang(const std::string &path)
420420
mErrorLogger.reportOut(std::string("Checking ") + path + " ...", Color::FgGreen);
421421

422422
// TODO: this ignores the configured language
423-
const bool isCpp = Path::identify(path) == Standards::Language::CPP;
423+
const bool isCpp = Path::identify(path, mSettings.cppHeaderProbe) == Standards::Language::CPP;
424424
const std::string langOpt = isCpp ? "-x c++" : "-x c";
425425
const std::string analyzerInfo = mSettings.buildDir.empty() ? std::string() : AnalyzerInformation::getAnalyzerInfoFile(mSettings.buildDir, path, emptyString);
426426
const std::string clangcmd = analyzerInfo + ".clang-cmd";
@@ -784,7 +784,7 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
784784
TokenList tokenlist(&mSettings);
785785
std::istringstream istr2(code);
786786
// TODO: asserts when file has unknown extension
787-
tokenlist.createTokens(istr2, Path::identify(*files.begin())); // TODO: check result?
787+
tokenlist.createTokens(istr2, Path::identify(*files.begin(), false)); // TODO: check result?
788788
executeRules("define", tokenlist);
789789
}
790790
#endif

lib/path.cpp

Lines changed: 100 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,17 @@
2020
#undef __STRICT_ANSI__
2121
#endif
2222

23+
//#define LOG_EMACS_MARKER
24+
2325
#include "path.h"
2426
#include "utils.h"
2527

2628
#include <algorithm>
29+
#include <cstdio>
2730
#include <cstdlib>
31+
#ifdef LOG_EMACS_MARKER
32+
#include <iostream>
33+
#endif
2834
#include <sys/stat.h>
2935
#include <unordered_set>
3036
#include <utility>
@@ -210,16 +216,104 @@ static const std::unordered_set<std::string> header_exts = {
210216
// Accepts a path when identify() reports a known language for it and it is not
// flagged as a header, or when its filename extension is listed in `extra`.
// Diff hunk: the "-" return is the old two-argument identify() call; the "+" return
// passes cppHeaderProbe=false, so the file contents are not probed here.
bool Path::acceptFile(const std::string &path, const std::set<std::string> &extra)
211217
{
212218
bool header = false;
213-
return (identify(path, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end();
219+
return (identify(path, false, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end();
214220
}
215221

216-
Standards::Language Path::identify(const std::string &path, bool *header)
222+
static bool hasEmacsCppMarker(const char* path)
223+
{
224+
// TODO: identify is called three times for each file
225+
// Preprocessor::loadFiles() -> createDUI()
226+
// Preprocessor::preprocess() -> createDUI()
227+
// TokenList::createTokens() -> TokenList::determineCppC()
228+
#ifdef LOG_EMACS_MARKER
229+
std::cout << path << '\n';
230+
#endif
231+
232+
FILE *fp = fopen(path, "rt");
233+
if (!fp)
234+
return false;
235+
std::string buf(128, '\0');
236+
{
237+
// TODO: read the whole first line only
238+
const char * const res = fgets(const_cast<char*>(buf.data()), buf.size(), fp);
239+
fclose(fp);
240+
fp = nullptr;
241+
if (!res)
242+
return false; // failed to read file
243+
}
244+
// TODO: replace with regular expression
245+
const auto pos1 = buf.find("-*-");
246+
if (pos1 == std::string::npos)
247+
return false; // no start marker
248+
const auto pos_nl = buf.find_first_of("\r\n");
249+
if (pos_nl != std::string::npos && (pos_nl < pos1)) {
250+
#ifdef LOG_EMACS_MARKER
251+
std::cout << path << " - Emacs marker not on the first line" << '\n';
252+
#endif
253+
return false; // not on first line
254+
}
255+
const auto pos2 = buf.find("-*-", pos1 + 3);
256+
// TODO: make sure we have read the whole line before bailing out
257+
if (pos2 == std::string::npos) {
258+
#ifdef LOG_EMACS_MARKER
259+
std::cout << path << " - Emacs marker not terminated" << '\n';
260+
#endif
261+
return false; // no end marker
262+
}
263+
#ifdef LOG_EMACS_MARKER
264+
std::cout << "Emacs marker: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << '\n';
265+
#endif
266+
// TODO: support /* */ comments
267+
const std::string buf_trim = trim(buf); // trim whitespaces
268+
if (buf_trim[0] != '/' || buf_trim[1] != '/') {
269+
#ifdef LOG_EMACS_MARKER
270+
std::cout << path << " - Emacs marker not in a comment: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << '\n';
271+
#endif
272+
return false; // not a comment
273+
}
274+
275+
// there are more variations with lowercase and no whitespaces
276+
// -*- C++ -*-
277+
// -*- Mode: C++; -*-
278+
// -*- Mode: C++; c-basic-offset: 8 -*-
279+
std::string marker = trim(buf.substr(pos1 + 3, pos2 - pos1 - 3), " ;");
280+
// cut off additional attributes
281+
const auto pos_semi = marker.find(';');
282+
if (pos_semi != std::string::npos)
283+
marker.resize(pos_semi);
284+
findAndReplace(marker, "mode:", "");
285+
findAndReplace(marker, "Mode:", "");
286+
marker = trim(marker);
287+
if (marker == "C++" || marker == "c++") {
288+
// NOLINTNEXTLINE(readability-simplify-boolean-expr) - TODO: FP
289+
return true; // C++ marker found
290+
}
291+
292+
//if (marker == "C" || marker == "c")
293+
// return false;
294+
#ifdef LOG_EMACS_MARKER
295+
std::cout << path << " - unmatched Emacs marker: '" << marker << "'" << '\n';
296+
#endif
297+
298+
return false; // marker is not a C++ one
299+
}
300+
301+
Standards::Language Path::identify(const std::string &path, bool cppHeaderProbe, bool *header)
217302
{
218303
// cppcheck-suppress uninitvar - TODO: FP
219304
if (header)
220305
*header = false;
221306

222307
std::string ext = getFilenameExtension(path);
308+
// standard library headers have no extension
309+
if (cppHeaderProbe && ext.empty()) {
310+
if (hasEmacsCppMarker(path.c_str())) {
311+
if (header)
312+
*header = true;
313+
return Standards::Language::CPP;
314+
}
315+
return Standards::Language::None;
316+
}
223317
if (ext == ".C")
224318
return Standards::Language::CPP;
225319
if (c_src_exts.find(ext) != c_src_exts.end())
@@ -230,7 +324,9 @@ Standards::Language Path::identify(const std::string &path, bool *header)
230324
if (ext == ".h") {
231325
if (header)
232326
*header = true;
233-
return Standards::Language::C; // treat as C for now
327+
if (cppHeaderProbe && hasEmacsCppMarker(path.c_str()))
328+
return Standards::Language::CPP;
329+
return Standards::Language::C;
234330
}
235331
if (cpp_src_exts.find(ext) != cpp_src_exts.end())
236332
return Standards::Language::CPP;
@@ -245,7 +341,7 @@ Standards::Language Path::identify(const std::string &path, bool *header)
245341
// Reports whether identify() flags the given path as a header; the language it
// returns is deliberately discarded.
// `header` is not initialized here: the identify() definition in this file writes
// *header = false before doing anything else when a non-null pointer is passed.
// Diff hunk: the "-" line is the old call; the "+" line passes cppHeaderProbe=false.
bool Path::isHeader(const std::string &path)
246342
{
247343
bool header;
248-
(void)Path::identify(path, &header);
344+
(void)identify(path, false, &header);
249345
return header;
250346
}
251347

lib/path.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,11 @@ class CPPCHECKLIB Path {
163163
/**
164164
* @brief Identify the language based on the file extension and, if requested, the Emacs mode marker on the first line of the file
165165
* @param path filename to check. path info is optional
166+
* @param cppHeaderProbe check optional Emacs marker to identify extension-less and *.h files as C++
166167
* @param header if provided indicates if the file is a header
167168
* @return the language type
168169
*/
169-
static Standards::Language identify(const std::string &path, bool *header = nullptr);
170+
static Standards::Language identify(const std::string &path, bool cppHeaderProbe, bool *header = nullptr);
170171

171172
/**
172173
* @brief Get filename without a directory path part.

lib/preprocessor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ static simplecpp::DUI createDUI(const Settings &mSettings, const std::string &cf
684684
dui.includes = mSettings.userIncludes; // --include
685685
// TODO: use mSettings.standards.stdValue instead
686686
// TODO: error out on unknown language?
687-
const Standards::Language lang = Path::identify(filename);
687+
const Standards::Language lang = Path::identify(filename, mSettings.cppHeaderProbe);
688688
if (lang == Standards::Language::CPP) {
689689
dui.std = mSettings.standards.getCPP();
690690
splitcfg(mSettings.platform.getLimitsDefines(Standards::getCPP(dui.std)), dui.defines, "");

lib/settings.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ class CPPCHECKLIB WARN_UNUSED Settings {
167167
/** cppcheck.cfg: About text */
168168
std::string cppcheckCfgAbout;
169169

170+
/** @brief check Emacs marker to detect extension-less and *.h files as C++ */
171+
bool cppHeaderProbe{};
172+
170173
/** @brief Are we running from DACA script? */
171174
bool daca{};
172175

lib/tokenize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8067,7 +8067,7 @@ void Tokenizer::unmatchedToken(const Token *tok) const
80678067
void Tokenizer::syntaxErrorC(const Token *tok, const std::string &what) const
80688068
{
80698069
printDebugOutput(0);
8070-
throw InternalError(tok, "Code '"+what+"' is invalid C code. Use --std or --language to configure the language.", InternalError::SYNTAX);
8070+
throw InternalError(tok, "Code '"+what+"' is invalid C code.", "Use --std, -x or --language to enforce C++. Or --cpp-header-probe to identify C++ headers via the Emacs marker.", InternalError::SYNTAX);
80718071
}
80728072

80738073
void Tokenizer::unknownMacroError(const Token *tok1) const

lib/tokenlist.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ void TokenList::determineCppC()
9696
// only try to determine if it wasn't enforced
9797
if (mLang == Standards::Language::None) {
9898
ASSERT_LANG(!getSourceFilePath().empty());
99-
mLang = Path::identify(getSourceFilePath());
99+
mLang = Path::identify(getSourceFilePath(), mSettings ? mSettings->cppHeaderProbe : false);
100100
// TODO: cannot enable assert as this might occur for unknown extensions
101101
//ASSERT_LANG(mLang != Standards::Language::None);
102102
if (mLang == Standards::Language::None) {

releasenotes.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@ Deprecations:
1717
-
1818

1919
Other:
20-
- Add support for 'CLICOLOR_FORCE'/'NO_COLOR' environment variables to force/disable ANSI color output for diagnostics.
20+
- Add support for 'CLICOLOR_FORCE'/'NO_COLOR' environment variables to force/disable ANSI color output for diagnostics.
21+
- added command-line option `--cpp-header-probe` (and `--no-cpp-header-probe`) to probe headers and extension-less files for Emacs marker (see https://trac.cppcheck.net/ticket/10692 for more details)
22+
-

test/cli/other_test.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,3 +1487,32 @@ def test_markup_lang(tmpdir):
14871487

14881488
exitcode, stdout, _ = cppcheck(args)
14891489
assert exitcode == 0, stdout
1490+
1491+
1492+
def test_cpp_probe(tmpdir):
    """A *.h file without an Emacs marker is still treated as C when probing is enabled.

    The C++-only class definition must therefore be reported as invalid C code.
    """
    header_path = os.path.join(tmpdir, 'test.h')
    with open(header_path, 'wt') as out:
        out.write('class A {};')

    # TODO: fix that awkward format
    expected_err = "{}:1:1: error: Code 'classA{{' is invalid C code.: Use --std, -x or --language to enforce C++. Or --cpp-header-probe to identify C++ headers via the Emacs marker. [syntaxError]".format(header_path)

    assert_cppcheck(
        ['-q', '--template=simple', '--cpp-header-probe', '--verbose', header_path],
        ec_exp=0,
        err_exp=[expected_err],
        out_exp=[],
    )
1506+
1507+
1508+
def test_cpp_probe_2(tmpdir):
    """A *.h file whose first line carries the Emacs C++ marker is analyzed as C++.

    With --cpp-header-probe the class definition on the second line must be accepted
    without any diagnostics.
    """
    test_file = os.path.join(tmpdir, 'test.h')
    with open(test_file, 'wt') as f:
        # writelines() does not add line separators. Without the explicit '\n' the
        # class definition would end up inside the marker comment on line 1 and the
        # test would pass without any C++ code being parsed.
        f.writelines([
            '// -*- C++ -*-\n',
            'class A {};\n'
        ])

    args = ['-q', '--template=simple', '--cpp-header-probe', test_file]

    assert_cppcheck(args, ec_exp=0, err_exp=[], out_exp=[])

0 commit comments

Comments
 (0)