diff --git a/.circleci/install_validator_dependencies.sh b/.circleci/install_validator_dependencies.sh index e6e13bbcd490..8ebc5f94a164 100755 --- a/.circleci/install_validator_dependencies.sh +++ b/.circleci/install_validator_dependencies.sh @@ -13,7 +13,7 @@ sudo mv bazel.gpg /etc/apt/trusted.gpg.d/ echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list echo $(GREEN "Updating and installing apt packages...") -sudo apt update && sudo apt install bazel-5.4.0 clang python3 python3-pip protobuf-compiler +sudo apt update && sudo apt install bazel-7.1.0 clang python3 python3-pip protobuf-compiler echo $(GREEN "Installing protobuf python module...") pip3 install protobuf==3.19.4 diff --git a/build-system/tasks/validator.js b/build-system/tasks/validator.js index 405d18c0939f..8591431265e9 100644 --- a/build-system/tasks/validator.js +++ b/build-system/tasks/validator.js @@ -25,7 +25,7 @@ async function validator() { */ async function validatorCpp() { const bazelCmd = [ - 'bazel-5.4.0 test', + 'bazel-7.1.0 test', '--repo_env=CC=clang', "--cxxopt='-std=c++17'", '--discard_analysis_cache', diff --git a/validator/cpp/engine/BUILD b/validator/cpp/engine/BUILD index c60425d0d25c..5b4705354473 100644 --- a/validator/cpp/engine/BUILD +++ b/validator/cpp/engine/BUILD @@ -1,6 +1,7 @@ load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("embed_data.bzl", "embed_data") +load("//tools/build_defs/license:license.bzl", "license") # Requirements: # clang with c++17 support. 
@@ -14,7 +15,15 @@ load("embed_data.bzl", "embed_data") # # bazel test --cxxopt='-std=c++17' validator_test -package(default_visibility = ["//visibility:public"]) +package( + default_applicable_licenses = [":license"], + default_visibility = ["//visibility:public"], +) + +license( + name = "license", + package_name = "ampvalidator", +) licenses(["notice"]) diff --git a/validator/cpp/engine/validator-internal.cc b/validator/cpp/engine/validator-internal.cc index 63856a5eb540..799563cd4fb2 100644 --- a/validator/cpp/engine/validator-internal.cc +++ b/validator/cpp/engine/validator-internal.cc @@ -595,7 +595,7 @@ class ParsedAttrSpec { return value_property_by_name_; } - const unordered_map& + const absl::flat_hash_map& css_declaration_by_name() const { return css_declaration_by_name_; } @@ -620,7 +620,8 @@ class ParsedAttrSpec { // Name lookup for spec().value_properties().properties(). unordered_map value_property_by_name_; // Name lookup for spec().css_declaration(). - unordered_map css_declaration_by_name_; + absl::flat_hash_map + css_declaration_by_name_; // The mandatory spec().value_properties().properties(). vector mandatory_value_properties_; vector disabled_by_; @@ -4323,7 +4324,7 @@ void ValidateAttrDeclaration(const ParsedAttrSpec& parsed_attr_spec, // If there were errors parsing, exit from validating further. 
if (!css_errors.empty()) return; - const unordered_map& + const absl::flat_hash_map& css_declaration_by_name = parsed_attr_spec.css_declaration_by_name(); for (auto& declaration : declarations) { diff --git a/validator/cpp/engine/validator_test.cc b/validator/cpp/engine/validator_test.cc index 545f14abbe59..491684ede86c 100644 --- a/validator/cpp/engine/validator_test.cc +++ b/validator/cpp/engine/validator_test.cc @@ -1220,7 +1220,8 @@ TEST(ValidatorTest, RulesMakeSense) { HtmlFormat::UNKNOWN_CODE), html_format.cend()) << "tagSpec.htmlFormat should never contain UNKNOWN_CODE" - << ":\n" << tag_spec.DebugString(); + << ":\n" + << tag_spec; EXPECT_TRUE(tag_spec.has_tag_name()); EXPECT_TRUE(RE2::PartialMatch(tag_spec.tag_name(), tag_name_regex)); @@ -1562,12 +1563,12 @@ TEST(ValidatorTest, RulesMakeSense) { } for (const auto& attr_spec : tag_spec.attrs()) { - EXPECT_TRUE(attr_spec.has_name()) << attr_spec.DebugString(); + EXPECT_TRUE(attr_spec.has_name()) << attr_spec; // Attribute Spec names are matched against lowercased attributes, // so the rules *must* also be lower case or non-cased. 
EXPECT_TRUE(RE2::FullMatch(attr_spec.name(), RE2("[^A-Z]+"))) - << attr_spec.DebugString(); - EXPECT_NE(attr_spec.name(), "[style]") << attr_spec.DebugString(); + << attr_spec; + EXPECT_NE(attr_spec.name(), "[style]") << attr_spec; if (attr_spec.has_value_url()) { for (const std::string& protocol : attr_spec.value_url().protocol()) { // UrlSpec protocol is matched against lowercased protocol names, @@ -1582,27 +1583,24 @@ TEST(ValidatorTest, RulesMakeSense) { if (protocol == "http" && attr_spec.value_url().has_allow_relative()) { EXPECT_TRUE(attr_spec.value_url().allow_relative()) - << attr_spec.value_url().DebugString(); + << attr_spec.value_url(); } } } } if (attr_spec.has_value_regex()) { - EXPECT_TRUE(RE2(attr_spec.value_regex()).ok()) - << attr_spec.DebugString(); + EXPECT_TRUE(RE2(attr_spec.value_regex()).ok()) << attr_spec; } if (attr_spec.has_value_regex_casei()) { - EXPECT_TRUE(RE2(attr_spec.value_regex_casei()).ok()) - << attr_spec.DebugString(); + EXPECT_TRUE(RE2(attr_spec.value_regex_casei()).ok()) << attr_spec; } if (attr_spec.has_disallowed_value_regex()) { - EXPECT_TRUE(RE2(attr_spec.disallowed_value_regex()).ok()) - << attr_spec.DebugString(); + EXPECT_TRUE(RE2(attr_spec.disallowed_value_regex()).ok()) << attr_spec; } if (attr_spec.has_value_url()) { EXPECT_GT(attr_spec.value_url().protocol().size(), 0) << "value_url must have at least one protocol\n" - << attr_spec.DebugString(); + << attr_spec; } int num_values = 0; if (!attr_spec.value().empty()) { @@ -1627,12 +1625,12 @@ TEST(ValidatorTest, RulesMakeSense) { if (attr_spec.name() == "id" && num_values == 0) { EXPECT_TRUE(attr_spec.has_disallowed_value_regex()) << "'id' attribute must have 'disallowed_value_regex' set\n" - << attr_spec.DebugString(); + << attr_spec; } if (attr_spec.name() == "name" && num_values == 0) { EXPECT_TRUE(attr_spec.has_disallowed_value_regex()) << "'name' attribute must have 'disallowed_value_regex' set\n" - << attr_spec.DebugString(); + << attr_spec; } if 
(attr_spec.has_deprecation()) { EXPECT_TRUE(attr_spec.has_deprecation_url()); diff --git a/validator/cpp/htmlparser/BUILD b/validator/cpp/htmlparser/BUILD index 78de2920159d..61f4e2958f1e 100644 --- a/validator/cpp/htmlparser/BUILD +++ b/validator/cpp/htmlparser/BUILD @@ -195,10 +195,10 @@ cc_library( hdrs = [ "logging.h", ], + copts = ["-std=c++17"], deps = [ ":glog_polyfill", ], - copts = ["-std=c++17"], ) # Defines token type and token structures, used during tokenization. @@ -551,7 +551,7 @@ cc_test( ":strings", ":testconstants", ":tokenizer", - "@com_google_googletest//:gtest_main", "@com_google_absl//absl/flags:flag", + "@com_google_googletest//:gtest_main", ], ) diff --git a/validator/cpp/htmlparser/allocator.h b/validator/cpp/htmlparser/allocator.h index 0620c4ed32fb..913e3befbb5c 100644 --- a/validator/cpp/htmlparser/allocator.h +++ b/validator/cpp/htmlparser/allocator.h @@ -138,7 +138,7 @@ // IMPORTANT: Tree like structure must not destroy the child nodes or sibling // nodes. Allocator destroys all the objects and call its destructor, it is an // error to invoke destructors on objects allocated by this allocator. Allocator -// is the master owner of all the objects. Client treats all objects as const +// is the primary owner of all the objects. Client treats all objects as const // pointer as far as destruction goes. // // It is not possible to destroy random objects or free up the slots to be @@ -174,6 +174,7 @@ #include #include #include +#include #include namespace htmlparser { diff --git a/validator/cpp/htmlparser/allocator_test.cc b/validator/cpp/htmlparser/allocator_test.cc index 7922016aafca..3f20071b1017 100644 --- a/validator/cpp/htmlparser/allocator_test.cc +++ b/validator/cpp/htmlparser/allocator_test.cc @@ -1,5 +1,10 @@ #include "cpp/htmlparser/allocator.h" +#include +#include +#include +#include + #include "gtest/gtest.h" // Memory leaks will automatically be detected by the test framework. 
@@ -12,9 +17,9 @@ TEST(AllocatorTest, BasicTest) { int32_t a; int64_t b; std::string c; - short d; + uint16_t d; Data() {} - Data(int32_t a_, int64_t b_, std::string c_, short d_) + Data(int32_t a_, int64_t b_, std::string c_, uint16_t d_) : a(a_), b(b_), c(c_), d(d_) {} }; @@ -146,7 +151,7 @@ TEST(AllocatorTest, DestructorCalled) { TEST(AllocatorTest, BitFields) { struct HasBitFields { - short s; + uint16_t s; char c; int flip : 1; int nybble : 4; diff --git a/validator/cpp/htmlparser/atomutil.cc b/validator/cpp/htmlparser/atomutil.cc index 368b32848ebd..a5e1771aa7de 100644 --- a/validator/cpp/htmlparser/atomutil.cc +++ b/validator/cpp/htmlparser/atomutil.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/atomutil.h" +#include +#include + #include "cpp/htmlparser/hash.h" namespace htmlparser { @@ -20,7 +23,7 @@ Atom AtomUtil::ToAtom(const std::string& s) { table_index = (hash >> 16) & (kNamesHashTable.size() - 1); atom_value = kNamesHashTable[table_index]; atom_len = atom_value & 0xff; - if (atom_len == s.size() && ToString(atom_value).compare(s) == 0) { + if (atom_len == s.size() && ToString(atom_value) == s) { return CastToAtom(atom_value); } diff --git a/validator/cpp/htmlparser/bin/atomgen.cc b/validator/cpp/htmlparser/bin/atomgen.cc index 54a41ab09934..9546f4dc0efb 100644 --- a/validator/cpp/htmlparser/bin/atomgen.cc +++ b/validator/cpp/htmlparser/bin/atomgen.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -55,12 +56,12 @@ class TableBuilder { bool Insert(const std::string& s) { std::pair hashes = Hash(s); - if (table_[hashes.first] == "") { + if (table_[hashes.first].empty()) { table_[hashes.first] = s; return true; } - if (table_[hashes.second] == "") { + if (table_[hashes.second].empty()) { table_[hashes.second] = s; return true; } @@ -100,7 +101,7 @@ class TableBuilder { std::pair hashes = Hash(entry); uint32_t new_location = hashes.first + hashes.second - i; - if (table_[new_location] != "" && !Push(new_location, depth + 1)) { + 
if (!table_[new_location].empty() && !Push(new_location, depth + 1)) { return false; } @@ -190,8 +191,8 @@ int main(int argc, char** argv) { // Find hash that minimizes table size. std::unique_ptr table{nullptr}; for (int i = 0; i < 1; i++) { - if (table.get() != nullptr - && (1 << (table->hash_num_bits() - 1)) < all_names.size()) { + if (table != nullptr && + (1 << (table->hash_num_bits() - 1)) < all_names.size()) { break; } @@ -199,8 +200,7 @@ int main(int argc, char** argv) { uint32_t rand_state; uint32_t hash0 = rand_r(&rand_state) % 1000000000; for (uint32_t k = 0; k <= 16; k++) { - if (table.get() != nullptr && k >= table->hash_num_bits()) - break; + if (table != nullptr && k >= table->hash_num_bits()) break; std::unique_ptr base(new TableBuilder()); if (base->Init(hash0, k, all_names)) { table.reset(base.release()); @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } } - if (table.get() == nullptr) { + if (table == nullptr) { std::cerr << "Failed to construct string table." << std::endl; std::cerr << all_names.size() << ": elements." 
<< std::endl; return EXIT_FAILURE; @@ -224,11 +224,11 @@ int main(int argc, char** argv) { while (changed) { changed = false; for (std::size_t i = 0; i < layout.size(); i++) { - if (layout[i] == "") continue; + if (layout[i].empty()) continue; for (std::size_t j = 0; j < layout.size(); j++) { - if (i != j && layout[j] != "" - && layout[i].find(layout[j]) != std::string::npos) { + if (i != j && !layout[j].empty() && + layout[i].find(layout[j]) != std::string::npos) { changed = true; layout[j] = ""; } @@ -244,7 +244,7 @@ int main(int argc, char** argv) { int bestj = -1; int bestk = 0; for (std::size_t i = 0; i < layout.size(); i++) { - if (layout[i] == "") continue; + if (layout[i].empty()) continue; for (std::size_t j = 0; j < layout.size(); j++) { if (i == j) continue; diff --git a/validator/cpp/htmlparser/bin/casetablegen.cc b/validator/cpp/htmlparser/bin/casetablegen.cc index a66464dc2bda..ad93231e961d 100644 --- a/validator/cpp/htmlparser/bin/casetablegen.cc +++ b/validator/cpp/htmlparser/bin/casetablegen.cc @@ -1,7 +1,9 @@ #include #include #include +#include #include +#include #include "cpp/htmlparser/defer.h" #include "cpp/htmlparser/fileutil.h" diff --git a/validator/cpp/htmlparser/bin/entitytablegen.cc b/validator/cpp/htmlparser/bin/entitytablegen.cc index 95810636a429..050dfffdebee 100644 --- a/validator/cpp/htmlparser/bin/entitytablegen.cc +++ b/validator/cpp/htmlparser/bin/entitytablegen.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/validator/cpp/htmlparser/bin/validatorgen.cc b/validator/cpp/htmlparser/bin/validatorgen.cc index 8300004437ce..934938d85c29 100644 --- a/validator/cpp/htmlparser/bin/validatorgen.cc +++ b/validator/cpp/htmlparser/bin/validatorgen.cc @@ -15,6 +15,7 @@ // #include +#include #include "glog/logging.h" #include "absl/flags/flag.h" diff --git a/validator/cpp/htmlparser/casetable_test.cc b/validator/cpp/htmlparser/casetable_test.cc index ed20806894b3..fdffbb65a704 100644 --- 
a/validator/cpp/htmlparser/casetable_test.cc +++ b/validator/cpp/htmlparser/casetable_test.cc @@ -1,5 +1,7 @@ #include "cpp/htmlparser/casetable.h" +#include + #include "gtest/gtest.h" #include "cpp/htmlparser/strings.h" diff --git a/validator/cpp/htmlparser/css/amp4ads-parse-css.cc b/validator/cpp/htmlparser/css/amp4ads-parse-css.cc index 0fec958bcd1d..5072494d3a42 100644 --- a/validator/cpp/htmlparser/css/amp4ads-parse-css.cc +++ b/validator/cpp/htmlparser/css/amp4ads-parse-css.cc @@ -1,13 +1,14 @@ #include "cpp/htmlparser/css/amp4ads-parse-css.h" -#include "absl/memory/memory.h" +#include +#include +#include + #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "cpp/htmlparser/css/parse-css.h" #include "re2/re2.h" -using absl::make_unique; -using std::string_view; using amp::validator::ValidationError; using std::unique_ptr; @@ -18,7 +19,7 @@ namespace { unique_ptr CreateParseErrorTokenAt( const Token& position_token, ValidationError::Code code, const std::vector& params) { - auto token = make_unique(code, params); + auto token = std::make_unique(code, params); position_token.CopyStartPositionTo(token.get()); return token; } diff --git a/validator/cpp/htmlparser/css/parse-css-urls.cc b/validator/cpp/htmlparser/css/parse-css-urls.cc index 9d2ae3d3033d..9e029a376a26 100644 --- a/validator/cpp/htmlparser/css/parse-css-urls.cc +++ b/validator/cpp/htmlparser/css/parse-css-urls.cc @@ -1,6 +1,8 @@ #include "cpp/htmlparser/css/parse-css-urls.h" #include +#include +#include #include "absl/algorithm/container.h" #include "absl/memory/memory.h" diff --git a/validator/cpp/htmlparser/css/parse-css-urls_test.cc b/validator/cpp/htmlparser/css/parse-css-urls_test.cc index 844412770512..3198d3c0d044 100644 --- a/validator/cpp/htmlparser/css/parse-css-urls_test.cc +++ b/validator/cpp/htmlparser/css/parse-css-urls_test.cc @@ -1,13 +1,14 @@ #include "cpp/htmlparser/css/parse-css-urls.h" #include +#include +#include #include #include "gtest/gtest.h" 
#include "absl/strings/str_cat.h" #include "cpp/htmlparser/strings.h" -using testing::Eq; using testing::Pointwise; namespace htmlparser::css::url { diff --git a/validator/cpp/htmlparser/css/parse-css.cc b/validator/cpp/htmlparser/css/parse-css.cc index 4df12b588e33..86c96098da2d 100644 --- a/validator/cpp/htmlparser/css/parse-css.cc +++ b/validator/cpp/htmlparser/css/parse-css.cc @@ -2,6 +2,11 @@ #include #include +#include +#include +#include +#include +#include #include "absl/algorithm/container.h" #include "absl/memory/memory.h" diff --git a/validator/cpp/htmlparser/css/parse-css_test.cc b/validator/cpp/htmlparser/css/parse-css_test.cc index 850a67f4ed76..3226c0f9d936 100644 --- a/validator/cpp/htmlparser/css/parse-css_test.cc +++ b/validator/cpp/htmlparser/css/parse-css_test.cc @@ -1,6 +1,10 @@ #include "cpp/htmlparser/css/parse-css.h" +#include #include +#include +#include +#include #include #include diff --git a/validator/cpp/htmlparser/data/CaseFolding.txt b/validator/cpp/htmlparser/data/CaseFolding.txt index 65aa0fcd6b32..69c5c64b4c6a 100644 --- a/validator/cpp/htmlparser/data/CaseFolding.txt +++ b/validator/cpp/htmlparser/data/CaseFolding.txt @@ -1,6 +1,6 @@ -# CaseFolding-15.0.0.txt -# Date: 2022-02-02, 23:35:35 GMT -# © 2022 Unicode®, Inc. +# CaseFolding-15.1.0.txt +# Date: 2023-05-12, 21:53:10 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -929,6 +929,7 @@ 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY @@ -937,6 +938,7 @@ 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI @@ -1328,6 +1330,7 @@ FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH diff --git a/validator/cpp/htmlparser/doctype.cc b/validator/cpp/htmlparser/doctype.cc index 13cb08531323..1cb49ccb3054 100644 --- a/validator/cpp/htmlparser/doctype.cc +++ b/validator/cpp/htmlparser/doctype.cc @@ -1,6 +1,8 @@ #include "cpp/htmlparser/doctype.h" #include +#include +#include #include "cpp/htmlparser/strings.h" diff --git a/validator/cpp/htmlparser/document.cc b/validator/cpp/htmlparser/document.cc index 4f54b2713f38..337dc73ece53 100644 --- a/validator/cpp/htmlparser/document.cc 
+++ b/validator/cpp/htmlparser/document.cc @@ -1,6 +1,10 @@ -#include "absl/flags/flag.h" #include "cpp/htmlparser/document.h" +#include +#include + +#include "absl/flags/flag.h" + ABSL_FLAG(std::size_t, htmlparser_nodes_allocator_block_size, 256 << 10 /* 256k */, "Allocator block size for html nodes."); diff --git a/validator/cpp/htmlparser/error.cc b/validator/cpp/htmlparser/error.cc index 36f8f31355c8..1b8491d2d84a 100644 --- a/validator/cpp/htmlparser/error.cc +++ b/validator/cpp/htmlparser/error.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/error.h" +#include +#include + namespace htmlparser { std::optional error(const std::string& error_msg) { diff --git a/validator/cpp/htmlparser/fileutil.cc b/validator/cpp/htmlparser/fileutil.cc index 58b5834bf9b1..ff21f1b6cec2 100644 --- a/validator/cpp/htmlparser/fileutil.cc +++ b/validator/cpp/htmlparser/fileutil.cc @@ -3,10 +3,15 @@ #include #include +#include #include #include #include #include +#include +#include +#include +#include #include "cpp/htmlparser/defer.h" #include "cpp/htmlparser/strings.h" diff --git a/validator/cpp/htmlparser/fileutil_test.cc b/validator/cpp/htmlparser/fileutil_test.cc index 5d4b4f807839..2ac532717d0d 100644 --- a/validator/cpp/htmlparser/fileutil_test.cc +++ b/validator/cpp/htmlparser/fileutil_test.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/fileutil.h" +#include +#include +#include #include #include "gtest/gtest.h" diff --git a/validator/cpp/htmlparser/foreign.cc b/validator/cpp/htmlparser/foreign.cc index bd425bdd9003..a1d8e9f63ac8 100644 --- a/validator/cpp/htmlparser/foreign.cc +++ b/validator/cpp/htmlparser/foreign.cc @@ -1,6 +1,10 @@ #include "cpp/htmlparser/foreign.h" #include +#include +#include +#include +#include #include "cpp/htmlparser/comparators.h" #include "cpp/htmlparser/strings.h" diff --git a/validator/cpp/htmlparser/grammar/tablebuilder.cc b/validator/cpp/htmlparser/grammar/tablebuilder.cc index 69368df66893..019b45b6b716 100644 --- 
a/validator/cpp/htmlparser/grammar/tablebuilder.cc +++ b/validator/cpp/htmlparser/grammar/tablebuilder.cc @@ -6,6 +6,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #include "absl/strings/match.h" #include "cpp/htmlparser/defer.h" diff --git a/validator/cpp/htmlparser/htmldataset_test.cc b/validator/cpp/htmlparser/htmldataset_test.cc index 443238b623e1..80fa4e69ca90 100644 --- a/validator/cpp/htmlparser/htmldataset_test.cc +++ b/validator/cpp/htmlparser/htmldataset_test.cc @@ -1,15 +1,17 @@ // Runs webkit html5 test datasets and validates parser. +#include #include #include #include +#include +#include #include #include #include #include #include "gtest/gtest.h" -#include "absl/flags/flag.h" #include "cpp/htmlparser/atomutil.h" #include "cpp/htmlparser/defer.h" #include "cpp/htmlparser/fileutil.h" @@ -22,7 +24,8 @@ ABSL_FLAG(std::string, test_srcdir, "", "Testdata directory"); -using namespace htmlparser; + +namespace htmlparser { // Represents a single test case. 
struct TestCaseData { @@ -113,7 +116,7 @@ TestCaseData ReadParseTest(std::ifstream* fd) { line = ReadUntil(fd, "\n"); std::string trimmed(line); Strings::Trim(&trimmed, "| \n"); - if (trimmed.size() > 0) { + if (!trimmed.empty()) { if (line.front() == '|' && trimmed.front() == '"') { in_quote = true; } @@ -191,7 +194,7 @@ std::optional DumpLevel(Node* node, std::stringbuf* buffer, std::string v = attr.value; buffer->sputc('\n'); DumpIndent(buffer, level); - if (ns != "") { + if (!ns.empty()) { buffer->sputn(ns.c_str(), ns.size()); buffer->sputc(' '); buffer->sputn(k.c_str(), k.size()); @@ -305,9 +308,7 @@ TEST(HTMLDatasetTest, WebkitData) { }; int num_test_cases = 0; for (auto pattern : htmlparser::testing::kTestDataDirs) { - std::string full_path = - absl::GetFlag(FLAGS_test_srcdir) + - pattern.data(); + std::string full_path = ::testing::SrcDir() + pattern.data(); std::vector filenames; EXPECT_TRUE(FileUtil::Glob(full_path, &filenames)) << "Error opening files: " << pattern; @@ -388,4 +389,6 @@ TEST(HTMLDatasetTest, WebkitData) { // Hardcoded, whenever dataset changes. Ensures no new tests are added, or // old tests mistakenly removed. 
EXPECT_EQ(1484, num_test_cases); -}; +} + +} // namespace htmlparser diff --git a/validator/cpp/htmlparser/json/json.cc b/validator/cpp/htmlparser/json/json.cc index 6dec3e7bc28a..b49cb4ae3400 100644 --- a/validator/cpp/htmlparser/json/json.cc +++ b/validator/cpp/htmlparser/json/json.cc @@ -1,6 +1,10 @@ #include "cpp/htmlparser/json/json.h" +#include #include +#include +#include +#include #include "absl/strings/numbers.h" #include "cpp/htmlparser/logging.h" diff --git a/validator/cpp/htmlparser/json/json_test.cc b/validator/cpp/htmlparser/json/json_test.cc index b9e688e19654..082dacdf2796 100644 --- a/validator/cpp/htmlparser/json/json_test.cc +++ b/validator/cpp/htmlparser/json/json_test.cc @@ -1,5 +1,7 @@ #include "cpp/htmlparser/json/json.h" +#include + #include #include "gtest/gtest.h" #include "cpp/htmlparser/json/types.h" diff --git a/validator/cpp/htmlparser/json/types.cc b/validator/cpp/htmlparser/json/types.cc index 9500a17df7e6..7ca54567994b 100644 --- a/validator/cpp/htmlparser/json/types.cc +++ b/validator/cpp/htmlparser/json/types.cc @@ -1,6 +1,10 @@ #include "cpp/htmlparser/json/types.h" #include +#include +#include +#include +#include namespace htmlparser::json { diff --git a/validator/cpp/htmlparser/json/types_test.cc b/validator/cpp/htmlparser/json/types_test.cc index 9a9862943bc8..0e265612fed4 100644 --- a/validator/cpp/htmlparser/json/types_test.cc +++ b/validator/cpp/htmlparser/json/types_test.cc @@ -1,7 +1,11 @@ #include "cpp/htmlparser/json/types.h" #include +#include +#include +#include #include +#include #include "gtest/gtest.h" diff --git a/validator/cpp/htmlparser/node.cc b/validator/cpp/htmlparser/node.cc index d7441a7a0c5b..8fb8b38b49f5 100644 --- a/validator/cpp/htmlparser/node.cc +++ b/validator/cpp/htmlparser/node.cc @@ -2,7 +2,11 @@ #include #include +#include #include +#include +#include +#include #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" @@ -45,7 +49,7 @@ void Node::DropDuplicateAttributes() { } bool 
Node::IsSpecialElement() const { - if (name_space_ == "" || name_space_ == "html") { + if (name_space_.empty() || name_space_ == "html") { return std::find(kSpecialElements.begin(), kSpecialElements.end(), atom_) != kSpecialElements.end(); @@ -156,7 +160,7 @@ void Node::ReparentChildrenTo(Node* destination) { } Node* NodeStack::Pop() { - if (stack_.size() > 0) { + if (!stack_.empty()) { Node* node = stack_.back(); stack_.pop_back(); return node; @@ -175,7 +179,7 @@ void NodeStack::Pop(int count) { } Node* NodeStack::Top() { - if (stack_.size() > 0) return stack_.at(stack_.size() - 1); + if (!stack_.empty()) return stack_.at(stack_.size() - 1); return nullptr; } @@ -427,7 +431,7 @@ void Node::UpdateChildNodesPositions(Node* relative_node) { } } -std::string Node::DebugString() { +std::string Node::DebugString() const { std::ostringstream ost; switch (node_type_) { case NodeType::ELEMENT_NODE: diff --git a/validator/cpp/htmlparser/node.h b/validator/cpp/htmlparser/node.h index 31b8c9482ce4..4693936be113 100644 --- a/validator/cpp/htmlparser/node.h +++ b/validator/cpp/htmlparser/node.h @@ -129,7 +129,7 @@ class Node { // Debug/Logging utils. // Outputs node debug info. - std::string DebugString(); + std::string DebugString() const; private: void SetManufactured(bool is_manufactured) { diff --git a/validator/cpp/htmlparser/node_test.cc b/validator/cpp/htmlparser/node_test.cc index 3bca02f512dd..1ff7a9de8333 100644 --- a/validator/cpp/htmlparser/node_test.cc +++ b/validator/cpp/htmlparser/node_test.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/node.h" +#include +#include + #include "gtest/gtest.h" #include "cpp/htmlparser/atom.h" #include "cpp/htmlparser/document.h" @@ -16,7 +19,7 @@ using htmlparser::RenderError; using htmlparser::Renderer; // For operator""s. 
-using namespace std::string_literals; +using namespace std::string_literals; // NOLINT(build/namespaces) TEST(NodeTest, BasicStackFunctionality) { NodeStack stack; diff --git a/validator/cpp/htmlparser/parser.cc b/validator/cpp/htmlparser/parser.cc index 1ddbd303054d..02fc4e8e4a00 100644 --- a/validator/cpp/htmlparser/parser.cc +++ b/validator/cpp/htmlparser/parser.cc @@ -1,9 +1,16 @@ #include -#include -#include +#include #ifdef DUMP_NODES #include // For DumpDocument #endif // DUMP_NODES +#include +#include +#include +#include +#include +#include +#include +#include #include "absl/flags/flag.h" #include "absl/status/status.h" @@ -462,7 +469,7 @@ void Parser::AddFormattingElement() { Node* node = active_formatting_elements_stack_.at(i); if (node->node_type_ == NodeType::SCOPE_MARKER_NODE) break; if (node->node_type_ != NodeType::ELEMENT_NODE) continue; - if (node->name_space_ != "") continue; + if (!node->name_space_.empty()) continue; if (node->atom_ != tag_atom) continue; if (node->attributes_.size() != token_.attributes.size()) continue; @@ -1703,7 +1710,7 @@ bool Parser::InBodyIM() { // NOLINT break; } case TokenType::ERROR_TOKEN: { - if (template_stack_.size() > 0) { + if (!template_stack_.empty()) { insertion_mode_ = std::bind(&Parser::InTemplateIM, this); return false; } else { diff --git a/validator/cpp/htmlparser/parser_test.cc b/validator/cpp/htmlparser/parser_test.cc index 4a5a74de57ed..360bbf335a3c 100644 --- a/validator/cpp/htmlparser/parser_test.cc +++ b/validator/cpp/htmlparser/parser_test.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/parser.h" +#include +#include + #include "gtest/gtest.h" #include "absl/flags/declare.h" #include "absl/flags/flag.h" diff --git a/validator/cpp/htmlparser/renderer_test.cc b/validator/cpp/htmlparser/renderer_test.cc index 1c37bc55a058..a502ef11b38b 100644 --- a/validator/cpp/htmlparser/renderer_test.cc +++ b/validator/cpp/htmlparser/renderer_test.cc @@ -1,11 +1,14 @@ #include "cpp/htmlparser/renderer.h" #include 
+#include +#include +#include #include "gtest/gtest.h" #include "cpp/htmlparser/parser.h" -using namespace std::string_literals; +using namespace std::string_literals; // NOLINT(build/namespaces) namespace htmlparser { @@ -167,4 +170,4 @@ TEST(RendererTest, NullCharsTest) { htmlparser::CheckParseRenderOutput( html_sources.at(i), rendered_outputs.at(i)); } -}; +} diff --git a/validator/cpp/htmlparser/strings.cc b/validator/cpp/htmlparser/strings.cc index 999ab6622912..4e1027cfb253 100644 --- a/validator/cpp/htmlparser/strings.cc +++ b/validator/cpp/htmlparser/strings.cc @@ -3,8 +3,13 @@ #include #include #include +#include // For std::hex +#include #include +#include #include +#include + #include "cpp/htmlparser/casetable.h" #include "cpp/htmlparser/entity.h" #include "cpp/htmlparser/whitespacetable.h" @@ -15,7 +20,7 @@ namespace htmlparser { // assumed Windows-1252 encoding. // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference constexpr std::array kReplacementTable{ - L'\u20AC', // First entry is what 0x80 should be replaced with. + L'\u20AC', // First entry is what 0x80 should be replaced with. L'\u0081', L'\u201A', L'\u0192', @@ -46,7 +51,7 @@ constexpr std::array kReplacementTable{ L'\u0153', L'\u009D', L'\u017E', - L'\u0178', // Last entry is 0x9F. + L'\u0178', // Last entry is 0x9F. // 0x00->L'\uFFFD' is handled programmatically. // 0x0D->L'\u000D' is a no-op. }; @@ -645,7 +650,7 @@ bool Strings::EqualFold(std::string_view l, std::string_view r) { if ((l_char | 0x20) != (r_char | 0x20)) { return false; } - } else if (l_char != r_char) { // Compare other ascii character as-is. + } else if (l_char != r_char) { // Compare other ascii character as-is. return false; } @@ -846,13 +851,13 @@ std::pair UnescapeEntity(std::string* b, int dst, int src, i++; // Lower-cased characters are more common in entities, so we check for // them first. 
- if (Strings::IsCharAlphabet(c) || Strings::IsDigit(c)) { + if (Strings::IsCharAlphabet(c) || Strings::IsDigit(c)) { continue; - } - if (c != ';') { - i--; - } - break; + } + if (c != ';') { + i--; + } + break; } std::string entityName = s.substr(1, i - 1); @@ -900,7 +905,6 @@ std::pair UnescapeEntity(std::string* b, int dst, int src, void CaseTransformInternal(bool to_upper, std::string* s) { for (std::size_t i = 0; i < s->size(); ++i) { - uint8_t code_point = s->at(i) & 0xff; // ASCII characters first. diff --git a/validator/cpp/htmlparser/strings_test.cc b/validator/cpp/htmlparser/strings_test.cc index 2ae273c41c4b..763a6ff22819 100644 --- a/validator/cpp/htmlparser/strings_test.cc +++ b/validator/cpp/htmlparser/strings_test.cc @@ -1,10 +1,13 @@ #include "cpp/htmlparser/strings.h" +#include #include +#include +#include #include "gtest/gtest.h" -using namespace std::string_literals; +using namespace std::string_literals; // NOLINT(build/namespaces) TEST(StringsTest, SplitStringAtTest) { auto columns = htmlparser::Strings::SplitStringAt("a|b|c", '|'); @@ -410,7 +413,7 @@ TEST(StringsTest, ReplaceTest) { htmlparser::Strings::ReplaceAny(&whitespace_and_null2, htmlparser::Strings::kNullChar, htmlparser::Strings::kNullReplacementChar); - EXPECT_EQ(whitespace_and_null2, "amaltas is ��good �boy"); + EXPECT_EQ(whitespace_and_null2, "amaltas is ��good �boy"); // NOLINT std::string many_whitespaces = " a m a lta s "; htmlparser::Strings::RemoveExtraSpaceChars(&many_whitespaces); diff --git a/validator/cpp/htmlparser/token.cc b/validator/cpp/htmlparser/token.cc index 5ae0fdd66ed4..fa60af347fc2 100644 --- a/validator/cpp/htmlparser/token.cc +++ b/validator/cpp/htmlparser/token.cc @@ -1,5 +1,7 @@ #include "cpp/htmlparser/token.h" +#include + #include "cpp/htmlparser/strings.h" namespace htmlparser { diff --git a/validator/cpp/htmlparser/tokenizer.cc b/validator/cpp/htmlparser/tokenizer.cc index 49483ac3f6b2..021322556357 100644 --- a/validator/cpp/htmlparser/tokenizer.cc +++ 
b/validator/cpp/htmlparser/tokenizer.cc @@ -1,5 +1,13 @@ #include "cpp/htmlparser/tokenizer.h" +#include +#include +#include +#include +#include +#include +#include + #include "absl/flags/flag.h" #include "cpp/htmlparser/atom.h" #include "cpp/htmlparser/atomutil.h" @@ -681,6 +689,7 @@ void Tokenizer::ReadTagAttributeKey(bool template_mode) { // templates. See: https://amp.dev/documentation/components/amp-mustache/ bool mustache_inside_section_block = false; std::string mustache_section_name = ""; + bool is_at_attribute_key_start = true; while (!eof_) { char c = ReadByte(); @@ -744,12 +753,21 @@ void Tokenizer::ReadTagAttributeKey(bool template_mode) { return; } case '=': + if (is_at_attribute_key_start) { + // An unexpected equals sign at the start of the attribute name should + // be treated as part of the name. See §13.2.5.32 "Before attribute + // name state" in the HTML Living Standard from 2024-02-22 at + // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state. + break; + } + [[fallthrough]]; case '>': { UnreadByte(); std::get<0>(pending_attribute_).end = raw_.end; return; } } + is_at_attribute_key_start = false; } } @@ -838,7 +856,7 @@ TokenType Tokenizer::Next(bool template_mode) { return token_type_; } - if (raw_tag_ != "") { + if (!raw_tag_.empty()) { if (raw_tag_ == "plaintext") { // Read everything up to EOF. while (!eof_) { diff --git a/validator/cpp/htmlparser/tokenizer_test.cc b/validator/cpp/htmlparser/tokenizer_test.cc index 8c5c9a119fea..b35b92e4c0e1 100644 --- a/validator/cpp/htmlparser/tokenizer_test.cc +++ b/validator/cpp/htmlparser/tokenizer_test.cc @@ -1,5 +1,9 @@ #include "cpp/htmlparser/tokenizer.h" +#include +#include +#include + #include "gtest/gtest.h" #include "cpp/htmlparser/token.h" @@ -229,6 +233,45 @@ TEST(TokenizerTest, BasicTokenizationOfADocument) { // their respective test cases. 
} +// Tests that an unexpected equals sign ("=") at the start of an attribute name +// is treated as part of the attribute name. See §13.2.5.32 "Before attribute +// name state" in the HTML Living Standard from 2024-02-22 at +// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state. +TEST(TokenizerTest, UnexpectedEqualsSignAtStartOfAttributeName) { + // Attribute name is prefixed with "=" and also has a value. + htmlparser::Tokenizer t1("
"); + t1.Next(); + htmlparser::Token token1 = t1.token(); + EXPECT_EQ(token1.token_type, htmlparser::TokenType::SELF_CLOSING_TAG_TOKEN); + EXPECT_EQ(token1.attributes.size(), 1); + EXPECT_EQ(token1.attributes[0].key, "=a"); + EXPECT_EQ(token1.attributes[0].value, "b"); + EXPECT_EQ(t1.Next(), htmlparser::TokenType::ERROR_TOKEN); + EXPECT_TRUE(t1.IsEOF()); + + // Attribute name is prefixed with "=" without value. + htmlparser::Tokenizer t2("
"); + t2.Next(); + htmlparser::Token token2 = t2.token(); + EXPECT_EQ(token2.token_type, htmlparser::TokenType::SELF_CLOSING_TAG_TOKEN); + EXPECT_EQ(token2.attributes.size(), 1); + EXPECT_EQ(token2.attributes[0].key, "=a"); + EXPECT_EQ(token2.attributes[0].value, ""); + EXPECT_EQ(t2.Next(), htmlparser::TokenType::ERROR_TOKEN); + EXPECT_TRUE(t2.IsEOF()); + + // Attribute name is "=" without value. + htmlparser::Tokenizer t3("
"); + t3.Next(); + htmlparser::Token token3 = t3.token(); + EXPECT_EQ(token3.token_type, htmlparser::TokenType::SELF_CLOSING_TAG_TOKEN); + EXPECT_EQ(token3.attributes.size(), 1); + EXPECT_EQ(token3.attributes[0].key, "="); + EXPECT_EQ(token3.attributes[0].value, ""); + EXPECT_EQ(t3.Next(), htmlparser::TokenType::ERROR_TOKEN); + EXPECT_TRUE(t3.IsEOF()); +} + TEST(TokenizerTest, TestMustangTemplateCase) { std::string_view template_html = R"HTML( diff --git a/validator/cpp/htmlparser/url.cc b/validator/cpp/htmlparser/url.cc index 98c2ad8d01d8..98cdad639169 100644 --- a/validator/cpp/htmlparser/url.cc +++ b/validator/cpp/htmlparser/url.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/url.h" +#include +#include + #include "cpp/htmlparser/strings.h" namespace htmlparser { diff --git a/validator/cpp/htmlparser/url_test.cc b/validator/cpp/htmlparser/url_test.cc index ba6e7e0df335..12860f64351b 100644 --- a/validator/cpp/htmlparser/url_test.cc +++ b/validator/cpp/htmlparser/url_test.cc @@ -1,5 +1,7 @@ #include "cpp/htmlparser/url.h" +#include + #include "gtest/gtest.h" namespace htmlparser { diff --git a/validator/cpp/htmlparser/validators/ipaddress_test.cc b/validator/cpp/htmlparser/validators/ipaddress_test.cc index fc3e9584ab02..8e0a0041f344 100644 --- a/validator/cpp/htmlparser/validators/ipaddress_test.cc +++ b/validator/cpp/htmlparser/validators/ipaddress_test.cc @@ -1,4 +1,8 @@ #include "cpp/htmlparser/validators/ipaddress.h" + +#include +#include + #include "gtest/gtest.h" namespace htmlparser::ipaddress { diff --git a/validator/cpp/htmlparser/validators/json_test.cc b/validator/cpp/htmlparser/validators/json_test.cc index 5b9f37adc04d..0c01b9c706cc 100644 --- a/validator/cpp/htmlparser/validators/json_test.cc +++ b/validator/cpp/htmlparser/validators/json_test.cc @@ -16,6 +16,8 @@ #include "cpp/htmlparser/validators/json.h" +#include +#include #include #include diff --git a/validator/cpp/htmlparser/validators/supported_media_query_test.cc 
b/validator/cpp/htmlparser/validators/supported_media_query_test.cc index 26d0b4fb044f..25ed64ea8712 100644 --- a/validator/cpp/htmlparser/validators/supported_media_query_test.cc +++ b/validator/cpp/htmlparser/validators/supported_media_query_test.cc @@ -1,5 +1,8 @@ #include "cpp/htmlparser/validators/supported_media_query.h" +#include +#include + #include "gtest/gtest.h" namespace htmlparser::css {