diff --git a/.circleci/config.yml b/.circleci/config.yml index 3754e542..ea7032f9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -139,6 +139,7 @@ workflows: - bash - c - cairo + - circom - clojure - cpp - c-sharp @@ -158,11 +159,14 @@ workflows: - kotlin - lua - make + - move-on-aptos + - move-on-sui - ocaml - php - promql - proto - python + - ql - r - ruby - rust diff --git a/.gitmodules b/.gitmodules index 5eb2eec1..35283a77 100644 --- a/.gitmodules +++ b/.gitmodules @@ -48,7 +48,7 @@ url = https://github.com/returntocorp/ocaml-tree-sitter-core.git [submodule "lang/semgrep-grammars/src/tree-sitter-vue"] path = lang/semgrep-grammars/src/tree-sitter-vue - url = https://github.com/ikatyang/tree-sitter-vue.git + url = https://github.com/semgrep/tree-sitter-vue.git [submodule "lang/semgrep-grammars/src/tree-sitter-html"] path = lang/semgrep-grammars/src/tree-sitter-html url = https://github.com/tree-sitter/tree-sitter-html.git diff --git a/core b/core index c085bc52..e063ec57 160000 --- a/core +++ b/core @@ -1 +1 @@ -Subproject commit c085bc5280996b781a80d5706cc5590081e1629b +Subproject commit e063ec5749bb217a594773c13308145f453d5d0c diff --git a/lang/Makefile b/lang/Makefile index d0f2f30c..a1a0fcd5 100644 --- a/lang/Makefile +++ b/lang/Makefile @@ -37,11 +37,14 @@ SUPPORTED_TS_LANGUAGES = \ kotlin \ lua \ make \ + move-on-aptos \ + move-on-sui \ ocaml \ php \ promql \ proto \ python \ + ql \ r \ ruby \ rust \ @@ -86,8 +89,8 @@ SUPPORTED_DIALECTS = \ kotlin \ lua \ make \ - move-on-sui \ move-on-aptos \ + move-on-sui \ ocaml \ php \ promql \ diff --git a/lang/language-variants-0.20.6 b/lang/language-variants-0.20.6 new file mode 100644 index 00000000..391680c5 --- /dev/null +++ b/lang/language-variants-0.20.6 @@ -0,0 +1,21 @@ +apex +bash +c-sharp +dart +elixir +fsharp +hack +hcl +java +javascript +lua +php +python +r +ruby +rust +sml +solidity +tsx +typescript +vue diff --git a/lang/language-variants-0.22.6 b/lang/language-variants-0.22.6 new file 
mode 100644 index 00000000..2944501e --- /dev/null +++ b/lang/language-variants-0.22.6 @@ -0,0 +1,21 @@ +c +cairo +circom +clojure +cpp +dockerfile +go +haskell +html +jsonnet +julia +kotlin +make +move-on-aptos +move-on-sui +ocaml +promql +proto +ql +sqlite +swift diff --git a/lang/languages-0.20.6 b/lang/languages-0.20.6 new file mode 100644 index 00000000..e61996bc --- /dev/null +++ b/lang/languages-0.20.6 @@ -0,0 +1,20 @@ +bash +c-sharp +dart +elixir +fsharp +hack +hcl +java +javascript +lua +php +python +r +ruby +rust +sfapex +sml +solidity +typescript +vue diff --git a/lang/languages-0.20.6.readme b/lang/languages-0.20.6.readme new file mode 100644 index 00000000..e678751e --- /dev/null +++ b/lang/languages-0.20.6.readme @@ -0,0 +1,14 @@ +The files languages-0.20.6, languages-0.22.6, language-variants-0.20.6, +and language-variants-0.22.6 contain lists of languages that are useful +when regenerating the code for all the languages when necessary. + +We're in a situation where some languages are stuck with tree-sitter 0.20.6. +The language names in languages-* are suitable for the `test-lang` script. +The dialect names in language-variants-* are suitable for the `release` +script. 
+ +Sample Bash commands iterating over languages: + +$ for x in $(cat languages-0.22.6); do ./test-lang $x || break; done + +$ for x in $(cat language-variants-0.22.6); do ./release $x || break; done diff --git a/lang/languages-0.22.6 b/lang/languages-0.22.6 new file mode 100644 index 00000000..2944501e --- /dev/null +++ b/lang/languages-0.22.6 @@ -0,0 +1,21 @@ +c +cairo +circom +clojure +cpp +dockerfile +go +haskell +html +jsonnet +julia +kotlin +make +move-on-aptos +move-on-sui +ocaml +promql +proto +ql +sqlite +swift diff --git a/lang/semgrep-grammars/src/.gitignore b/lang/semgrep-grammars/src/.gitignore index 93c89761..93c60fd9 100644 --- a/lang/semgrep-grammars/src/.gitignore +++ b/lang/semgrep-grammars/src/.gitignore @@ -2,4 +2,4 @@ /semgrep-*/**/index.js /semgrep-*/**/src /semgrep-*/**/inherited -/semgrep-*/test.log +/semgrep-*/**/test.log diff --git a/lang/semgrep-grammars/src/semgrep-go/grammar.js b/lang/semgrep-grammars/src/semgrep-go/grammar.js index 37682c8b..1c1fc1e4 100644 --- a/lang/semgrep-grammars/src/semgrep-go/grammar.js +++ b/lang/semgrep-grammars/src/semgrep-go/grammar.js @@ -17,48 +17,15 @@ module.exports = grammar(base_grammar, { if they're not already part of the base grammar. 
*/ rules: { - semgrep_ellipsis: $ => "...", - - semgrep_ellipsis_metavar : $ => /\$\.\.\.[a-zA-Z_][a-zA-Z_0-9]*/, - semgrep_deep_ellipsis: $ => seq("<...", $._expression, "...>"), - - // The parser tries to wrap ellipsis with expression statements since we - // list ellipsis as expressions and usually we use them in a statement - // position (i.e `if(true) {...}`) - _statement: ($, previous) => choice( - previous, - prec(1,$.semgrep_ellipsis_metavar), - prec(1,$.semgrep_deep_ellipsis), - prec(1,$.semgrep_ellipsis) - ), - - _expression: ($, previous) => choice( - previous, - $.semgrep_ellipsis_metavar, - $.semgrep_deep_ellipsis, - $.semgrep_ellipsis, - $.typed_metavar - ), - - typed_metavar: $ => seq( - "(", $.identifier, ":", $._type, ")" - ), - - identifier: ($, previous) => token(choice( - previous, - // inline this here so we can stay inside of the `token`, because - // `identifier` is the word token - /\$[A-Z_][A-Z_0-9]*/ - )), - - parameter_declaration: ($, previous) => choice( - $.semgrep_ellipsis, - $.semgrep_ellipsis_metavar, - previous - ), - - // slightly more precedence so we bump this up over using `...` - // for a semgrep ellipsis - implicit_length_array_type: ($, previous) => prec(1, previous) + /* + semgrep_ellipsis: $ => '...', + + _expression: ($, previous) => { + return choice( + $.semgrep_ellipsis, + ...previous.members + ); + } + */ } }); diff --git a/lang/semgrep-grammars/src/semgrep-go/test/corpus/semgrep.txt b/lang/semgrep-grammars/src/semgrep-go/test/corpus/semgrep.txt index 4fc16596..e69de29b 100644 --- a/lang/semgrep-grammars/src/semgrep-go/test/corpus/semgrep.txt +++ b/lang/semgrep-grammars/src/semgrep-go/test/corpus/semgrep.txt @@ -1,234 +0,0 @@ -================================================================================ -Ellipsis -================================================================================ - -... 
- --------------------------------------------------------------------------------- - -(source_file - (semgrep_ellipsis)) - -================================================================================ -Top level statements -================================================================================ - -x := 1 -... -y := 2 - --------------------------------------------------------------------------------- - -(source_file - (short_var_declaration - (expression_list - (identifier)) - (expression_list - (int_literal))) - (semgrep_ellipsis) - (short_var_declaration - (expression_list - (identifier)) - (expression_list - (int_literal)))) - -================================================================================ -Function with ellipses -================================================================================ - -func $FUNC(x bool, ...) { - ... -} - --------------------------------------------------------------------------------- - -(source_file - (function_declaration - (identifier) - (parameter_list - (parameter_declaration - (identifier) - (type_identifier)) - (parameter_declaration - (semgrep_ellipsis))) - (block - (semgrep_ellipsis)))) - -================================================================================ -Function with ellipses -================================================================================ - -func $FUNC(x bool, $...ARGS) { - $...BODY -} - --------------------------------------------------------------------------------- - -(source_file - (function_declaration - (identifier) - (parameter_list - (parameter_declaration - (identifier) - (type_identifier)) - (parameter_declaration - (semgrep_ellipsis_metavar))) - (block - (semgrep_ellipsis_metavar)))) - -================================================================================ -Type declaration with metavariables -================================================================================ - -type $TY = $TY2 - 
--------------------------------------------------------------------------------- - -(source_file - (type_declaration - (type_alias - (type_identifier) - (type_identifier)))) - -================================================================================ -Struct metavariable -================================================================================ - -type $STRUCT struct { - $FIELD int - $FIELD2 $TY -} - --------------------------------------------------------------------------------- - -(source_file - (type_declaration - (type_spec - (type_identifier) - (struct_type - (field_declaration_list - (field_declaration - (field_identifier) - (type_identifier)) - (field_declaration - (field_identifier) - (type_identifier))))))) - -================================================================================ -Ellipsis args -================================================================================ - -foo(..., 5) - --------------------------------------------------------------------------------- - -(source_file - (expression_statement - (call_expression - (identifier) - (argument_list - (semgrep_ellipsis) - (int_literal))))) - -================================================================================ -Ellipsis in if -================================================================================ - -if (...) { - ... -} else { - ... 
-} - --------------------------------------------------------------------------------- - -(source_file - (if_statement - (parenthesized_expression - (semgrep_ellipsis)) - (block - (semgrep_ellipsis)) - (block - (semgrep_ellipsis)))) - -================================================================================ -Metavariable in import -================================================================================ - -import ( - $NAME "crypto/rand" -) - --------------------------------------------------------------------------------- - -(source_file - (import_declaration - (import_spec_list - (import_spec - (package_identifier) - (interpreted_string_literal))))) - -================================================================================ -Deep expression -================================================================================ - -<... 1 ...> - --------------------------------------------------------------------------------- - -(source_file - (semgrep_deep_ellipsis - (int_literal))) - -================================================================================ -Deep expression again -================================================================================ - -x := <... 
foo() ...> - --------------------------------------------------------------------------------- - -(source_file - (short_var_declaration - (expression_list - (identifier)) - (expression_list - (semgrep_deep_ellipsis - (call_expression - (identifier) - (argument_list)))))) - -================================================================================ -Typed metavariables -================================================================================ - -x := ($TY : bool) - --------------------------------------------------------------------------------- - -(source_file - (short_var_declaration - (expression_list - (identifier)) - (expression_list - (typed_metavar - (identifier) - (type_identifier))))) - -================================================================================ -Implicit length array type -================================================================================ - -x := [...] bool {} - --------------------------------------------------------------------------------- - -(source_file - (short_var_declaration - (expression_list - (identifier)) - (expression_list - (composite_literal - (implicit_length_array_type - (type_identifier)) - (literal_value))))) diff --git a/lang/semgrep-grammars/src/semgrep-typescript/common/semgrep-ext.js b/lang/semgrep-grammars/src/semgrep-typescript/common/semgrep-ext.js index 9972b89b..9f289042 100644 --- a/lang/semgrep-grammars/src/semgrep-typescript/common/semgrep-ext.js +++ b/lang/semgrep-grammars/src/semgrep-typescript/common/semgrep-ext.js @@ -28,8 +28,16 @@ module.exports = { */ semgrep_ellipsis: $ => '...', - semgrep_metavar_ellipsis: $ => /\$\.\.\.[A-Z_][A-Z_0-9]*/, +/* TODO: restore this when the changes are made in semgrep. + Remove the XXXXXXX when uncommenting. 
+ You also need to restore the test file: + lang/semgrep-grammars/src/semgrep-typescript/tsx/corpus/semgrep-ext.txt + See the original PR: + https://github.com/semgrep/ocaml-tree-sitter-semgrep/pull/488 + + semgrep_metavar_ellipsis: $ => /\$\.\.\.[A-Z_][A-Z_0-9]*XXXXXXX/, +/* /* In the expression context, there are LR(1) conflicts with spread and * rest. I (nmote) don't think that these are true ambiguities, but just in * case we'll declare conflicts and set this to low dynamic precedence so as @@ -40,13 +48,13 @@ module.exports = { previous, $.semgrep_expression_ellipsis, ), - +/* TODO: restore this when the changes are made in semgrep. _jsx_attribute: ($, previous) => choice( previous, $.semgrep_ellipsis, $.semgrep_metavar_ellipsis ), - +*/ // TODO Remove this when we update tree-sitter-typescript past // https://github.com/tree-sitter/tree-sitter-typescript/pull/239. I (nmote) // ran into unrelated issues updating it, documented in diff --git a/lang/semgrep-grammars/src/semgrep-typescript/tsx/corpus/semgrep-ext.txt b/lang/semgrep-grammars/src/semgrep-typescript/tsx/corpus/semgrep-ext.txt deleted file mode 100644 index e7ebac8c..00000000 --- a/lang/semgrep-grammars/src/semgrep-typescript/tsx/corpus/semgrep-ext.txt +++ /dev/null @@ -1,49 +0,0 @@ -================================== -JSX with ellipsis -================================== - - - ---- - -(program - (expression_statement - (jsx_self_closing_element - (identifier) - (semgrep_ellipsis)))) - -================================== -JSX with ellipsis and other props -================================== - - - ---- - -(program - (expression_statement - (jsx_self_closing_element - (identifier) - (jsx_attribute - (property_identifier) - (string - (string_fragment))) - (semgrep_ellipsis) - (jsx_attribute - (property_identifier) - (jsx_expression - (identifier)))))) - -================================== -JSX with metavariable ellipsis -================================== - - - ---- - -(program - (expression_statement - 
(jsx_self_closing_element - (identifier) - (semgrep_metavar_ellipsis)))) diff --git a/lang/semgrep-grammars/src/semgrep-vue/prep b/lang/semgrep-grammars/src/semgrep-vue/prep index 8ecabfd9..929e00c5 100755 --- a/lang/semgrep-grammars/src/semgrep-vue/prep +++ b/lang/semgrep-grammars/src/semgrep-vue/prep @@ -17,6 +17,19 @@ cat ../tree-sitter-vue/src/scanner.cc \ cd src ln -sf ../../tree-sitter-vue/src/tree_sitter_html/scanner.cc scanner_html.h ln -sf ../../tree-sitter-vue/src/tree_sitter_html/tag.h tag.h + + # Sanity check + if grep tree_sitter_html_ scanner_html.h; then + cat >&2 <