Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add codeql language #478

Merged
merged 4 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,6 @@
[submodule "lang/semgrep-grammars/src/tree-sitter-fsharp"]
path = lang/semgrep-grammars/src/tree-sitter-fsharp
url = https://github.com/Nsidorenco/tree-sitter-fsharp.git
[submodule "lang/semgrep-grammars/src/tree-sitter-ql"]
path = lang/semgrep-grammars/src/tree-sitter-ql
url = https://github.com/tree-sitter/tree-sitter-ql
1 change: 1 addition & 0 deletions lang/ql/Makefile
9 changes: 9 additions & 0 deletions lang/ql/extensions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# File extensions for the target language, one per line. This is used for
# collecting parsing stats from the repos specified in 'projects.txt'. e.g.:
#
# .h
# .c
#

.ql
.qll
2 changes: 2 additions & 0 deletions lang/ql/fyi.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
semgrep-grammars/src/tree-sitter-ql/grammar.js
semgrep-grammars/src/semgrep-ql/grammar.js
5 changes: 5 additions & 0 deletions lang/ql/projects.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Git URLs of publicly-accessible projects to be used for parsing stats,
# one per line.
#

https://github.com/github/codeql
2 changes: 1 addition & 1 deletion lang/release
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ repo="semgrep-$lang"
(
cd "$export_dir"
if [[ ! -d "$repo" ]]; then
git clone git@github.com:returntocorp/"$repo".git
git clone git@github.com:semgrep/"$repo".git
fi
)

Expand Down
1 change: 1 addition & 0 deletions lang/semgrep-grammars/lang/ql
1 change: 1 addition & 0 deletions lang/semgrep-grammars/src/semgrep-ql/Makefile
71 changes: 71 additions & 0 deletions lang/semgrep-grammars/src/semgrep-ql/grammar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
semgrep-ql

Extends the standard ql grammar with semgrep pattern constructs.
*/

const base_grammar = require('tree-sitter-ql/grammar');

// Grammar definition: takes the base grammar loaded above (base_grammar) and
// layers the semgrep-specific rules declared in `rules` on top of it.
// Rules written as `($, previous) => ...` receive the base grammar's
// definition of the same rule as `previous` and wrap or extend it; rules
// written as `$ => ...` are defined here without reference to a base rule.
module.exports = grammar(base_grammar, {
name: 'ql',

// No extra conflicts are declared: the base grammar's conflict list is
// returned with an empty list appended.
conflicts: ($, previous) => previous.concat([
]),

/*
Support for semgrep ellipsis ('...') and metavariables ('$FOO'),
if they're not already part of the base grammar.
*/
rules: {
// Literal '...'.
semgrep_ellipsis: $ => '...',
// '$...' immediately followed by an identifier made of letters (either
// case), digits and underscores, not starting with a digit, e.g. '$...X'.
semgrep_ellipsis_metavar : $ => /\$\.\.\.[a-zA-Z_][a-zA-Z_0-9]*/,
// '$' followed by upper-case letters, digits and underscores, not starting
// with a digit, e.g. '$FOO'. Lower-case letters are not accepted here.
semgrep_metavariable: $ => token(/\$[A-Z_][A-Z_0-9]*/),

// typed metavars
// Adds the form '(' simpleId semgrep_metavariable ')', e.g. '(Foo $X)',
// as an alternative to the base par_expr.
par_expr: ($, previous) => choice(
seq("(", $.simpleId, $.semgrep_metavariable, ")"),
previous
),

// This gets slightly more precedence to make it win over cases where
// annotName and predicateName are also possible.
_upper_id: ($, previous) => prec(1, choice(
previous,
$.semgrep_metavariable
)),
// These must be choice'd separately, as the only use sites of
// _lower_id which are not also _upper_id.
annotName: ($, previous) => choice(
previous,
$.semgrep_metavariable
),
predicateName: ($, previous) => choice(
previous,
$.semgrep_metavariable
),

// ellipses
// The three rules below rebuild the base rule's alternatives by spreading
// `previous.members`, which relies on each base rule being a choice(...),
// and put the semgrep alternatives in front of them.
classMember: ($, previous) => choice(
$.semgrep_ellipsis,
...previous.members
),
moduleMember: ($, previous) => choice(
$.semgrep_ellipsis,
...previous.members
),

// _primary additionally accepts the ellipsis metavariable ('$...X').
_primary: ($, previous) => choice(
$.semgrep_ellipsis,
$.semgrep_ellipsis_metavar,
...previous.members
),

// Alternate "entry point". Allows parsing a standalone expression.
// The input must start with the literal marker '__SEMGREP_EXPRESSION',
// followed by an _exprOrTerm.
semgrep_expression: $ => seq('__SEMGREP_EXPRESSION', $._exprOrTerm),

// `ql` accepts either a marked standalone expression or whatever the base
// `ql` rule accepts.
ql: ($, previous) => choice(
$.semgrep_expression,
previous
)
}
});
1 change: 1 addition & 0 deletions lang/semgrep-grammars/src/semgrep-ql/prep
221 changes: 221 additions & 0 deletions lang/semgrep-grammars/src/semgrep-ql/test/corpus/semgrep.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
============
Metavariable
============

__SEMGREP_EXPRESSION $X

---

(ql
(semgrep_expression
(variable
(varName
(simpleId
(semgrep_metavariable))))))

============
Ellipsis
============

__SEMGREP_EXPRESSION ...

---

(ql
(semgrep_expression
(semgrep_ellipsis)))

============
Metavariable ellipsis
============

__SEMGREP_EXPRESSION $...X

---

(ql
(semgrep_expression
(semgrep_ellipsis_metavar)))

============
Typed metavar
============

__SEMGREP_EXPRESSION (Foo $X)

---

(ql
(semgrep_expression
(par_expr
(simpleId)
(semgrep_metavariable))))

============
Sample query
============

from $TY $VAR
where $FORMULA
select $EXPR

---

(ql
(moduleMember
(select
(varDecl
(typeExpr
(className
(semgrep_metavariable)))
(varName
(simpleId
(semgrep_metavariable))))
(variable
(varName
(simpleId
(semgrep_metavariable))))
(asExprs
(asExpr
(variable
(varName
(simpleId
(semgrep_metavariable)))))))))

============
Metavariables as annotations
============

__SEMGREP_EXPRESSION $X[bar](2)

---

(ql
(semgrep_expression
(expr_annotation
(annotName
(semgrep_metavariable))
(annotName)
(literal
(integer)))))

============
Metavariables as predicates
============

predicate $FOO($TY $X) { 2 }

---

(ql
(moduleMember
(classlessPredicate
(predicate)
(predicateName
(semgrep_metavariable))
(varDecl
(typeExpr
(className
(semgrep_metavariable)))
(varName
(simpleId
(semgrep_metavariable))))
(body
(literal
(integer))))))

============
Metavariables as imports
============

import $X

---

(ql
(moduleMember
(importDirective
(importModuleExpr
(moduleExpr
(simpleId
(semgrep_metavariable)))))))

============
Metavariable as variable name
============

int getANumber() { $Z = 3 }

---

(ql
(moduleMember
(classlessPredicate
(typeExpr
(primitiveType))
(predicateName)
(body
(comp_term
(variable
(varName
(simpleId
(semgrep_metavariable))))
(compop)
(literal
(integer)))))))

============
Ellipsis as predicate body
============

predicate $FOO() {
...
}

---

(ql
(moduleMember
(classlessPredicate
(predicate)
(predicateName
(semgrep_metavariable))
(body
(semgrep_ellipsis)))))

============
Ellipsis as class body
============

class $FOO {
...
}

---

(ql
(moduleMember
(dataclass
(className
(semgrep_metavariable))
(classMember
(semgrep_ellipsis)))))

============
Ellipsis as module member
============

module $FOO {
...
}

---

(ql
(moduleMember
(module
(moduleName
(simpleId
(semgrep_metavariable)))
(moduleMember
(semgrep_ellipsis)))))
1 change: 1 addition & 0 deletions lang/semgrep-grammars/src/tree-sitter-ql
Submodule tree-sitter-ql added at ff04ba
Loading