Skip to content

Commit 071189a

Browse files
authored
Merge pull request #18175 from asgerf/jss/documentation
JS: Update data flow documentation and tutorials for JavaScript
2 parents e1aff15 + 27e61a1 commit 071189a

File tree

10 files changed

+249
-302
lines changed

10 files changed

+249
-302
lines changed

docs/codeql/codeql-language-guides/analyzing-data-flow-in-javascript-and-typescript.rst

+57-70
Original file line numberDiff line numberDiff line change
@@ -204,58 +204,45 @@ data flow solver that can check whether there is (global) data flow from a sourc
204204
Optionally, configurations may specify extra data flow edges to be added to the data flow graph, and may also specify `barriers`. Barriers are data flow nodes or edges through
205205
which data should not be tracked for the purposes of this analysis.
206206

207-
To define a configuration, extend the class ``DataFlow::Configuration`` as follows:
207+
To define a configuration, add a module that implements the signature ``DataFlow::ConfigSig`` and pass it to ``DataFlow::Global`` as follows:
208208

209209
.. code-block:: ql
210210
211-
class MyDataFlowConfiguration extends DataFlow::Configuration {
212-
MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" }
211+
module MyAnalysisConfig implements DataFlow::ConfigSig {
212+
predicate isSource(DataFlow::Node source) { /* ... */ }
213213
214-
override predicate isSource(DataFlow::Node source) { /* ... */ }
214+
predicate isSink(DataFlow::Node sink) { /* ... */ }
215215
216-
override predicate isSink(DataFlow::Node sink) { /* ... */ }
217-
218-
// optional overrides:
219-
override predicate isBarrier(DataFlow::Node nd) { /* ... */ }
220-
override predicate isBarrierEdge(DataFlow::Node pred, DataFlow::Node succ) { /* ... */ }
221-
override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { /* ... */ }
216+
// optional predicates:
217+
predicate isBarrier(DataFlow::Node nd) { /* ... */ }
218+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { /* ... */ }
222219
}
223220
224-
The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by a suitable
225-
name describing your particular analysis configuration.
221+
module MyAnalysisFlow = DataFlow::Global<MyAnalysisConfig>;
226222
227-
The data flow analysis is performed using the predicate ``hasFlow(source, sink)``:
223+
The data flow analysis is performed using the predicate ``MyAnalysisFlow::flow(source, sink)``:
228224

229225
.. code-block:: ql
230226
231-
from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink
232-
where dataflow.hasFlow(source, sink)
227+
from DataFlow::Node source, DataFlow::Node sink
228+
where MyAnalysisFlow::flow(source, sink)
233229
select source, "Data flow from $@ to $@.", source, source.toString(), sink, sink.toString()
234230
235231
Using global taint tracking
236232
~~~~~~~~~~~~~~~~~~~~~~~~~~~
237233

238-
Global taint tracking extends global data flow with additional non-value-preserving steps, such as flow through string-manipulating operations. To use it, simply extend
239-
``TaintTracking::Configuration`` instead of ``DataFlow::Configuration``:
234+
Global taint tracking extends global data flow with additional non-value-preserving steps, such as flow through string-manipulating operations. To use it, simply
235+
use ``TaintTracking::Global<...>`` instead of ``DataFlow::Global<...>``:
240236

241237
.. code-block:: ql
242238
243-
class MyTaintTrackingConfiguration extends TaintTracking::Configuration {
244-
MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" }
245-
246-
override predicate isSource(DataFlow::Node source) { /* ... */ }
247-
248-
override predicate isSink(DataFlow::Node sink) { /* ... */ }
239+
module MyAnalysisConfig implements DataFlow::ConfigSig {
240+
/* ... */
249241
}
250242
251-
Analogous to ``isAdditionalFlowStep``, there is a predicate ``isAdditionalTaintStep`` that you can override to specify custom flow steps to consider in the analysis.
252-
Instead of the ``isBarrier`` and ``isBarrierEdge`` predicates, the taint tracking configuration includes ``isSanitizer`` and ``isSanitizerEdge`` predicates that specify
253-
data flow nodes or edges that act as taint sanitizers and hence stop flow from a source to a sink.
243+
module MyAnalysisFlow = TaintTracking::Global<MyAnalysisConfig>;
254244
255-
Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"``
256-
should be replaced by an appropriate descriptive name.
257-
258-
The taint tracking analysis is again performed using the predicate ``hasFlow(source, sink)``.
245+
The taint tracking analysis is again performed using the predicate ``MyAnalysisFlow::flow(source, sink)``.
259246

260247
Examples
261248
~~~~~~~~
@@ -267,20 +254,20 @@ time using global taint tracking.
267254
268255
import javascript
269256
270-
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
271-
CommandLineFileNameConfiguration() { this = "CommandLineFileNameConfiguration" }
272-
273-
override predicate isSource(DataFlow::Node source) {
257+
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
258+
predicate isSource(DataFlow::Node source) {
274259
DataFlow::globalVarRef("process").getAPropertyRead("argv").getAPropertyRead() = source
275260
}
276261
277-
override predicate isSink(DataFlow::Node sink) {
262+
predicate isSink(DataFlow::Node sink) {
278263
DataFlow::moduleMember("fs", "readFile").getACall().getArgument(0) = sink
279264
}
280265
}
281266
282-
from CommandLineFileNameConfiguration cfg, DataFlow::Node source, DataFlow::Node sink
283-
where cfg.hasFlow(source, sink)
267+
module CommandLineFileNameFlow = TaintTracking::Global<CommandLineFileNameConfig>;
268+
269+
from DataFlow::Node source, DataFlow::Node sink
270+
where CommandLineFileNameFlow::flow(source, sink)
284271
select source, sink
285272
286273
This query will now find flows that involve inter-procedural steps, like in the following example (where the individual steps have been marked with comments
@@ -325,15 +312,15 @@ with an error if it does not. We could then use that function in ``readFileHelpe
325312
}
326313
327314
For the purposes of our above analysis, ``checkPath`` is a `sanitizer`: its output is always untainted, even if its input is tainted. To model this
328-
we can add an override of ``isSanitizer`` to our taint-tracking configuration like this:
315+
we can add an ``isBarrier`` predicate to our taint-tracking configuration like this:
329316

330317
.. code-block:: ql
331318
332-
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
319+
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
333320
334321
// ...
335322
336-
override predicate isSanitizer(DataFlow::Node nd) {
323+
predicate isBarrier(DataFlow::Node nd) {
337324
nd.(DataFlow::CallNode).getCalleeName() = "checkPath"
338325
}
339326
}
@@ -359,36 +346,36 @@ Note that ``checkPath`` is now no longer a sanitizer in the sense described abov
359346
through ``checkPath`` any more. The flow is, however, `guarded` by ``checkPath`` in the sense that the expression ``checkPath(p)`` has to evaluate
360347
to ``true`` (or, more precisely, to a truthy value) in order for the flow to happen.
361348

362-
Such sanitizer guards can be supported by defining a new subclass of ``TaintTracking::SanitizerGuardNode`` and overriding the predicate
363-
``isSanitizerGuard`` in the taint-tracking configuration class to add all instances of this class as sanitizer guards to the configuration.
349+
Such sanitizer guards can be supported by defining a class with a ``blocksExpr`` predicate and using the ``DataFlow::MakeBarrierGuard`` module
350+
to implement the ``isBarrier`` predicate.
364351

365-
For our above example, we would begin by defining a subclass of ``SanitizerGuardNode`` that identifies guards of the form ``checkPath(...)``:
352+
For our above example, we would begin by defining a subclass of ``DataFlow::CallNode`` that identifies guards of the form ``checkPath(...)``:
366353

367354
.. code-block:: ql
368355
369-
class CheckPathSanitizerGuard extends TaintTracking::SanitizerGuardNode, DataFlow::CallNode {
356+
class CheckPathSanitizerGuard extends DataFlow::CallNode {
370357
CheckPathSanitizerGuard() { this.getCalleeName() = "checkPath" }
371358
372-
override predicate sanitizes(boolean outcome, Expr e) {
359+
predicate blocksExpr(boolean outcome, Expr e) {
373360
outcome = true and
374-
e = getArgument(0).asExpr()
361+
e = this.getArgument(0).asExpr()
375362
}
376363
}
377364
378-
The characteristic predicate of this class checks that the sanitizer guard is a call to a function named ``checkPath``. The overriding definition
379-
of ``sanitizes`` says such a call sanitizes its first argument (that is, ``getArgument(0)``) if it evaluates to ``true`` (or rather, a truthy
365+
The characteristic predicate of this class checks that the sanitizer guard is a call to a function named ``checkPath``. The definition
366+
of ``blocksExpr`` says such a call sanitizes its first argument (that is, ``getArgument(0)``) if it evaluates to ``true`` (or rather, a truthy
380367
value).
381368

382-
Now we can override ``isSanitizerGuard`` to add these sanitizer guards to our configuration:
369+
Now we can implement ``isBarrier`` to add this sanitizer guard to our configuration:
383370

384371
.. code-block:: ql
385372
386-
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
373+
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
387374
388375
// ...
389376
390-
override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode nd) {
391-
nd instanceof CheckPathSanitizerGuard
377+
predicate isBarrier(DataFlow::Node node) {
378+
node = DataFlow::MakeBarrierGuard<CheckPathSanitizerGuard>::getABarrierNode()
392379
}
393380
}
394381
@@ -399,7 +386,7 @@ reach there if ``checkPath(p)`` evaluates to a truthy value. Consequently, there
399386
Additional taint steps
400387
~~~~~~~~~~~~~~~~~~~~~~
401388

402-
Sometimes the default data flow and taint steps provided by ``DataFlow::Configuration`` and ``TaintTracking::Configuration`` are not sufficient
389+
Sometimes the default data flow and taint steps provided by the data flow library are not sufficient
403390
and we need to add additional flow or taint steps to our configuration to make it find the expected flow. For example, this can happen because
404391
the analyzed program uses a function from an external library whose source code is not available to the analysis, or because it uses a function
405392
that is too difficult to analyze.
@@ -420,16 +407,16 @@ to resolve any symlinks in the path ``p`` before passing it to ``readFile``:
420407
Resolving symlinks does not make an unsafe path any safer, so we would still like our query to flag this, but since the standard library does
421408
not have a model of ``resolve-symlinks`` it will no longer return any results.
422409
423-
We can fix this quite easily by adding an overriding definition of the ``isAdditionalTaintStep`` predicate to our configuration, introducing an
410+
We can fix this quite easily by adding a definition of the ``isAdditionalFlowStep`` predicate to our configuration, introducing an
424411
additional taint step from the first argument of ``resolveSymlinks`` to its result:
425412
426413
.. code-block:: ql
427414
428-
class CommandLineFileNameConfiguration extends TaintTracking::Configuration {
415+
module CommandLineFileNameConfig implements DataFlow::ConfigSig {
429416
430417
// ...
431418
432-
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
419+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
433420
exists(DataFlow::CallNode c |
434421
c = DataFlow::moduleImport("resolve-symlinks").getACall() and
435422
pred = c.getArgument(0) and
@@ -494,18 +481,18 @@ Exercise 2
494481
495482
import javascript
496483
497-
class HardCodedTagNameConfiguration extends DataFlow::Configuration {
498-
HardCodedTagNameConfiguration() { this = "HardCodedTagNameConfiguration" }
499-
500-
override predicate isSource(DataFlow::Node source) { source.asExpr() instanceof ConstantString }
484+
module HardCodedTagNameConfig implements DataFlow::ConfigSig {
485+
predicate isSource(DataFlow::Node source) { source.asExpr() instanceof ConstantString }
501486
502-
override predicate isSink(DataFlow::Node sink) {
487+
predicate isSink(DataFlow::Node sink) {
503488
sink = DataFlow::globalVarRef("document").getAMethodCall("createElement").getArgument(0)
504489
}
505490
}
506491
507-
from HardCodedTagNameConfiguration cfg, DataFlow::Node source, DataFlow::Node sink
508-
where cfg.hasFlow(source, sink)
492+
module HardCodedTagNameFlow = DataFlow::Global<HardCodedTagNameConfig>;
493+
494+
from DataFlow::Node source, DataFlow::Node sink
495+
where HardCodedTagNameFlow::flow(source, sink)
509496
select source, sink
510497
511498
Exercise 3
@@ -540,18 +527,18 @@ Exercise 4
540527
}
541528
}
542529
543-
class HardCodedTagNameConfiguration extends DataFlow::Configuration {
544-
HardCodedTagNameConfiguration() { this = "HardCodedTagNameConfiguration" }
530+
module HardCodedTagNameConfig implements DataFlow::ConfigSig {
531+
predicate isSource(DataFlow::Node source) { source instanceof ArrayEntryCallResult }
545532
546-
override predicate isSource(DataFlow::Node source) { source instanceof ArrayEntryCallResult }
547-
548-
override predicate isSink(DataFlow::Node sink) {
533+
predicate isSink(DataFlow::Node sink) {
549534
sink = DataFlow::globalVarRef("document").getAMethodCall("createElement").getArgument(0)
550535
}
551536
}
552537
553-
from HardCodedTagNameConfiguration cfg, DataFlow::Node source, DataFlow::Node sink
554-
where cfg.hasFlow(source, sink)
538+
module HardCodedTagNameFlow = DataFlow::Global<HardCodedTagNameConfig>;
539+
540+
from DataFlow::Node source, DataFlow::Node sink
541+
where HardCodedTagNameFlow::flow(source, sink)
555542
select source, sink
556543
557544
Further reading

docs/codeql/codeql-language-guides/codeql-library-for-javascript.rst

+17-28
Original file line numberDiff line numberDiff line change
@@ -700,55 +700,44 @@ The data flow graph-based analyses described so far are all intraprocedural: the
700700

701701
We distinguish here between data flow proper, and *taint tracking*: the latter not only considers value-preserving flow (such as from variable definitions to uses), but also cases where one value influences ("taints") another without determining it entirely. For example, in the assignment ``s2 = s1.substring(i)``, the value of ``s1`` influences the value of ``s2``, because ``s2`` is assigned a substring of ``s1``. In general, ``s2`` will not be assigned ``s1`` itself, so there is no data flow from ``s1`` to ``s2``, but ``s1`` still taints ``s2``.
702702

703-
It is a common pattern that we wish to specify data flow or taint analysis in terms of its *sources* (where flow starts), *sinks* (where it should be tracked), and *barriers* or *sanitizers* (where flow is interrupted). Sanitizers they are very common in security analyses: for example, an analysis that tracks the flow of untrusted user input into, say, a SQL query has to keep track of code that validates the input, thereby making it safe to use. Such a validation step is an example of a sanitizer.
703+
It is a common pattern that we wish to specify data flow or taint analysis in terms of its *sources* (where flow starts), *sinks* (where it should be tracked), and *barriers* (also called *sanitizers*) where flow is interrupted. Sanitizers are very common in security analyses: for example, an analysis that tracks the flow of untrusted user input into, say, a SQL query has to keep track of code that validates the input, thereby making it safe to use. Such a validation step is an example of a sanitizer.
704704

705-
The classes ``DataFlow::Configuration`` and ``TaintTracking::Configuration`` allow specifying a data flow or taint analysis, respectively, by overriding the following predicates:
705+
A module implementing the signature ``DataFlow::ConfigSig`` may specify a data flow or taint analysis by implementing the following predicates:
706706

707707
- ``isSource(DataFlow::Node nd)`` selects all nodes ``nd`` from where flow tracking starts.
708708
- ``isSink(DataFlow::Node nd)`` selects all nodes ``nd`` to which the flow is tracked.
709-
- ``isBarrier(DataFlow::Node nd)`` selects all nodes ``nd`` that act as a barrier for data flow; ``isSanitizer`` is the corresponding predicate for taint tracking configurations.
710-
- ``isBarrierEdge(DataFlow::Node src, DataFlow::Node trg)`` is a variant of ``isBarrier(nd)`` that allows specifying barrier *edges* in addition to barrier nodes; again, ``isSanitizerEdge`` is the corresponding predicate for taint tracking;
711-
- ``isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg)`` allows specifying custom additional flow steps for this analysis; ``isAdditionalTaintStep`` is the corresponding predicate for taint tracking configurations.
709+
- ``isBarrier(DataFlow::Node nd)`` selects all nodes ``nd`` that act as a barrier/sanitizer for data flow.
710+
- ``isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg)`` allows specifying custom additional flow steps for this analysis.
712711

713-
Since for technical reasons both ``Configuration`` classes are subtypes of ``string``, you have to choose a unique name for each flow configuration and equate ``this`` with it in the characteristic predicate (as in the example below).
714-
715-
The predicate ``Configuration.hasFlow`` performs the actual flow tracking, starting at a source and looking for flow to a sink that does not pass through a barrier node or edge.
712+
Such a module can be passed to ``DataFlow::Global<...>``. This will produce a module with a ``flow`` predicate that performs the actual flow tracking, starting at a source and looking for flow to a sink that does not pass through a barrier node.
716713

717714
For example, suppose that we are developing an analysis to find hard-coded passwords. We might write a simple query that looks for string constants flowing into variables named ``"password"``.
718715

719716
.. code-block:: ql
720717
721718
import javascript
722719
723-
class PasswordTracker extends DataFlow::Configuration {
724-
PasswordTracker() {
725-
// unique identifier for this configuration
726-
this = "PasswordTracker"
727-
}
720+
module PasswordConfig implements DataFlow::ConfigSig {
721+
predicate isSource(DataFlow::Node nd) { nd.asExpr() instanceof StringLiteral }
728722
729-
override predicate isSource(DataFlow::Node nd) {
730-
nd.asExpr() instanceof StringLiteral
731-
}
732-
733-
override predicate isSink(DataFlow::Node nd) {
734-
passwordVarAssign(_, nd)
735-
}
723+
predicate isSink(DataFlow::Node nd) { passwordVarAssign(_, nd) }
724+
}
736725
737-
predicate passwordVarAssign(Variable v, DataFlow::Node nd) {
738-
v.getAnAssignedExpr() = nd.asExpr() and
739-
v.getName().toLowerCase() = "password"
740-
}
726+
predicate passwordVarAssign(Variable v, DataFlow::Node nd) {
727+
v.getAnAssignedExpr() = nd.asExpr() and
728+
v.getName().toLowerCase() = "password"
741729
}
742730
743-
Now we can rephrase our query to use ``Configuration.hasFlow``:
731+
module PasswordFlow = DataFlow::Global<PasswordConfig>;
732+
733+
Now we can rephrase our query to use ``PasswordFlow::flow``:
744734

745735
.. code-block:: ql
746736
747-
from PasswordTracker pt, DataFlow::Node source, DataFlow::Node sink, Variable v
748-
where pt.hasFlow(source, sink) and pt.passwordVarAssign(v, sink)
737+
from DataFlow::Node sink, Variable v
738+
where PasswordFlow::flow(_, sink) and passwordVarAssign(v, sink)
749739
select sink, "Password variable " + v + " is assigned a constant string."
750740
751-
752741
Syntax errors
753742
~~~~~~~~~~~~~
754743

docs/codeql/codeql-language-guides/data-flow-cheat-sheet-for-javascript.rst

+9-10
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,17 @@ Use the following template to create a taint tracking path query:
1616
* @kind path-problem
1717
*/
1818
import javascript
19-
import DataFlow
20-
import DataFlow::PathGraph
21-
22-
class MyConfig extends TaintTracking::Configuration {
23-
MyConfig() { this = "MyConfig" }
24-
override predicate isSource(Node node) { ... }
25-
override predicate isSink(Node node) { ... }
26-
override predicate isAdditionalTaintStep(Node pred, Node succ) { ... }
19+
20+
module MyConfig implements DataFlow::ConfigSig {
21+
predicate isSource(DataFlow::Node node) { ... }
22+
predicate isSink(DataFlow::Node node) { ... }
23+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { ... }
2724
}
2825
29-
from MyConfig cfg, PathNode source, PathNode sink
30-
where cfg.hasFlowPath(source, sink)
26+
module MyFlow = TaintTracking::Global<MyConfig>;
27+
28+
from MyFlow::PathNode source, MyFlow::PathNode sink
29+
where MyFlow::flowPath(source, sink)
3130
select sink.getNode(), source, sink, "taint from $@.", source.getNode(), "here"
3231
3332
This query reports flow paths which:

0 commit comments

Comments
 (0)