From 7f24699f0d78db892126111eb1f1ebdf54faa089 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 9 Dec 2024 10:33:39 +0100 Subject: [PATCH 1/4] JS: Add class harness callable --- .../dataflow/internal/DataFlowPrivate.qll | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll index 2fcc2acbd167..d3dc29a0ee8b 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll @@ -373,7 +373,15 @@ cached newtype TDataFlowCallable = MkSourceCallable(StmtContainer container) or MkLibraryCallable(LibraryCallable callable) or - MkFileCallable(File file) + MkFileCallable(File file) or + MkClassHarnessCallable(Function f) { + // We only need a class harness for functions that act as classes (i.e. constructors), + // but since DataFlow::Node has not been materialised at this stage, we can't use DataFlow::ClassNode. + // Exclude arrow functions as they can't be called with 'new'. + not f instanceof ArrowFunctionExpr and + // We also don't need harnesses for externs + not f.getTopLevel().isExterns() + } /** * A callable entity. This is a wrapper around either a `StmtContainer`, `LibraryCallable`, or `File`. @@ -383,14 +391,20 @@ class DataFlowCallable extends TDataFlowCallable { string toString() { result = this.asSourceCallable().toString() or - result = this.asLibraryCallable() + result = this.asLibraryCallable().toString() or result = this.asFileCallable().toString() + or + result = this.asClassHarness().toString() } /** Gets the location of this callable, if it is present in the source code. */ Location getLocation() { - result = this.asSourceCallable().getLocation() or result = this.asFileCallable().getLocation() + result = this.asSourceCallable().getLocation() + or + result = this.asFileCallable().getLocation() + or + result = this.asClassHarness().getLocation() } /** Gets the corresponding `StmtContainer` if this is a source callable. */ @@ -399,6 +413,9 @@ class DataFlowCallable extends TDataFlowCallable { /** Gets the corresponding `File` if this is a file representing a callable. */ File asFileCallable() { this = MkFileCallable(result) } + /** Gets the class constructor for which this is a class harness. */ + Function asClassHarness() { this = MkClassHarnessCallable(result) } + /** Gets the corresponding `StmtContainer` if this is a source callable. */ pragma[nomagic] StmtContainer asSourceCallableNotExterns() { From 93d0b412b2fb0e931ca0c4ee1e05bc52e103e666 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 9 Dec 2024 11:16:22 +0100 Subject: [PATCH 2/4] JS: Add test for flow from constructor to method --- .../library-tests/TripleDot/class-harness.js | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 javascript/ql/test/library-tests/TripleDot/class-harness.js diff --git a/javascript/ql/test/library-tests/TripleDot/class-harness.js b/javascript/ql/test/library-tests/TripleDot/class-harness.js new file mode 100644 index 000000000000..093233937957 --- /dev/null +++ b/javascript/ql/test/library-tests/TripleDot/class-harness.js @@ -0,0 +1,40 @@ +import 'dummy'; + +function h1() { + class C { + constructor(arg) { + this.x = source("h1.1") + } + method() { + sink(this.x); // $ MISSING: hasValueFlow=h1.1 + } + } +} + +function h2() { + class C { + method1() { + this.x = source("h2.1") + } + method2() { + sink(this.x); // $ MISSING: hasValueFlow=h2.1 + } + } +} + +function h3() { + class C { + constructor(arg) { + this.x = arg; + } + method1() { + sink(this.x); // $ hasValueFlow=h3.2 + } + method2() { + sink(this.x); // $ hasValueFlow=h3.3 + } + } + new C(source("h3.1")); + new C(source("h3.2")).method1(); + new C(source("h3.3")).method2(); +} From 25c3d8fce947e6d412a71ace8af6b83ba241c15e Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 9 Dec 2024 11:37:32 +0100 Subject: [PATCH 3/4] JS: Add more features to AdditionalFlowInternal This lets us place some more logic outside of the giant DataFlowPrivate file --- .../internal/AdditionalFlowInternal.qll | 18 +++++++ .../dataflow/internal/DataFlowPrivate.qll | 49 +++++++++++++++++-- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll index d7f92ce8dd30..c162491590d9 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll @@ -9,6 +9,14 @@ DataFlow::Node getSynthesizedNode(AstNode node, string tag) { result = TGenericSynthesizedNode(node, tag, _) } +DataFlowCallable getSynthesizedCallable(AstNode node, string tag) { + result = MkGenericSynthesizedCallable(node, tag) +} + +DataFlowCall getSynthesizedCall(AstNode node, string tag) { + result = MkGenericSynthesizedCall(node, tag, _) +} + /** * An extension to `AdditionalFlowStep` with additional internal-only predicates. */ @@ -22,6 +30,10 @@ class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep { */ predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) { none() } + predicate needsSynthesizedCallable(AstNode node, string tag) { none() } + + predicate needsSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { none() } + /** * Holds if `node` should only permit flow of values stored in `contents`. */ @@ -31,4 +43,10 @@ class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep { * Holds if `node` should not permit flow of values stored in `contents`. */ predicate clearsContent(DataFlow::Node node, DataFlow::ContentSet contents) { none() } + + predicate argument(DataFlowCall call, ArgumentPosition pos, DataFlow::Node value) { none() } + + predicate postUpdate(DataFlow::Node pre, DataFlow::Node post) { none() } + + predicate viableCallable(DataFlowCall call, DataFlowCallable target) { none() } } diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll index d3dc29a0ee8b..ef81b35a1ebe 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll @@ -13,6 +13,7 @@ private import sharedlib.FlowSummaryImpl as FlowSummaryImpl private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate private import semmle.javascript.dataflow.FlowSummary as FlowSummary private import semmle.javascript.dataflow.internal.BarrierGuards +private import codeql.util.Boolean class DataFlowSecondLevelScope = Unit; @@ -363,6 +364,8 @@ predicate postUpdatePair(Node pre, Node post) { pre.(FlowSummaryNode).getSummaryNode()) or VariableCaptureOutput::capturePostUpdateNode(getClosureNode(post), getClosureNode(pre)) + or + any(AdditionalFlowInternal f).postUpdate(pre, post) } class CastNode extends DataFlow::Node { @@ -381,10 +384,16 @@ newtype TDataFlowCallable = not f instanceof ArrowFunctionExpr and // We also don't need harnesses for externs not f.getTopLevel().isExterns() + } or + /** + * A callable entity. This is a wrapper around either a `StmtContainer`, `LibraryCallable`, or `File`. + */ + MkGenericSynthesizedCallable(AstNode node, string tag) { + any(AdditionalFlowInternal f).needsSynthesizedCallable(node, tag) } /** - * A callable entity. This is a wrapper around either a `StmtContainer`, `LibraryCallable`, or `File`. + * A callable entity. */ class DataFlowCallable extends TDataFlowCallable { /** Gets a string representation of this callable. */ @@ -395,7 +404,7 @@ class DataFlowCallable extends TDataFlowCallable { or result = this.asFileCallable().toString() or - result = this.asClassHarness().toString() + this.isGenericSynthesizedCallable(_, result) } /** Gets the location of this callable, if it is present in the source code. */ @@ -404,7 +413,10 @@ class DataFlowCallable extends TDataFlowCallable { or result = this.asFileCallable().getLocation() or - result = this.asClassHarness().getLocation() + exists(AstNode node | + this.isGenericSynthesizedCallable(node, _) and + result = node.getLocation() + ) } /** Gets the corresponding `StmtContainer` if this is a source callable. */ @@ -414,7 +426,9 @@ class DataFlowCallable extends TDataFlowCallable { File asFileCallable() { this = MkFileCallable(result) } /** Gets the class constructor for which this is a class harness. */ - Function asClassHarness() { this = MkClassHarnessCallable(result) } + predicate isGenericSynthesizedCallable(AstNode node, string tag) { + this = MkGenericSynthesizedCallable(node, tag) + } /** Gets the corresponding `StmtContainer` if this is a source callable. */ pragma[nomagic] @@ -544,6 +558,8 @@ private predicate isArgumentNodeImpl(Node n, DataFlowCall call, ArgumentPosition n = TDynamicArgumentArrayNode(invoke) and pos.isDynamicArgumentArray() ) + or + any(AdditionalFlowInternal f).argument(call, pos, n) } predicate isArgumentNode(ArgumentNode n, DataFlowCall call, ArgumentPosition pos) { @@ -791,7 +807,7 @@ ContentApprox getContentApprox(Content c) { } cached -private newtype TDataFlowCall = +newtype TDataFlowCall = MkOrdinaryCall(DataFlow::InvokeNode node) or MkPartialCall(DataFlow::PartialInvokeNode node, DataFlow::Node callback) { callback = node.getACallbackNode() @@ -812,6 +828,9 @@ private newtype TDataFlowCall = FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver ) { FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) + } or + MkGenericSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { + any(AdditionalFlowInternal f).needsSynthesizedCall(node, tag, container) } private module TotalOrdering { @@ -877,6 +896,10 @@ class DataFlowCall extends TDataFlowCall { this = MkSummaryCall(enclosingCallable, receiver) } + predicate isGenericSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { + this = MkGenericSynthesizedCall(node, tag, container) + } + Location getLocation() { none() } // Overridden in subclass int totalorder() { @@ -995,6 +1018,20 @@ private class ImpliedLambdaCall extends DataFlowCall, MkImpliedLambdaCall { } } +class GenericSynthesizedCall extends DataFlowCall, MkGenericSynthesizedCall { + private AstNode node; + private string tag; + private DataFlowCallable container; + + GenericSynthesizedCall() { this = MkGenericSynthesizedCall(node, tag, container) } + + override string toString() { result = tag } + + override Location getLocation() { result = node.getLocation() } + + override DataFlowCallable getEnclosingCallable() { result = container } +} + private int getMaxArity() { // TODO: account for flow summaries result = @@ -1092,6 +1129,8 @@ DataFlowCallable viableCallable(DataFlowCall node) { ) or result.asSourceCallableNotExterns() = node.asImpliedLambdaCall() + or + any(AdditionalFlowInternal f).viableCallable(node, result) } private DataFlowCall getACallOnThis(DataFlow::ClassNode cls) { From a18b9ff637e83204c7bdc413fe33f083c4f06ceb Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 9 Dec 2024 11:57:00 +0100 Subject: [PATCH 4/4] JS: Add ClassHarness --- .../flow_summaries/AllFlowSummaries.qll | 1 + .../internal/flow_summaries/ClassHarness.qll | 104 ++++++++++++++++++ .../library-tests/TripleDot/class-harness.js | 4 +- 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll diff --git a/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll index 940180d80cb4..b1d42bd1f3fb 100644 --- a/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll +++ b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll @@ -12,3 +12,4 @@ private import Sets private import Strings private import DynamicImportStep private import UrlSearchParams +private import ClassHarness diff --git a/javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll new file mode 100644 index 000000000000..2a8bb2580f78 --- /dev/null +++ b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll @@ -0,0 +1,104 @@ +/** + * Contains flow for the "class harness", which facilitates flow from constructor to methods in a class. + */ + +private import javascript +private import semmle.javascript.dataflow.internal.DataFlowNode +private import semmle.javascript.dataflow.internal.AdditionalFlowInternal +private import semmle.javascript.dataflow.internal.DataFlowPrivate + +/** + * Synthesizes a callable for each class, which invokes the class constructor and every + * instance method with the same value of `this`. + * + * This ensures flow between methods in a class when the source originated "within the class", + * but not when the flow into the field came from an argument. + * + * For example: + * ```js + * class C { + * constructor(arg) { + * this.x = sourceOfTaint(); + * this.y = arg; + * } + * method() { + * sink(this.x); // sourceOfTaint() flows here + * sink(this.y); // but 'arg' does not flow here (only through real call sites) + * } + * } + * ``` + * + * The class harness for a class `C` can roughly be thought of as the following code: + * ```js + * function classHarness() { + * var c = new C(); + * while (true) { + * // call an arbitrary instance methods in the loop + * c.arbitraryInstaceMethod(); + * } + * } + * ``` + * + * This is realized with the following data flow graph: + * ``` + * [Call to constructor] + * | + * | post-update for 'this' argument + * V + * [Data flow node] <----------------------+ + * | | + * | 'this' argument | post-update for 'this' argument + * V | + * [Call to an instance method] -----------+ + * ``` + */ +class ClassHarnessModel extends AdditionalFlowInternal { + override predicate needsSynthesizedCallable(AstNode node, string tag) { + node instanceof Function and + not node instanceof ArrowFunctionExpr and // can't be called with 'new' + not node.getTopLevel().isExterns() and // we don't need harnesses in externs + tag = "class-harness" + } + + override predicate needsSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { + container = getSynthesizedCallable(node, "class-harness") and + tag = ["class-harness-constructor-call", "class-harness-method-call"] + } + + override predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) { + // We synthesize two nodes, but note that `class-harness-constructor-this-arg` never actually has any + // ingoing flow, we just need it to specify which post-update node to use for that argument. + container = getSynthesizedCallable(node, "class-harness") and + tag = ["class-harness-constructor-this-arg", "class-harness-method-this-arg"] + } + + override predicate argument(DataFlowCall call, ArgumentPosition pos, DataFlow::Node value) { + pos.isThis() and + exists(Function f | + call = getSynthesizedCall(f, "class-harness-constructor-call") and + value = getSynthesizedNode(f, "class-harness-constructor-this-arg") + or + call = getSynthesizedCall(f, "class-harness-method-call") and + value = getSynthesizedNode(f, "class-harness-method-this-arg") + ) + } + + override predicate postUpdate(DataFlow::Node pre, DataFlow::Node post) { + exists(Function f | + pre = + getSynthesizedNode(f, + ["class-harness-constructor-this-arg", "class-harness-method-this-arg"]) and + post = getSynthesizedNode(f, "class-harness-method-this-arg") + ) + } + + override predicate viableCallable(DataFlowCall call, DataFlowCallable target) { + exists(DataFlow::ClassNode cls, Function f | f = cls.getConstructor().getFunction() | + call = getSynthesizedCall(f, "class-harness-constructor-call") and + target.asSourceCallable() = f + or + call = getSynthesizedCall(f, "class-harness-method-call") and + target.asSourceCallable() = cls.getAnInstanceMember().getFunction() + ) + } +} diff --git a/javascript/ql/test/library-tests/TripleDot/class-harness.js b/javascript/ql/test/library-tests/TripleDot/class-harness.js index 093233937957..87a232fd9eee 100644 --- a/javascript/ql/test/library-tests/TripleDot/class-harness.js +++ b/javascript/ql/test/library-tests/TripleDot/class-harness.js @@ -6,7 +6,7 @@ function h1() { this.x = source("h1.1") } method() { - sink(this.x); // $ MISSING: hasValueFlow=h1.1 + sink(this.x); // $ hasValueFlow=h1.1 } } } @@ -17,7 +17,7 @@ function h2() { this.x = source("h2.1") } method2() { - sink(this.x); // $ MISSING: hasValueFlow=h2.1 + sink(this.x); // $ hasValueFlow=h2.1 } } }