Skip to content

Commit 7008827

Browse files
author
Alvaro Muñoz
authored
Merge pull request #10 from GitHubSecurityLab/job_outputs
feat(field-flow): Refactor flow through job outputs
2 parents 7139d3b + f65587e commit 7008827

File tree

8 files changed

+61
-99
lines changed

8 files changed

+61
-99
lines changed

ql/lib/codeql/actions/Ast.qll

+4-26
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ class JobStmt extends Statement instanceof Actions::Job {
149149
* out1: ${steps.foo.bar}
150150
* out2: ${steps.foo.baz}
151151
*/
152-
JobOutputStmt getOutputStmt() { result = this.(Actions::Job).lookup("outputs") }
152+
OutputsStmt getOutputsStmt() { result = this.(Actions::Job).lookup("outputs") }
153153

154154
/**
155155
* Reusable workflow jobs may have Uses children
@@ -166,28 +166,6 @@ class JobStmt extends Statement instanceof Actions::Job {
166166
}
167167
}
168168

169-
/**
170-
* Declaration of the outputs for the job.
171-
* eg:
172-
* out1: ${steps.foo.bar}
173-
* out2: ${steps.foo.baz}
174-
*/
175-
class JobOutputStmt extends Statement instanceof YamlMapping {
176-
JobStmt job;
177-
178-
JobOutputStmt() { job.(YamlMapping).lookup("outputs") = this }
179-
180-
YamlMapping asYamlMapping() { result = this }
181-
182-
/**
183-
* Gets a specific value expression
184-
* eg: ${steps.foo.bar}
185-
*/
186-
Expression getOutputExpr(string id) {
187-
this.(YamlMapping).maps(any(YamlScalar s | s.getValue() = id), result)
188-
}
189-
}
190-
191169
/**
192170
* A Step is a single task that can be executed as part of a job.
193171
*/
@@ -435,9 +413,9 @@ class NeedsCtxAccessExpr extends CtxAccessExpr {
435413
job.getLocation().getFile() = this.getLocation().getFile() and
436414
(
437415
// regular jobs
438-
job.getOutputStmt().getOutputExpr(fieldName) = result
416+
job.getOutputsStmt() = result
439417
or
440-
// jobs calling reusable workflows
418+
// reusable workflow calling jobs
441419
job.getUsesExpr() = result
442420
)
443421
}
@@ -464,7 +442,7 @@ class JobsCtxAccessExpr extends CtxAccessExpr {
464442
exists(JobStmt job |
465443
job.getId() = jobId and
466444
job.getLocation().getFile() = this.getLocation().getFile() and
467-
job.getOutputStmt().getOutputExpr(fieldName) = result
445+
job.getOutputsStmt() = result
468446
)
469447
}
470448
}

ql/lib/codeql/actions/controlflow/internal/Cfg.qll

+1-5
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ private class JobTree extends StandardPreOrderTree instanceof JobStmt {
231231
rank[i](Expression child, Location l |
232232
(
233233
child = super.getAStepStmt() or
234-
child = super.getOutputStmt() or
234+
child = super.getOutputsStmt() or
235235
child = super.getUsesExpr()
236236
) and
237237
l = child.getLocation()
@@ -243,10 +243,6 @@ private class JobTree extends StandardPreOrderTree instanceof JobStmt {
243243
}
244244
}
245245

246-
private class JobOutputTree extends StandardPreOrderTree instanceof JobOutputStmt {
247-
override ControlFlowTree getChildNode(int i) { result = super.asYamlMapping().getValueNode(i) }
248-
}
249-
250246
private class StepUsesTree extends StandardPreOrderTree instanceof StepUsesExpr {
251247
override ControlFlowTree getChildNode(int i) {
252248
result =

ql/lib/codeql/actions/dataflow/ExternalFlow.qll

+15-11
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,22 @@ predicate externallyDefinedSource(DataFlow::Node source, string sourceType, stri
5050
) and
5151
(
5252
if fieldName.trim().matches("env.%")
53-
then source.asExpr() = uses.getEnvExpr(fieldName.trim().replaceAll("env\\.", ""))
53+
then source.asExpr() = uses.getEnvExpr(fieldName.trim().replaceAll("env.", ""))
5454
else
5555
if fieldName.trim().matches("output.%")
56-
then
57-
// 'output.' is the default qualifier
58-
source.asExpr() = uses
56+
then source.asExpr() = uses
5957
else none()
6058
) and
6159
sourceType = kind
6260
)
6361
}
6462

65-
predicate externallyDefinedSummary(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
63+
predicate externallyDefinedStoreStep(
64+
DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c
65+
) {
6666
exists(UsesExpr uses, string action, string version, string input, string output |
67-
c = any(DataFlow::FieldContent ct | ct.getName() = output.replaceAll("output\\.", "")) and
6867
summaryModel(action, version, input, output, "taint") and
68+
c = any(DataFlow::FieldContent ct | ct.getName() = output.replaceAll("output.", "")) and
6969
uses.getCallee() = action.toLowerCase() and
7070
(
7171
if version.trim() = "*"
@@ -74,10 +74,11 @@ predicate externallyDefinedSummary(DataFlow::Node pred, DataFlow::Node succ, Dat
7474
) and
7575
(
7676
if input.trim().matches("env.%")
77-
then pred.asExpr() = uses.getEnvExpr(input.trim().replaceAll("env\\.", ""))
77+
then pred.asExpr() = uses.getEnvExpr(input.trim().replaceAll("env.", ""))
7878
else
79-
// 'input.' is the default qualifier
80-
pred.asExpr() = uses.getArgumentExpr(input.trim().replaceAll("input\\.", ""))
79+
if input.trim().matches("input.%")
80+
then pred.asExpr() = uses.getArgumentExpr(input.trim().replaceAll("input.", ""))
81+
else none()
8182
) and
8283
succ.asExpr() = uses
8384
)
@@ -87,8 +88,11 @@ predicate externallyDefinedSink(DataFlow::ExprNode sink, string kind) {
8788
exists(UsesExpr uses, string action, string version, string input |
8889
(
8990
if input.trim().matches("env.%")
90-
then sink.asExpr() = uses.getEnvExpr(input.trim().replaceAll("input\\.", ""))
91-
else sink.asExpr() = uses.getArgumentExpr(input.trim())
91+
then sink.asExpr() = uses.getEnvExpr(input.trim().replaceAll("env.", ""))
92+
else
93+
if input.trim().matches("input.%")
94+
then sink.asExpr() = uses.getArgumentExpr(input.trim().replaceAll("input.", ""))
95+
else none()
9296
) and
9397
sinkModel(action, version, input, kind) and
9498
uses.getCallee() = action.toLowerCase() and

ql/lib/codeql/actions/dataflow/FlowSteps.qll

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class AdditionalTaintStep extends Unit {
4040
* echo "foo=$(echo $TAINTED)" >> $GITHUB_OUTPUT
4141
* echo "test=${{steps.step1.outputs.MSG}}" >> "$GITHUB_OUTPUT"
4242
*/
43-
predicate runEnvToScriptstep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
43+
predicate runEnvToScriptStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
4444
exists(RunExpr r, string varName, string output |
4545
c = any(DataFlow::FieldContent ct | ct.getName() = output.replaceAll("output\\.", "")) and
4646
r.getEnvExpr(varName) = pred.asExpr() and

ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll

+29-52
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
133133

134134
newtype TContent =
135135
TFieldContent(string name) {
136+
// We only use field flow for steps and jobs outputs, not for accessing other context fields such as env or inputs
136137
name = any(StepsCtxAccessExpr a).getFieldName() or
137138
name = any(NeedsCtxAccessExpr a).getFieldName() or
138139
name = any(JobsCtxAccessExpr a).getFieldName()
@@ -188,35 +189,22 @@ class ArgumentPosition extends string {
188189
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
189190

190191
/**
191-
* Holds if there is a local flow step between a ${{}} expression accesing a step output variable and the step output itself
192-
* But only for those cases where the step output is defined externally in a MaD specification.
193-
* The reason for this is that we don't currently have a way to specify that a source starts with a non-empty access
194-
* path so the easiest thing is to add the corresponding read steps of that field as local flow steps as well.
195-
* e.g. ${{ steps.step1.output.foo }}
192+
* Holds if there is a local flow step between a ${{ steps.xxx.outputs.yyy }} expression accesing a step output field
193+
* and the step output itself. But only for those cases where the step output is defined externally in a MaD Source
194+
* specification. The reason for this is that we don't currently have a way to specify that a source starts with a
195+
* non-empty access path so we cannot write a Source that stores the taint in a Content, we can only do that for steps
196+
* (storeStep). The easiest thing is to add this local flow step that simulates a read step from the source node for a specific
197+
* field name.
196198
*/
197199
predicate stepsCtxLocalStep(Node nodeFrom, Node nodeTo) {
198-
exists(StepStmt astFrom, StepsCtxAccessExpr astTo |
200+
exists(UsesExpr astFrom, StepsCtxAccessExpr astTo |
199201
externallyDefinedSource(nodeFrom, _, "output." + astTo.getFieldName()) and
200-
astFrom instanceof UsesExpr and
201202
astFrom = nodeFrom.asExpr() and
202203
astTo = nodeTo.asExpr() and
203204
astTo.getRefExpr() = astFrom
204205
)
205206
}
206207

207-
/**
208-
* Holds if there is a local flow step between a ${{}} expression accesing a job output variable and the job output itself
209-
* e.g. ${{ needs.job1.output.foo }} or ${{ jobs.job1.output.foo }}
210-
*/
211-
predicate jobsCtxLocalStep(Node nodeFrom, Node nodeTo) {
212-
exists(Expression astFrom, CtxAccessExpr astTo |
213-
astFrom = nodeFrom.asExpr() and
214-
astTo = nodeTo.asExpr() and
215-
astTo.getRefExpr() = astFrom and
216-
(astTo instanceof NeedsCtxAccessExpr or astTo instanceof JobsCtxAccessExpr)
217-
)
218-
}
219-
220208
/**
221209
* Holds if there is a local flow step between a ${{}} expression accesing an input variable and the input itself
222210
* e.g. ${{ inputs.foo }}
@@ -252,7 +240,6 @@ predicate envCtxLocalStep(Node nodeFrom, Node nodeTo) {
252240
pragma[nomagic]
253241
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
254242
stepsCtxLocalStep(nodeFrom, nodeTo) or
255-
jobsCtxLocalStep(nodeFrom, nodeTo) or
256243
inputsCtxLocalStep(nodeFrom, nodeTo) or
257244
envCtxLocalStep(nodeFrom, nodeTo)
258245
}
@@ -273,42 +260,35 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { localFlowStep(nodeFr
273260
predicate jumpStep(Node nodeFrom, Node nodeTo) { none() }
274261

275262
/**
276-
* A read step to read the value of a ReusableWork uses step and connect it to its
277-
* corresponding JobOutputAccessExpr
263+
* Holds if a CtxAccessExpr reads a field from a job (needs/jobs), step (steps) output via a read of `c` (fieldname)
278264
*/
279-
predicate reusableWorkflowReturnReadStep(Node node1, Node node2, ContentSet c) {
280-
exists(NeedsCtxAccessExpr expr, string fieldName |
281-
expr.usesReusableWorkflow() and
282-
expr.getRefExpr() = node1.asExpr() and
283-
expr.getFieldName() = fieldName and
284-
expr = node2.asExpr() and
285-
c = any(FieldContent ct | ct.getName() = fieldName)
265+
predicate ctxFieldReadStep(Node node1, Node node2, ContentSet c) {
266+
exists(CtxAccessExpr access |
267+
(
268+
access instanceof NeedsCtxAccessExpr or
269+
access instanceof StepsCtxAccessExpr or
270+
access instanceof JobsCtxAccessExpr
271+
) and
272+
c = any(FieldContent ct | ct.getName() = access.getFieldName()) and
273+
node1.asExpr() = access.getRefExpr() and
274+
node2.asExpr() = access
286275
)
287276
}
288277

289278
/**
290279
* Holds if data can flow from `node1` to `node2` via a read of `c`. Thus,
291280
* `node1` references an object with a content `c.getAReadContent()` whose
292281
* value ends up in `node2`.
282+
* Store steps without corresponding reads are pruned aggressively very early, since they can never contribute to a complete path.
293283
*/
294-
predicate readStep(Node node1, ContentSet c, Node node2) {
295-
// TODO: Extract to its own predicate
296-
exists(StepsCtxAccessExpr access |
297-
c = any(FieldContent ct | ct.getName() = access.getFieldName()) and
298-
node1.asExpr() = access.getRefExpr() and
299-
node2.asExpr() = access
300-
)
301-
or
302-
reusableWorkflowReturnReadStep(node1, node2, c)
303-
}
284+
predicate readStep(Node node1, ContentSet c, Node node2) { ctxFieldReadStep(node1, node2, c) }
304285

305286
/**
306-
* A store step to store the value of a ReusableWorkflowStmt output expr into the return node (node2)
307-
* with a given access path (fieldName)
287+
* Stores an output expression (node1) into its OutputsStm node (node2)
288+
* using the output variable name as the access path
308289
*/
309-
predicate reusableWorkflowReturnStoreStep(Node node1, Node node2, ContentSet c) {
310-
exists(ReusableWorkflowStmt stmt, OutputsStmt out, string fieldName |
311-
out = stmt.getOutputsStmt() and
290+
predicate fieldStoreStep(Node node1, Node node2, ContentSet c) {
291+
exists(OutputsStmt out, string fieldName |
312292
node1.asExpr() = out.getOutputExpr(fieldName) and
313293
node2.asExpr() = out and
314294
c = any(FieldContent ct | ct.getName() = fieldName)
@@ -319,15 +299,12 @@ predicate reusableWorkflowReturnStoreStep(Node node1, Node node2, ContentSet c)
319299
* Holds if data can flow from `node1` to `node2` via a store into `c`. Thus,
320300
* `node2` references an object with a content `c.getAStoreContent()` that
321301
* contains the value of `node1`.
302+
* Store steps without corresponding reads are pruned aggressively very early, since they can never contribute to a complete path.
322303
*/
323304
predicate storeStep(Node node1, ContentSet c, Node node2) {
324-
reusableWorkflowReturnStoreStep(node1, node2, c)
325-
or
326-
// TODO: rename to xxxxStoreStep
327-
externallyDefinedSummary(node1, node2, c)
328-
or
329-
// TODO: rename to xxxxStoreStep
330-
runEnvToScriptstep(node1, node2, c)
305+
fieldStoreStep(node1, node2, c) or
306+
externallyDefinedStoreStep(node1, node2, c) or
307+
runEnvToScriptStoreStep(node1, node2, c)
331308
}
332309

333310
/**

ql/lib/ext/PLACEHOLDER.model.yml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/actions-all
4+
extensible: sinkModel
5+
data:
6+
- ["","","",""]
7+

ql/lib/ext/frabert_replace-string-action.model.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ extensions:
33
pack: codeql/actions-all
44
extensible: summaryModel
55
data:
6-
- ["frabert/replace-string-action", "*", "string", "replaced", "taint"]
7-
- ["frabert/replace-string-action", "*", "replace-with", "replaced", "taint"]
6+
- ["frabert/replace-string-action", "*", "input.string", "output.replaced", "taint"]
7+
- ["frabert/replace-string-action", "*", "input.replace-with", "output.replaced", "taint"]

ql/lib/ext/mad9000_actions-find-and-replace-string.model.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ extensions:
33
pack: codeql/actions-all
44
extensible: summaryModel
55
data:
6-
- ["mad9000/actions-find-and-replace-string", "*", "source", "value", "taint"]
7-
- ["mad9000/actions-find-and-replace-string", "*", "replace", "value", "taint"]
6+
- ["mad9000/actions-find-and-replace-string", "*", "input.source", "output.value", "taint"]
7+
- ["mad9000/actions-find-and-replace-string", "*", "input.replace", "output.value", "taint"]

0 commit comments

Comments
 (0)