From a23c5091016ff7112a305674121408e0fffd5624 Mon Sep 17 00:00:00 2001 From: elsapet Date: Wed, 5 Jun 2024 12:52:50 +0200 Subject: [PATCH] fix(python): handle reflexive methods (#1622) * fix(python): handle reflexive methods * fix: handle identifier case (import) * fix: update snapshots following analyzer changes --- .../python/.snapshots/TestFlow--flow.yml | 28 +++++++++++++++++++ pkg/languages/python/analyzer/analyzer.go | 25 ++++++++++++++--- .../.snapshots/TestPythonObjects-object_class | 13 +++++++-- .../TestPythonObjects-object_no_class | 1 - .../.snapshots/TestPythonString-string | 10 +++++++ pkg/languages/python/testdata/flow/flow.py | 14 +++++++++- 6 files changed, 82 insertions(+), 9 deletions(-) diff --git a/pkg/languages/python/.snapshots/TestFlow--flow.yml b/pkg/languages/python/.snapshots/TestFlow--flow.yml index 9afe42bcb..420bcad57 100644 --- a/pkg/languages/python/.snapshots/TestFlow--flow.yml +++ b/pkg/languages/python/.snapshots/TestFlow--flow.yml @@ -55,4 +55,32 @@ high: parent_line_number: 7 fingerprint: 22039dd750c8bd604904ee9f5bc626f0_1 old_fingerprint: 22039dd750c8bd604904ee9f5bc626f0_1 + - rule: + cwe_ids: + - "42" + id: flow_test + title: Test dataflow and variables + description: Test dataflow and variables + documentation_url: "" + line_number: 13 + full_filename: flow.py + filename: flow.py + source: + location: + start: 13 + end: 13 + column: + start: 5 + end: 19 + sink: + location: + start: 13 + end: 13 + column: + start: 5 + end: 19 + content: "" + parent_line_number: 13 + fingerprint: 22039dd750c8bd604904ee9f5bc626f0_2 + old_fingerprint: 22039dd750c8bd604904ee9f5bc626f0_2 diff --git a/pkg/languages/python/analyzer/analyzer.go b/pkg/languages/python/analyzer/analyzer.go index 21253c7ee..5fd75a2a6 100644 --- a/pkg/languages/python/analyzer/analyzer.go +++ b/pkg/languages/python/analyzer/analyzer.go @@ -9,6 +9,13 @@ import ( "github.com/bearer/bearer/pkg/scanner/language" ) +var reflexiveMethods = []string{ + "decode", + "encode", + "format", + "replace", +} + type analyzer struct { builder *tree.Builder scope *language.Scope @@ -23,7 +30,7 @@ func New(builder *tree.Builder) language.Analyzer { func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) error { switch node.Type() { - case "class_definition", "block", "function_definition": + case "class_definition", "function_definition": return analyzer.withScope(language.NewScope(analyzer.scope), func() error { return visitChildren() }) @@ -100,10 +107,20 @@ func (analyzer *analyzer) analyzeAssignment(node *sitter.Node, visitChildren fun // foo.bar(a, b) func (analyzer *analyzer) analyzeCall(node *sitter.Node, visitChildren func() error) error { - if receiver := node.ChildByFieldName("function"); receiver != nil { - analyzer.lookupVariable(receiver) + if function := node.ChildByFieldName("function"); function != nil { + object := function.ChildByFieldName("object") + analyzer.lookupVariable(object) - analyzer.builder.Dataflow(node, receiver) + if function.Type() == "identifier" { + analyzer.builder.Dataflow(node, object) + } + + if function.Type() == "attribute" { + attribute := function.ChildByFieldName("attribute") + if attribute.Type() == "identifier" && slices.Contains(reflexiveMethods, analyzer.builder.ContentFor(attribute)) { + analyzer.builder.Dataflow(node, object) + } + } } if argumentsNode := node.ChildByFieldName("arguments"); argumentsNode != nil { diff --git a/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_class b/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_class index b20590186..e0d395598 100644 --- a/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_class +++ b/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_class @@ -23,6 +23,9 @@ children: - type: block id: 5 range: 2:5 - 8:33 + dataflow_sources: + - 6 + - 40 children: - type: function_definition id: 6 @@ -95,6 +98,9 @@ children: - type: block id: 23 range: 3:9 - 4:27 + dataflow_sources: + - 24 + - 32 children: - type: expression_statement id: 24 @@ -211,6 +217,9 @@ children: - type: block id: 48 range: 7:9 - 8:33 + dataflow_sources: + - 49 + - 62 children: - type: expression_statement id: 49 @@ -222,7 +231,6 @@ children: id: 50 range: 7:9 - 7:33 dataflow_sources: - - 51 - 55 children: - type: attribute @@ -289,7 +297,7 @@ children: id: 63 range: 8:9 - 8:33 dataflow_sources: - - 64 + - 0 - 65 children: - type: identifier @@ -311,7 +319,6 @@ children: id: 67 range: 8:15 - 8:32 dataflow_sources: - - 68 - 75 children: - type: attribute diff --git a/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class b/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class index b353d8e5b..dd26a6c6e 100644 --- a/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class +++ b/pkg/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class @@ -14,7 +14,6 @@ children: id: 2 range: 1:1 - 1:12 dataflow_sources: - - 3 - 7 children: - type: attribute diff --git a/pkg/languages/python/detectors/.snapshots/TestPythonString-string b/pkg/languages/python/detectors/.snapshots/TestPythonString-string index 1e7437132..95506b59e 100644 --- a/pkg/languages/python/detectors/.snapshots/TestPythonString-string +++ b/pkg/languages/python/detectors/.snapshots/TestPythonString-string @@ -23,6 +23,9 @@ children: - type: block id: 5 range: 2:5 - 12:31 + dataflow_sources: + - 6 + - 13 children: - type: expression_statement id: 6 @@ -89,6 +92,13 @@ children: - type: block id: 21 range: 5:9 - 12:31 + dataflow_sources: + - 22 + - 35 + - 42 + - 49 + - 58 + - 65 children: - type: expression_statement id: 22 diff --git a/pkg/languages/python/testdata/flow/flow.py b/pkg/languages/python/testdata/flow/flow.py index c196f15b8..0063476c1 100644 --- a/pkg/languages/python/testdata/flow/flow.py +++ b/pkg/languages/python/testdata/flow/flow.py @@ -1,10 +1,22 @@ def with_statement(): with source() as value, other: cursor_sink(value) - + def for_statement(): for value in source(): result_sink(value) cursor_sink(value) # no match +def reflexive_methods(): + s = source() + x = s.format("hello") + result_sink(x) + cursor_sink(x) # no match + +def non_reflexive_methods(): + s = source() + x = s.my_method("hello") + result_sink(x) # no match + cursor_sink(x) # no match + cursor_sink(value) # no match