diff --git a/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/CommandTest.kt b/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/CommandTest.kt index 6128a7b2f13..28b47c9922d 100644 --- a/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/CommandTest.kt +++ b/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/CommandTest.kt @@ -38,7 +38,8 @@ class CommandIntegrationTest { "--project-dir src/integrationTest/resources/demo-app --components webapp --components auth" ) assertEquals( - "Message(arguments=null, id=null, markdown=This is a **finding**, properties=null, text=null)\n", + "Message(arguments=null, id=null, markdown=null, properties=null, text=Query was successful)\n" + + "Message(arguments=null, id=null, markdown=null, properties=null, text=Query was successful)\n", result.output, ) } diff --git a/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/SarifTest.kt b/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/SarifTest.kt new file mode 100644 index 00000000000..2e448cd6143 --- /dev/null +++ b/codyze-compliance/src/integrationTest/kotlin/de/fraunhofer/aisec/codyze/compliance/SarifTest.kt @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2025, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ *                    $$$$$$\  $$$$$$$\   $$$$$$\
+ *                   $$  __$$\ $$  __$$\ $$  __$$\
+ *                   $$ /  \__|$$ |  $$ |$$ /  \__|
+ *                   $$ |      $$$$$$$  |$$ |$$$$\
+ *                   $$ |      $$  ____/ $$ |\_$$ |
+ *                   $$ |  $$\ $$ |      $$ |  $$ |
+ *                   \$$$$$   |$$ |      \$$$$$   |
+ *                    \______/ \__|       \______/
+ *
+ */
+package de.fraunhofer.aisec.codyze.compliance
+
+import de.fraunhofer.aisec.codyze.AnalysisProject
+import de.fraunhofer.aisec.cpg.graph.*
+import kotlin.io.path.Path
+import kotlin.io.path.createTempFile
+import kotlin.test.Test
+import kotlin.test.assertNotNull
+import kotlin.test.assertTrue
+
+/** Integration test for writing the SARIF output of a goal-driven analysis to disk. */
+class SarifTest {
+    /**
+     * Analyzes the `webapp` component of the demo app together with its security goals and checks
+     * that the resulting SARIF document can be serialized into a non-empty file.
+     */
+    @Test
+    fun testSarifFindings() {
+        val project =
+            AnalysisProject.from(
+                projectDir = Path("src/integrationTest/resources/demo-app"),
+                components = listOf("webapp"),
+            )
+
+        val result = project.analyzeWithGoals()
+        val tr = result.translationResult
+        val webappMain = tr.namespaces["webapp.main"]
+        assertNotNull(webappMain)
+
+        val tmpFile = createTempFile(prefix = "findings", suffix = ".sarif").toFile()
+        try {
+            result.writeSarifJson(tmpFile)
+            assertTrue(tmpFile.length() > 0)
+        } finally {
+            // Remove the temp file even when writing or the assertion fails, so that failing
+            // runs do not leave stale files behind
+            tmpFile.delete()
+        }
+    }
+}
diff --git a/codyze-compliance/src/integrationTest/resources/demo-app/components/auth/main.py b/codyze-compliance/src/integrationTest/resources/demo-app/components/auth/auth/main.py similarity index 100% rename from codyze-compliance/src/integrationTest/resources/demo-app/components/auth/main.py rename to codyze-compliance/src/integrationTest/resources/demo-app/components/auth/auth/main.py diff --git a/codyze-compliance/src/integrationTest/resources/demo-app/components/webapp/main.py b/codyze-compliance/src/integrationTest/resources/demo-app/components/webapp/main.py deleted file mode 100644 index 5388f2e573e..00000000000 --- a/codyze-compliance/src/integrationTest/resources/demo-app/components/webapp/main.py +++ /dev/null @@ -1,4 +0,0 @@ -print("Hello World") - -def encrypt(): - return very_good_encryption() diff --git a/codyze-compliance/src/integrationTest/resources/demo-app/components/webapp/webapp/main.py 
b/codyze-compliance/src/integrationTest/resources/demo-app/components/webapp/webapp/main.py new file mode 100644 index 00000000000..c6bb501b55c --- /dev/null +++ b/codyze-compliance/src/integrationTest/resources/demo-app/components/webapp/webapp/main.py @@ -0,0 +1,28 @@ +""" +Simulates the execution of a command line tool +""" +def execute(command, *args, stdin=None): + pass + +""" +Simulates the retrieval of a secret from a server +""" +def get_secret_from_server() -> str: + pass + + +def encrypt(): + my_secret = get_secret_from_server() + execute("encrypt", + "--very-good", + stdin=my_secret) + del my_secret + return + +def decrypt(): + my_secret = get_secret_from_server() + execute("decrypt", + "--very-good", + stdin=my_secret) + del my_secret + return diff --git a/codyze-compliance/src/integrationTest/resources/demo-app/queries/good-encryption.query.kts b/codyze-compliance/src/integrationTest/resources/demo-app/queries/good-encryption.query.kts deleted file mode 100644 index 7d05f0e4a93..00000000000 --- a/codyze-compliance/src/integrationTest/resources/demo-app/queries/good-encryption.query.kts +++ /dev/null @@ -1,16 +0,0 @@ -import de.fraunhofer.aisec.cpg.TranslationResult -import de.fraunhofer.aisec.cpg.graph.calls -import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration -import de.fraunhofer.aisec.cpg.query.QueryTree -import de.fraunhofer.aisec.cpg.query.allExtended - -fun statement1(tr: TranslationResult): QueryTree { - val result = tr.allExtended(sel = { - it.name.localName.contains("encrypt") && !it.isInferred - }) { - QueryTree(it.calls.any { - it.name.contains("very_good") - }) - } - return result -} \ No newline at end of file diff --git a/codyze-compliance/src/integrationTest/resources/demo-app/queries/proper-handling-of-key-material.query.kts b/codyze-compliance/src/integrationTest/resources/demo-app/queries/proper-handling-of-key-material.query.kts new file mode 100644 index 00000000000..5a54ca3b6b6 --- /dev/null +++ 
b/codyze-compliance/src/integrationTest/resources/demo-app/queries/proper-handling-of-key-material.query.kts
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025, Fraunhofer AISEC. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *                    $$$$$$\  $$$$$$$\   $$$$$$\
+ *                   $$  __$$\ $$  __$$\ $$  __$$\
+ *                   $$ /  \__|$$ |  $$ |$$ /  \__|
+ *                   $$ |      $$$$$$$  |$$ |$$$$\
+ *                   $$ |      $$  ____/ $$ |\_$$ |
+ *                   $$ |  $$\ $$ |      $$ |  $$ |
+ *                   \$$$$$   |$$ |      \$$$$$   |
+ *                    \______/ \__|       \______/
+ *
+ */
+import de.fraunhofer.aisec.cpg.TranslationResult
+import de.fraunhofer.aisec.cpg.graph.*
+import de.fraunhofer.aisec.cpg.graph.edges.*
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.DeleteExpression
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference
+import de.fraunhofer.aisec.cpg.query.QueryTree
+import de.fraunhofer.aisec.cpg.query.allExtended
+import de.fraunhofer.aisec.cpg.query.executionPath
+
+/**
+ * Statement 1: for every `execute("encrypt" | "decrypt", ...)` call, the value passed as `stdin`
+ * must be followed by a [DeleteExpression] that deletes the same declaration (checked with
+ * [executionPath] starting at the `stdin` argument).
+ *
+ * Calls without a `stdin` argument pass trivially.
+ */
+fun statement1(tr: TranslationResult): QueryTree<Boolean> {
+    return tr.allExtended<CallExpression>(
+        sel = { call ->
+            // firstOrNull: an `execute()` call without positional arguments is simply not
+            // selected instead of aborting the whole query with an IndexOutOfBoundsException
+            call.name.toString() == "execute" &&
+                call.arguments.firstOrNull()?.evaluate() in listOf("encrypt", "decrypt")
+        }
+    ) { call ->
+        val processInput = call.argumentEdges["stdin"]?.end
+        if (processInput == null) {
+            QueryTree(true)
+        } else {
+            // Declaration behind the stdin value; null if stdin is not a plain reference. A null
+            // key must never match, otherwise it would equal any unresolved reference.
+            val key = (processInput as? Reference)?.refersTo
+            executionPath(processInput) { to ->
+                key != null &&
+                    to is DeleteExpression &&
+                    to.operands.any { it is Reference && it.refersTo == key }
+            }
+        }
+    }
+}
diff --git a/codyze-compliance/src/integrationTest/resources/demo-app/security-goals/goal1.yaml b/codyze-compliance/src/integrationTest/resources/demo-app/security-goals/goal1.yaml index de0f02ad068..940e6a44b39 100644 --- a/codyze-compliance/src/integrationTest/resources/demo-app/security-goals/goal1.yaml +++ b/codyze-compliance/src/integrationTest/resources/demo-app/security-goals/goal1.yaml @@ -6,7 +6,7 @@ components: assumptions: - Third party code is very good objectives: - - name: Good encryption - description: Encryption used is very good + - name: Proper handling of key material + description: Sensitive material, such as keys are handled properly statements: - - For each algorithm A, if A is used, then A must be a very good cryptographic algorithm + - For each key K, if K is used in encryption or decryption, it must be deleted after use diff --git a/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/Command.kt b/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/Command.kt index 01d79849338..998f4fbad30 100644 --- a/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/Command.kt +++ b/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/Command.kt @@ -40,58 +40,23 @@ class ComplianceCommand : CliktCommand() { * all commands. */ abstract class ProjectCommand : CliktCommand() { - private val projectOptions by ProjectOptions() - private val translationOptions by TranslationOptions() - - /** Loads the security goals from the project. */ - fun loadSecurityGoals(): List { - return loadSecurityGoals(projectOptions.directory.resolve("security-goals")) - } - - /** - * This method is called by the `run` method to perform the actual analysis. It is separated to - * allow for easier access from overriding applications. 
- */ - protected fun analyze(): AnalysisResult { - // Load the security goals from the project - val goals = loadSecurityGoals(projectOptions.directory.resolve("security-goals")) - - // Analyze the project - val project = AnalysisProject.fromOptions(projectOptions, translationOptions) - val result = project.analyze() - val tr = result.translationResult - - // Connect the security goals to the translation result for now. Later we will add them to - // individual concepts - for (goal in goals) { - goal.underlyingNode = tr - - // Load and execute queries associated to the goals - for (objective in goal.objectives) { - objective.underlyingNode = tr - - val scriptFile = - projectOptions.directory - .resolve("queries") - .resolve( - "${objective.name.localName.lowercase().replace(" ", "-")}.query.kts" - ) - for (stmt in objective.statements.withIndex()) { - tr.evalQuery(scriptFile.toFile(), "statement${stmt.index + 1}") - } - } - } - - return result - } + protected val projectOptions by ProjectOptions() + protected val translationOptions by TranslationOptions() } /** The `scan` command. This will scan the project for compliance violations in the future. 
*/ open class ScanCommand : ProjectCommand() { override fun run() { - val result = analyze() + val project = + AnalysisProject.fromOptions(projectOptions, translationOptions) { + // just to show that we can use a config build here + it + } + val result = project.analyzeWithGoals() - result.run.results?.forEach { echo(it.message) } + result.sarif.runs.forEach { run -> + run.results?.forEach { result -> echo(result.message.toString()) } + } } } @@ -104,7 +69,9 @@ open class ScanCommand : ProjectCommand() { */ class ListSecurityGoals : ProjectCommand() { override fun run() { - val goals = loadSecurityGoals() + val project = AnalysisProject.fromOptions(projectOptions, translationOptions) + val goals = project.loadSecurityGoals() + // Print the name of each security goal goals.forEach { echo(it.name.localName) } } diff --git a/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/ProjectExtension.kt b/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/ProjectExtension.kt new file mode 100644 index 00000000000..19d66d68c3d --- /dev/null +++ b/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/ProjectExtension.kt @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2025, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ *                    $$$$$$\  $$$$$$$\   $$$$$$\
+ *                   $$  __$$\ $$  __$$\ $$  __$$\
+ *                   $$ /  \__|$$ |  $$ |$$ /  \__|
+ *                   $$ |      $$$$$$$  |$$ |$$$$\
+ *                   $$ |      $$  ____/ $$ |\_$$ |
+ *                   $$ |  $$\ $$ |      $$ |  $$ |
+ *                   \$$$$$   |$$ |      \$$$$$   |
+ *                    \______/ \__|       \______/
+ *
+ */
+package de.fraunhofer.aisec.codyze.compliance
+
+import de.fraunhofer.aisec.codyze.*
+import de.fraunhofer.aisec.cpg.TranslationResult
+import io.github.detekt.sarif4k.MultiformatMessageString
+import io.github.detekt.sarif4k.ReportingDescriptor
+import io.github.detekt.sarif4k.Result
+
+/**
+ * Loads the security goals from the project's security goals folder. Returns an empty list if the
+ * project has no such folder.
+ */
+fun AnalysisProject.loadSecurityGoals(): List<SecurityGoal> {
+    return securityGoalsFolder?.let { loadSecurityGoals(it) } ?: listOf()
+}
+
+/**
+ * Extends the regular [AnalysisProject.analyze] method with the ability to load security goals and
+ * execute queries based on them, by passing [executeSecurityGoalsQueries] as post-processing step.
+ */
+fun AnalysisProject.analyzeWithGoals(): AnalysisResult {
+    return this.analyze(postProcess = ::executeSecurityGoalsQueries)
+}
+
+/**
+ * Executes the security goals queries and returns the security goals as SARIF rules and the query
+ * results as SARIF results.
+ *
+ * For each objective, the script `queries/<objective-id>.query.kts` inside the project directory
+ * is used, where the objective ID is the lower-cased objective name with spaces replaced by `-`.
+ * The n-th statement of an objective (1-based) is evaluated by the script function `statement<n>`
+ * and reported under the rule ID `<objective-id>-statement<n>`.
+ *
+ * @param tr the translation result the goals are attached to and the queries are evaluated on
+ * @return the rules (one per statement) and the SARIF results of all evaluated queries. Both are
+ *   empty if the project has no project directory.
+ */
+fun AnalysisProject.executeSecurityGoalsQueries(
+    tr: TranslationResult
+): Pair<List<ReportingDescriptor>, List<Result>> {
+    val rules = mutableListOf<ReportingDescriptor>()
+    val results = mutableListOf<Result>()
+    val goals = loadSecurityGoals()
+
+    // Connect the security goals to the translation result for now. Later we will add them
+    // to individual concepts
+    for (goal in goals) {
+        goal.underlyingNode = tr
+
+        // Load and execute queries associated to the goals
+        for (objective in goal.objectives) {
+            val objectiveID = objective.name.localName.lowercase().replace(" ", "-")
+            objective.underlyingNode = tr
+
+            projectDir?.let { dir ->
+                val scriptFile = dir.resolve("queries").resolve("${objectiveID}.query.kts")
+                for ((index, statement) in objective.statements.withIndex()) {
+                    // Statements are numbered starting at 1, both in the script and in the rule
+                    val number = index + 1
+                    val statementID = "statement${number}"
+                    val rule =
+                        ReportingDescriptor(
+                            id = "${objectiveID}-${statementID}",
+                            name = "${objective.name.localName}: Statement $number",
+                            shortDescription = MultiformatMessageString(text = statement),
+                        )
+                    val queryResult = tr.evalQuery(scriptFile.toFile(), statementID, rule.id)
+                    results += queryResult.sarif
+
+                    rules += rule
+                }
+            }
+        }
+    }
+
+    return Pair(rules, results)
+}
diff --git a/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/SecurityGoal.kt b/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/SecurityGoal.kt index 7037bfcdf77..5ea7c9b4c2d 100644 --- a/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/SecurityGoal.kt +++ b/codyze-compliance/src/main/kotlin/de/fraunhofer/aisec/codyze/compliance/SecurityGoal.kt @@ -132,8 +132,8 @@ fun loadSecurityGoal(stream: InputStream, result: TranslationResult? = null): Se } /** - * This function returns a [Yaml] instance that is configured to use the given [result] to resolve - * components. + * This function returns a [com.charleskorn.kaml.Yaml] instance that is configured to use the given + * [result] to resolve components. 
*/ private fun yaml(result: TranslationResult?): Yaml { val module = SerializersModule { contextual(Component::class, ComponentSerializer(result)) } diff --git a/codyze-core/src/integrationTest/kotlin/codyze/QueryHostTest.kt b/codyze-core/src/integrationTest/kotlin/codyze/QueryHostTest.kt index 2c3c9f15dad..59dd22cf4e8 100644 --- a/codyze-core/src/integrationTest/kotlin/codyze/QueryHostTest.kt +++ b/codyze-core/src/integrationTest/kotlin/codyze/QueryHostTest.kt @@ -28,13 +28,11 @@ package codyze import de.fraunhofer.aisec.codyze.evalQuery import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguage import de.fraunhofer.aisec.cpg.graph.* -import de.fraunhofer.aisec.cpg.query.QueryTree import de.fraunhofer.aisec.cpg.test.analyze import java.io.File import kotlin.io.path.Path import kotlin.test.Test import kotlin.test.assertEquals -import kotlin.test.assertNotNull class QueryHostTest { @Test @@ -44,10 +42,12 @@ class QueryHostTest { analyze(listOf(topLevel.resolve("simple.py").toFile()), topLevel, true) { it.registerLanguage() } - val evalResult = - result.evalQuery(File("src/integrationTest/resources/simple.query.kts"), "statement1") - val queryTree = evalResult as? QueryTree<*> - assertNotNull(queryTree) - assertEquals(true, queryTree.value) + val queryResult = + result.evalQuery( + File("src/integrationTest/resources/simple.query.kts"), + "statement1", + "statement1", + ) + assertEquals(true, queryResult.tree.value) } } diff --git a/codyze-core/src/integrationTest/kotlin/codyze/SarifTest.kt b/codyze-core/src/integrationTest/kotlin/codyze/SarifTest.kt new file mode 100644 index 00000000000..c6550932fdd --- /dev/null +++ b/codyze-core/src/integrationTest/kotlin/codyze/SarifTest.kt @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2025, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *                    $$$$$$\  $$$$$$$\   $$$$$$\
+ *                   $$  __$$\ $$  __$$\ $$  __$$\
+ *                   $$ /  \__|$$ |  $$ |$$ /  \__|
+ *                   $$ |      $$$$$$$  |$$ |$$$$\
+ *                   $$ |      $$  ____/ $$ |\_$$ |
+ *                   $$ |  $$\ $$ |      $$ |  $$ |
+ *                   \$$$$$   |$$ |      \$$$$$   |
+ *                    \______/ \__|       \______/
+ *
+ */
+package codyze
+
+import de.fraunhofer.aisec.codyze.toSarifLocation
+import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguage
+import de.fraunhofer.aisec.cpg.graph.*
+import de.fraunhofer.aisec.cpg.test.analyze
+import kotlin.io.path.Path
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
+
+/** Integration test for the conversion of CPG nodes into SARIF locations. */
+class SarifTest {
+    /**
+     * Converts the function `foo` of `simple.py` into a SARIF location, once covering the whole
+     * function and once covering only its header, and checks the reported end positions as well
+     * as the logical location.
+     */
+    @Test
+    fun testSarifLocation() {
+        val topLevel = Path("src/integrationTest/resources")
+        val result =
+            analyze(listOf(topLevel.resolve("simple.py").toFile()), topLevel, true) {
+                it.registerLanguage<PythonLanguage>()
+            }
+        val foo = result.functions["foo"]
+
+        // Full location: ends where the function ends
+        val fullLoc = foo.toSarifLocation()
+        assertNotNull(fullLoc)
+        assertEquals(3, fullLoc.physicalLocation?.region?.endLine)
+        assertEquals(15, fullLoc.physicalLocation?.region?.endColumn)
+
+        val logical = fullLoc.logicalLocations?.firstOrNull()
+        assertNotNull(logical)
+        assertEquals("foo", logical.name)
+        assertEquals("simple.foo", logical.fullyQualifiedName)
+        assertEquals("function", logical.kind)
+
+        // Header-only location: ends where the function body starts
+        val onlyHeader = foo.toSarifLocation(onlyFunctionHeader = true)
+        assertNotNull(onlyHeader)
+        assertEquals(2, onlyHeader.physicalLocation?.region?.endLine)
+        assertEquals(5, onlyHeader.physicalLocation?.region?.endColumn)
+    }
+}
diff --git a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Project.kt 
b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Project.kt index 5f27cb4cb94..6279eb7808d 100644 --- a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Project.kt +++ b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Project.kt @@ -69,9 +69,13 @@ class TranslationOptions : OptionGroup("CPG Translation Options") { * Represents the result of the analysis. * * @param translationResult The result of the CPG translation. - * @param run The SARIF run object, that contains findings. + * @param sarif The SARIF object, that contains findings. */ -data class AnalysisResult(val translationResult: TranslationResult, val run: Run) +data class AnalysisResult(val translationResult: TranslationResult, val sarif: SarifSchema210) { + fun writeSarifJson(file: File) { + file.writeText(SarifSerializer.toJson(sarif)) + } +} /** * Represents an analysis project. This class is responsible for translating the project to a CPG @@ -97,33 +101,37 @@ class AnalysisProject( ) { /** Analyzes the project and returns the result. */ - fun analyze(): AnalysisResult { - // TODO(oxisto): Replace this mock run object with a real one later on. - // Currently, this is only to show that we support SARIF + fun analyze( + postProcess: ((TranslationResult) -> Pair, List>)? 
= null + ): AnalysisResult { + val tr = TranslationManager.builder().config(config).build().analyze().get() + val (rules, results) = postProcess?.invoke(tr) ?: Pair(emptyList(), emptyList()) + + // Create a new SARIF run, including a tool definition and rules corresponding to the + // individual security statements val run = Run( - tool = Tool(driver = ToolComponent(name = "Codyze", version = "x.x.x")), - results = - listOf( - Result( - rule = ReportingDescriptorReference(id = "Rule1"), - message = Message(markdown = "This is a **finding**"), - level = Level.Note, - locations = listOf(), - ) - ), + tool = + Tool(driver = ToolComponent(name = "Codyze", version = "x.x.x", rules = rules)), + results = results, ) - val result = TranslationManager.builder().config(config).build().analyze().get() - - return AnalysisResult(run = run, translationResult = result) + return AnalysisResult( + translationResult = tr, + sarif = SarifSchema210(version = Version.The210, runs = listOf(run)), + ) } companion object { - /** Builds a translation configuration from the given CLI options. */ - fun fromOptions( - projectOptions: ProjectOptions, - translationOptions: TranslationOptions, + /** Builds a translation configuration from the given project directory. */ + fun from( + projectDir: Path, + sources: List? = null, + components: List? = null, + exclusionPatterns: List? = null, + configBuilder: + ((TranslationConfiguration.Builder) -> TranslationConfiguration.Builder)? 
= + null, ): AnalysisProject { var builder = TranslationConfiguration.builder() @@ -143,18 +151,21 @@ class AnalysisProject( // We can either have a single source (using --sources) or multiple components (using // --components) - translationOptions.sources?.let { + sources?.let { builder = builder .sourceLocations(it.map { source -> source.toFile() }) - .topLevel(projectOptions.directory.toFile()) + .topLevel(projectDir.toFile()) } - translationOptions.components?.let { - val componentDir = projectOptions.directory.toFile().resolve("components") + components?.let { + val componentDir = projectDir.resolve("components") val pairs = it.map { component -> - Pair(component, mutableListOf(componentDir.resolve(component))) + Pair( + component, + mutableListOf(componentDir.resolve(component).toFile()), + ) } builder = builder @@ -171,17 +182,33 @@ class AnalysisProject( } .toMutableMap() ) - .topLevel(componentDir) + .topLevels(it.associate { Pair(it, componentDir.resolve(it).toFile()) }) } - translationOptions.exclusionPatterns?.forEach { - builder = builder.exclusionPatterns(it) - } + exclusionPatterns?.forEach { builder = builder.exclusionPatterns(it) } + configBuilder?.invoke(builder) return AnalysisProject( config = builder.build(), - name = projectOptions.directory.fileName.toString(), - projectDir = projectOptions.directory, + name = projectDir.fileName.toString(), + projectDir = projectDir, + ) + } + + /** Builds a translation configuration from the given CLI options. */ + fun fromOptions( + projectOptions: ProjectOptions, + translationOptions: TranslationOptions, + configModifier: + ((TranslationConfiguration.Builder) -> TranslationConfiguration.Builder)? 
= + null, + ): AnalysisProject { + return from( + projectOptions.directory, + translationOptions.sources, + translationOptions.components, + translationOptions.exclusionPatterns, + configModifier, ) } } diff --git a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryHost.kt b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryHost.kt index 5f5f4194e47..f62eca7a1c4 100644 --- a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryHost.kt +++ b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryHost.kt @@ -27,6 +27,8 @@ package de.fraunhofer.aisec.codyze import de.fraunhofer.aisec.cpg.TranslationResult import de.fraunhofer.aisec.cpg.helpers.Benchmark +import de.fraunhofer.aisec.cpg.query.QueryTree +import io.github.detekt.sarif4k.Result import java.io.File import kotlin.reflect.full.functions import kotlin.script.experimental.api.* @@ -35,6 +37,8 @@ import kotlin.script.experimental.jvmhost.BasicJvmScriptingHost import kotlin.script.experimental.jvmhost.createJvmCompilationConfigurationFromTemplate import kotlin.script.experimental.jvmhost.createJvmEvaluationConfigurationFromTemplate +data class QueryResult(val tree: QueryTree, val sarif: List) + /** * Evaluates a query script with the given query function name on the [TranslationResult]. It uses * the [BasicJvmScriptingHost] to execute the script. The function must be defined in the script @@ -44,7 +48,7 @@ import kotlin.script.experimental.jvmhost.createJvmEvaluationConfigurationFromTe * @param queryFunc The name of the query function to call * @return The result of the query function */ -fun TranslationResult.evalQuery(scriptFile: File, queryFunc: String): Any? 
{ +fun TranslationResult.evalQuery(scriptFile: File, queryFunc: String, ruleID: String): QueryResult { var b = Benchmark(TranslationResult::class.java, "Compiling query script $scriptFile") val compilationConfiguration = createJvmCompilationConfigurationFromTemplate() val evaluationConfiguration = createJvmEvaluationConfigurationFromTemplate() @@ -59,10 +63,21 @@ fun TranslationResult.evalQuery(scriptFile: File, queryFunc: String): Any? { if (func == null) { throw IllegalArgumentException("Query function $queryFunc not found in script") } + + // Check, if the return type is correct + if ( + func.returnType.classifier != QueryTree::class || + func.returnType.arguments.firstOrNull()?.type?.classifier != Boolean::class + ) { + throw IllegalArgumentException("Query function $queryFunc must return a QueryTree") + } b.stop() b = Benchmark(TranslationResult::class.java, "Executing query function $queryFunc") - val ret = func.call(value.returnValue.scriptInstance, this) + @Suppress("UNCHECKED_CAST") + val ret = func.call(value.returnValue.scriptInstance, this) as QueryTree + val res = QueryResult(ret, ret.toSarif(ruleID)) b.stop() - return ret + + return res } diff --git a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryScriptDefinition.kt b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryScriptDefinition.kt index 737d7124f73..412b43f25ec 100644 --- a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryScriptDefinition.kt +++ b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/QueryScriptDefinition.kt @@ -61,7 +61,14 @@ object QueryScriptConfiguration : baseClass(QueryScript::class) jvm { val libraries = - setOf("codyze-core", "cpg-core", "cpg-analysis", "kotlin-stdlib", "kotlin-reflect") + setOf( + "codyze-core", + "cpg-core", + "cpg-concepts", + "cpg-analysis", + "kotlin-stdlib", + "kotlin-reflect", + ) val cp = classpathFromClassloader(QueryScript::class.java.classLoader) checkNotNull(cp) { "Could not read classpath" } 
updateClasspath(cp.filter { element -> libraries.any { it in element.toString() } }) diff --git a/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Sarif.kt b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Sarif.kt new file mode 100644 index 00000000000..109e3d43ece --- /dev/null +++ b/codyze-core/src/main/kotlin/de/fraunhofer/aisec/codyze/Sarif.kt @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2025, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.codyze + +import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.declarations.Declaration +import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.NamespaceDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.ParameterDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.firstParentOrNull +import de.fraunhofer.aisec.cpg.graph.types.Type +import de.fraunhofer.aisec.cpg.query.QueryTree +import io.github.detekt.sarif4k.* + +/** + * Converts a [QueryTree] to a list of [Result]s. This expects that the query tree is of type + * [Boolean] and that the [QueryTree.children] represent the individual findings. 
+ */
+fun QueryTree<Boolean>.toSarif(ruleID: String): List<Result> {
+    return this.children.map { finding ->
+        // Every child is an individual finding, so message, level and kind must reflect the
+        // outcome of that child. Using the receiver's value here would stamp the same status on
+        // all findings. Children are star-projected, hence the explicit comparison with true.
+        val passed = finding.value == true
+
+        Result(
+            ruleID = ruleID,
+            message = Message(text = if (passed) "Query was successful" else "Query failed"),
+            level = if (passed) Level.None else Level.Error,
+            kind = if (passed) ResultKind.Pass else ResultKind.Fail,
+            locations = listOfNotNull(finding.node?.toSarifLocation()),
+            stacks = finding.node?.toSarifCallStack(),
+            // A grand-child whose value is a list is treated as a path of nodes and turned into
+            // one code flow with a single thread flow. All other grand-children are skipped.
+            codeFlows =
+                finding.children.mapNotNull { child ->
+                    (child.value as? List<*>)?.let { path ->
+                        CodeFlow(
+                            threadFlows =
+                                listOf(
+                                    ThreadFlow(
+                                        message = Message(text = "Thread flow"),
+                                        locations =
+                                            path
+                                                .filterIsInstance<Node>()
+                                                .toSarifThreadFlowLocation(),
+                                    )
+                                )
+                        )
+                    }
+                },
+        )
+    }
+}
+
+/**
+ * Converts a [Node] into a [Message] for SARIF output. Currently, this is a short representation of
+ * the node type and node name, in the form `SimpleClassName[name=...]`. In the future, we want to
+ * include a brief description of any eventual overlay nodes that better describe the node
+ * semantically.
+ *
+ * @return the message, or null if the receiver is null
+ */
+private fun Node?.toSarifMessage(): Message? {
+    return this?.let { Message(text = "${it.javaClass.simpleName}[name=${it.name}]") }
+}
+
+/**
+ * Converts a [Node] into a [Stack] for SARIF output. Currently, this is a single stack frame with
+ * the location of the node, whose message describes the first parent that is a
+ * [FunctionDeclaration] (if any). In the future, we want to include a call stack that leads to the
+ * node's current function.
+ */
+private fun Node.toSarifCallStack(): List<Stack> {
+    val currentFunc = this.firstParentOrNull { it is FunctionDeclaration }
+    return listOf(
+        Stack(
+            message = Message(text = "Stack"),
+            frames =
+                listOf(
+                    StackFrame(
+                        location = this.toSarifLocation(message = currentFunc.toSarifMessage())
+                    )
+                ),
+        )
+    )
+}
+
+/** Converts a list of [Node]s into a list of [ThreadFlowLocation]s for SARIF output. 
*/
+private fun List<Node>?.toSarifThreadFlowLocation(): List<ThreadFlowLocation> {
+    // The position in the list becomes the execution order; functions are reduced to their
+    // header so that a step in the flow does not span a whole function body
+    return this?.mapIndexed { idx, node ->
+        ThreadFlowLocation(
+            location =
+                node.toSarifLocation(message = node.toSarifMessage(), onlyFunctionHeader = true),
+            executionOrder = idx.toLong(),
+        )
+    } ?: listOf()
+}
+
+/**
+ * Converts a [Node.location] to a [Location], consisting of the physical location, one logical
+ * location (name, kind and, for declarations, the fully qualified name) and the given [message].
+ *
+ * @return the location, or null if the receiver is null or has no location
+ */
+fun Node?.toSarifLocation(
+    message: Message? = this.toSarifMessage(),
+    /**
+     * If this option is set to true, we only emit the location of the function header, not the
+     * entire function body.
+     *
+     * This is helpful for cases where we want to highlight the function declaration in the code
+     * editor, but not the entire function body.
+     */
+    onlyFunctionHeader: Boolean = false,
+): Location? {
+    val location = this?.location ?: return null
+
+    val physicalLocation =
+        if (this is FunctionDeclaration && this.body != null && onlyFunctionHeader) {
+            // Try to calculate the end of the header by using the beginning of the body. This is
+            // not entirely correct since in some programming languages we need to start the body
+            // location at the first statement, since we are missing location information for the
+            // body "block", but it's the best we can do
+            val bodyRegion = this.body?.location?.region
+            de.fraunhofer.aisec.cpg.sarif.PhysicalLocation(
+                uri = location.artifactLocation.uri,
+                region =
+                    de.fraunhofer.aisec.cpg.sarif.Region(
+                        startLine = location.region.startLine,
+                        startColumn = location.region.startColumn,
+                        // Fall back to the end of the function if the body has no location
+                        endLine = bodyRegion?.startLine ?: location.region.endLine,
+                        endColumn = bodyRegion?.startColumn ?: location.region.endColumn,
+                    ),
+            )
+        } else {
+            location
+        }
+
+    return Location(
+        physicalLocation = physicalLocation.toSarif(),
+        logicalLocations =
+            listOf(
+                LogicalLocation(
+                    fullyQualifiedName = if (this is Declaration) this.name.toString() else null,
+                    name = this.name.localName,
+                    kind = this.toSarifKind(),
+                )
+            ),
+        message = message,
+    )
+}
+
+/** Converts a [de.fraunhofer.aisec.cpg.sarif.PhysicalLocation] to a [PhysicalLocation]. 
*/ +fun de.fraunhofer.aisec.cpg.sarif.PhysicalLocation.toSarif(): PhysicalLocation { + return PhysicalLocation( + artifactLocation = ArtifactLocation(uri = "file://${this.artifactLocation.uri.path}"), + region = + Region( + startLine = this.region.startLine.toLong(), + startColumn = this.region.startColumn.toLong(), + endLine = this.region.endLine.toLong(), + endColumn = this.region.endColumn.toLong(), + ), + ) +} + +/** + * Converts a [Node] to a well-known SARIF kind. This is used to categorize the node in the SARIF + * output. + */ +private fun Node.toSarifKind(): String? { + return when (this) { + is FunctionDeclaration -> "function" + is FieldDeclaration -> "member" + is TranslationUnitDeclaration -> "module" + is NamespaceDeclaration -> "namespace" + is ParameterDeclaration -> "parameter" + is VariableDeclaration -> "variable" + is Type -> "type" + else -> null + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt index 6028ce89994..a17a6e4ec47 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt @@ -285,7 +285,7 @@ fun Node.followPrevDFGEdgesUntilHit( // from). // We try to pop from the stack and only select the elements with the // matching index. 
- ctx.indexStack.popIfOnTop( + ctx.indexStack.checkAndPop( it.granularity as IndexedDataflowGranularity ) == true } else { @@ -325,35 +325,26 @@ fun Node.followPrevDFGEdgesUntilHit( */ class Context( val indexStack: SimpleStack = SimpleStack(), - val callStack: SimpleStack = SimpleStack(), + val callStack: ArrayDeque = ArrayDeque(), ) { fun clone(): Context { - return Context(indexStack.clone(), callStack.clone()) + return Context(indexStack.clone(), ArrayDeque(callStack)) } } -/** Implementation of a simple stack, based on an [ArrayDeque] */ -class SimpleStack { +class SimpleStack() { private val deque = ArrayDeque() - /** Returns true if the stack is empty. */ fun isEmpty(): Boolean = deque.isEmpty() - /** Pushes a new element onto the stack. */ fun push(newElem: T) { deque.addFirst(newElem) } - /** Returns the top element from the stack, without popping it. */ - val top: T? + val current: T? get() = deque.firstOrNull() - /** - * Pops the top element from the stack, if [elemToPop] is the top element. - * - * @return true if the element was popped, false otherwise - */ - fun popIfOnTop(elemToPop: T): Boolean { + fun checkAndPop(elemToPop: T): Boolean { if (deque.firstOrNull() == elemToPop) { deque.removeFirst() return true @@ -361,7 +352,6 @@ class SimpleStack { return false } - /** Clones the stack. */ fun clone(): SimpleStack { return SimpleStack().apply { deque.addAll(this@SimpleStack.deque) } } @@ -635,7 +625,7 @@ inline fun Node.followXUntilHit( alreadySeenNodes.add(currentNode) // The last node of the path is where we continue. We get all of its outgoing CDG edges and // follow them - val nextNodes = x(currentNode, currentContext, currentPath.first) + var nextNodes = x(currentNode, currentContext, currentPath.first) // No further nodes in the path and the path criteria are not satisfied. 
if (nextNodes.isEmpty() && collectFailedPaths) failedPaths.add(currentPath.first) @@ -709,11 +699,11 @@ fun Node.followNextDFGEdgesUntilHit( predicate: (Node) -> Boolean, ): FulfilledAndFailedPaths { return followXUntilHit( - x = { currentNode, ctx, _ -> + x = { currentNode, ctx, path -> if ( useIndexStack && currentNode is InitializerListExpression && - !ctx.indexStack.isEmpty() + ctx.indexStack.isEmpty() != true ) { // There's something on the stack. Get the relevant parts currentNode.nextDFGEdges @@ -726,7 +716,9 @@ fun Node.followNextDFGEdgesUntilHit( // from). // We try to pop from the stack and only select the elements with the // matching index. - ctx.indexStack.popIfOnTop(it.granularity as IndexedDataflowGranularity) + ctx.indexStack.checkAndPop( + it.granularity as IndexedDataflowGranularity + ) == true } else { true } @@ -738,7 +730,7 @@ fun Node.followNextDFGEdgesUntilHit( currentNode.nextDFGEdges.forEach { if (it is ContextSensitiveDataflow && it.callingContext is CallingContextIn) { // Push the call of our calling context to the stack - ctx.callStack.push(it.callingContext.call) + ctx.callStack.addFirst(it.callingContext.call) } if ( it.end is InitializerListExpression && @@ -762,7 +754,7 @@ fun Node.followNextDFGEdgesUntilHit( ) { // We are only interested in outgoing edges from our current // "call-in", i.e., the call expression that is on the stack. 
- ctx.callStack.top == it.callingContext.call + ctx.callStack.firstOrNull() == it.callingContext.call } else { true } @@ -773,7 +765,7 @@ fun Node.followNextDFGEdgesUntilHit( currentNode.nextDFGEdges.forEach { if (it is ContextSensitiveDataflow && it.callingContext is CallingContextOut) { // Pop the current call, if it's on top - ctx.callStack.popIfOnTop(it.callingContext.call) + ctx.callStack.removeIfFirst(it.callingContext.call) } } @@ -1315,3 +1307,12 @@ val Expression.isImported: Boolean get() { return this.importedFrom.isNotEmpty() } + +private fun ArrayDeque.removeIfFirst(element: T): Boolean { + return if (firstOrNull() == element) { + removeFirst() + true + } else { + false + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 6227be7e0df..5e330a03492 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -160,21 +160,12 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } /** Caches all TemplateDeclarations in [templateList] */ - protected fun findTemplates(node: Node?) { + private fun findTemplates(node: Node?) { if (node is TemplateDeclaration) { templateList.add(node) } } - /** - * Determines if the [reference] refers to the super class, and we have to start searching - * there. - */ - protected fun isSuperclassReference(reference: Reference): Boolean { - val language = reference.language - return language is HasSuperClasses && reference.name.endsWith(language.superClassKeyword) - } - /** * This function handles symbol resolving for a [Reference]. 
After a successful lookup of the * symbol contained in [Reference.name], the property [Reference.refersTo] is set to the best @@ -197,7 +188,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { * once the EOG reaches the appropriate [CallExpression] (which should actually be just be the * next EOG node). */ - protected fun handleReference(currentClass: RecordDeclaration?, ref: Reference) { + protected open fun handleReference(currentClass: RecordDeclaration?, ref: Reference) { val language = ref.language val helperType = ref.resolutionHelper?.type @@ -233,32 +224,50 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { eogPredicate } - // Find a list of candidate symbols. Currently, this is only used the in the "next-gen" call - // resolution, but in future this will also be used in resolving regular references. - ref.candidates = scopeManager.lookupSymbolByNodeName(ref, predicate = predicate).toSet() + // Find a list of candidate symbols. In most cases, we can just perform a lookup by name + // which either performs an unqualified lookup beginning from the current scope "up-wards", + // or a qualified lookup starting from the scope specified in the name. + var candidates = scopeManager.lookupSymbolByNodeName(ref, predicate = predicate).toSet() + + // But we have to consider one special case: For languages, that support implicit receivers, + // this reference might be a member access of either the current class or a parent class. + // While a regular lookup would only consider the current scope, we have to consider the + // parent classes as well, which is exactly what resolveMemberByName does. We could probably + // get around this if we would include the symbols of the parent class somehow in the child + // class as a sort of "sibling" scope, but we do not have that (yet). 
+ if ( + language is HasImplicitReceiver && + candidates.isEmpty() && + !ref.name.isQualified() && + currentClass != null + ) { + candidates = + resolveMemberByName(ref.name.localName, setOf(currentClass.toType())).toSet() + } + + // Store the candidates in the reference + ref.candidates = candidates // We need to choose the best viable candidate out of the ones we have for our reference. // Hopefully we have only one, but there might be instances where more than one is a valid // candidate. We let the language have a chance at overriding the default behaviour (which // takes only a single one). - var wouldResolveTo = language.bestViableReferenceCandidate(ref) - - // For now, we need to ignore reference expressions that are directly embedded into call - // expressions, because they are the "callee" property. In the future, we will use this - // property to actually resolve the function call. However, there is a special case that - // we want to catch already, that is if we are "calling" a reference to a variable. This - // can be done in several languages, e.g., in C/C++ as function pointers or in Go as - // function references. In this case, we want to resolve the declared reference expression - // of this call expression back to its original variable declaration. In the future, we want - // to extend this particular code to resolve all callee references to their declarations, - // i.e., their function definitions and get rid of the separate CallResolver. - if (ref.resolutionHelper is CallExpression) { - // Peek into the declaration, and if it is only one declaration and a variable, we can - // proceed normally, as we are running into the special case explained above. Otherwise, - // we abort here (for now). 
- if (wouldResolveTo !is VariableDeclaration && wouldResolveTo !is ParameterDeclaration) { - return - } + val wouldResolveTo = language.bestViableReferenceCandidate(ref) + + // For now, we still separate the resolving of simple variable references from call + // resolving. Therefore, we need to stop here if we are the callee of a call and continue in + // handleCallExpression. + // + // However, there is a special case that we want to catch, that is if we are "calling" a + // reference to a variable (or parameter). This can be done in several languages, e.g., in + // C/C++ as function pointers or in Go as function references. In this case, we want to + // resolve the reference of this call expression back to its original declaration, and then + // we later continue in the DynamicInvokeResolver, which sets the invokes edge. + if ( + ref.resolutionHelper is CallExpression && + (wouldResolveTo !is VariableDeclaration && wouldResolveTo !is ParameterDeclaration) + ) { + return } // Only consider resolving, if the language frontend did not specify a resolution. If we @@ -266,14 +275,9 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // resolving again var refersTo = ref.refersTo ?: wouldResolveTo - var recordDeclType: Type? = null - if (currentClass != null) { - recordDeclType = currentClass.toType() - } - // If we did not resolve the reference up to this point, we can try to infer the declaration if (refersTo == null) { - // If its a function pointer, we can try to infer a function + // If it's a function pointer, we can try to infer a function refersTo = if (helperType is FunctionPointerType) { tryFunctionInferenceFromFunctionPointer(ref, helperType) @@ -290,10 +294,21 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - protected fun handleMemberExpression(curClass: RecordDeclaration?, current: MemberExpression) { + /** + * This function handles resolving of a [MemberExpression] in the [curClass]. 
This works similar + * to [handleReference]. First, we set the [MemberExpression.candidates] based on + * [resolveMemberByName], which internally calls [ScopeManager.lookupSymbolByName] based on the + * current class and its parent classes. Then, if we resolve a [MemberCallExpression], we abort + * (and later pick up resolving in [handleCallExpression]). In case of a field access, we set + * the [MemberExpression.refersTo] based on [Language.bestViableReferenceCandidate]. + */ + protected open fun handleMemberExpression( + curClass: RecordDeclaration?, + current: MemberExpression, + ) { // Some locals for easier smart casting - var base = current.base - var language = current.language + val base = current.base + val language = current.language // We need to adjust certain types of the base in case of a "super" expression, and we // delegate this to the language. If that is successful, we can continue with regular @@ -307,30 +322,38 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { language.handleSuperExpression(current, curClass, scopeManager) } - // For legacy reasons, method and field resolving is split between the VariableUsageResolver - // and the CallResolver. Since we are trying to merge these two, the first step was to have - // the callee/member field of a MemberCallExpression set to a MemberExpression. This means - // however, that these will show up in this callback function. To not mess with legacy code - // (yet), we are ignoring all MemberExpressions whose parents are MemberCallExpressions in - // this function for now. - if (current.resolutionHelper is MemberCallExpression) { + // Handle a possible overloaded operator->. If we find an overloaded operator, this inserts + // an additional operator expression in-between the existing member expression and the base + // and also affects the base type. 
+ val baseType = resolveOverloadedArrowOperator(current) ?: base.type.root + + // Find candidates based on possible base types + val (possibleTypes, _) = getPossibleContainingTypes(current) + current.candidates = resolveMemberByName(current.name.localName, possibleTypes).toSet() + + // For legacy reasons, resolving of simple variable references (including fields) is + // separated from call resolving. Therefore, we need to stop here if we are the callee of a + // member call and continue in handleCallExpression. But we can already make + // handleCallExpression a bit cleaner, if we set the candidates here, similar to what we do + // in handleReference. + val helper = current.resolutionHelper + if (helper is MemberCallExpression) { return } - if (base is Reference) { - // The base has been resolved by now. Maybe we have some other clue about - // this base from the type system, so we can set the declaration accordingly. - // TODO(oxisto): It is actually not really a good approach, but it is currently - // needed to make the java frontend happy, but this needs to be removed at some point - if (base.refersTo == null) { - base.refersTo = base.type.recordDeclaration - } - } + // We need to choose the best viable candidate out of the ones we have for our reference. + // Hopefully we have only one, but there might be instances where more than one is a valid + // candidate. We let the language have a chance at overriding the default behaviour (which + // takes only a single one). 
+ val wouldResolveTo = language.bestViableReferenceCandidate(current) + + var refersTo = current.refersTo ?: wouldResolveTo - val baseType = base.type.root - if (baseType is ObjectType) { - current.refersTo = resolveMember(baseType, current) + if (refersTo == null && baseType is ObjectType) { + refersTo = tryFieldInference(current, baseType) } + + current.refersTo = refersTo } /** @@ -359,54 +382,15 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { ex.base = call } } - return type - } - - protected fun resolveMember( - containingClass: ObjectType, - reference: Reference, - ): ValueDeclaration? { - if (isSuperclassReference(reference)) { - // if we have a "super" on the member side, this is a member call. We need to resolve - // this in the call resolver instead - return null - } - var member: ValueDeclaration? = null - var type: Type = containingClass - // Handle a possible overloaded operator-> - type = resolveOverloadedArrowOperator(reference) ?: type - - val record = type.recordDeclaration - if (record != null) { - // TODO(oxisto): This should use symbols rather than the AST fields - member = - record.fields - .filter { it.name.lastPartsMatch(reference.name) } - .map { it.definition } - .firstOrNull() - } - if (member == null) { - member = - type.superTypes - .flatMap { it.recordDeclaration?.fields ?: listOf() } - .filter { it.name.localName == reference.name.localName } - .map { it.definition } - .firstOrNull() - } - - if (member == null && record is EnumDeclaration) { - member = record.entries[reference.name.localName] - } - - if (member == null) { - member = tryFieldInference(reference, containingClass) - } - - return member + return type } - protected fun handle(node: Node?, currClass: RecordDeclaration?) { + /** + * The central entry-point for all symbol-resolving. It dispatches the handling of the node to + * the appropriate function based on the node type. 
+ */ + protected open fun handle(node: Node?, currClass: RecordDeclaration?) { when (node) { is MemberExpression -> handleMemberExpression(currClass, node) is Reference -> handleReference(currClass, node) @@ -416,7 +400,24 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - protected fun handleCallExpression(call: CallExpression) { + /** + * This function handles the resolution of a [CallExpression] based on a list of candidates. The + * candidates are taken from [CallExpression.callee] which are set either in [handleReference] + * or [handleMemberExpression], depending on the type. + * + * In any case, the candidates are then resolved with the arguments of the call expression using + * [resolveWithArguments]. The result of this resolution is stored in [CallExpression.invokes] + * and depending on [CallResolutionResult.SuccessKind] a warning is emitted if resolution was + * erroneous or ambiguous. Furthermore, the [CallExpression.callee]'s [Reference.refersTo] is + * also set. + * + * If the resolution was unsuccessful, we try to infer the function based on the information + * provided in the [CallResolutionResult] and the [CallExpression]. This is done in + * [tryFunctionInference]. + * + * @param call The [CallExpression] to resolve. + */ + protected open fun handleCallExpression(call: CallExpression) { // Some local variables for easier smart casting val callee = call.callee val language = call.language @@ -431,15 +432,13 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return } - // Handle a possible overloaded operator-> - resolveOverloadedArrowOperator(callee) - - // Dynamic function invokes (such as function pointers) are handled by extra pass, so we are + // Dynamic function invokes (such as function pointers) are handled by an extra pass, so we + // are // not resolving them here. 
// // We have a dynamic invoke in two cases: // a) our calleee is not a reference - // b) our reference refers to a variable rather than a function + // b) our reference already refers to a variable rather than a function if ( callee !is Reference || callee.refersTo is VariableDeclaration || @@ -467,39 +466,8 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - // We are moving towards a new approach to call resolution. However, we cannot use this for - // all nodes yet, so we need to use legacy resolution for some - val isSpecialCXXCase = - call.language.isCPP && scopeManager.currentRecord != null && !callee.name.isQualified() - val useLegacyResolution = - when { - isSpecialCXXCase -> true - call is MemberCallExpression -> true - else -> { - false - } - } - - // Retrieve a list of candidates; either from the "legacy" system or directly from our - // callee - var candidates = - if (useLegacyResolution) { - val (possibleTypes, _) = getPossibleContainingTypes(call) - resolveMemberByName(callee.name.localName, possibleTypes).toSet() - } else { - callee.candidates - } - - // There seems to be one more special case and that is a regular function within a record. - // This could either be a member call with an omitted "this" or a regular call. The problem - // is that the legacy system can now only resolve member calls but not regular calls - // (anymore). So if we have this special case and the legacy system does not return any - // candidates, we need to switch to the new system. 
- if (isSpecialCXXCase && candidates.isEmpty()) { - candidates = callee.candidates - } - - val result = resolveWithArguments(candidates, call.arguments, call) + // Try to resolve the best viable function based on the candidates and the arguments + val result = resolveWithArguments(callee.candidates, call.arguments, call) when (result.success) { PROBLEMATIC -> { log.error( @@ -612,7 +580,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return result } - protected fun resolveMemberByName( + private fun resolveMemberByName( symbol: String, possibleContainingTypes: Set, ): Set { @@ -641,7 +609,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return candidates } - protected fun handleConstructExpression(constructExpression: ConstructExpression) { + protected open fun handleConstructExpression(constructExpression: ConstructExpression) { if (constructExpression.instantiates != null && constructExpression.constructor != null) return val recordDeclaration = constructExpression.type.root.recordDeclaration @@ -684,7 +652,18 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - private fun handleOverloadedOperator(op: HasOverloadedOperation) { + /** + * This function handles all nodes that have the [HasOverloadedOperation] trait. It tries to + * resolve the overloaded operator and replace the node with the resolved operator expression. + * + * Which overloads are possible, is depending on whether the language implements + * [HasOperatorOverloading] and can be specified in + * [HasOperatorOverloading.overloadedOperatorNames]. + * + * Internally, it takes the result of [resolveOperator] and if successful, replaces the node + * with the resolved [OperatorCallExpression]. 
+ */ + protected open fun handleOverloadedOperator(op: HasOverloadedOperation) { val result = resolveOperator(op) val decl = result?.bestViable?.singleOrNull() ?: return @@ -695,6 +674,19 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } + /** + * This function tries to resolve an overloaded operator based on the + * [HasOverloadedOperation.operatorCode] of the [op] (if the [HasOverloadedOperation.language] + * allows it). It first looks up the corresponding symbol in the + * [HasOperatorOverloading.overloadedOperatorNames] of the language, for example `add` for a `+` + * operator. It then tries to find the matching method candidates in the base class of the [op] + * (using [resolveMemberByName]) and returns the result of the resolution. The base depends on + * the individual operator / expression and is specified in + * [HasOverloadedOperation.operatorBase]. + * + * Finally, the candidates are resolved with the arguments of the operator expression using + * [resolveWithArguments]. + */ private fun resolveOperator(op: HasOverloadedOperation): CallResolutionResult? 
{ val language = op.language val base = op.operatorBase @@ -722,7 +714,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return resolveWithArguments(candidates, op.operatorArguments, op as Expression) } - protected fun getInvocationCandidatesFromParents( + private fun getInvocationCandidatesFromParents( name: Symbol, possibleTypes: Set, ): List { @@ -761,7 +753,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return this != null && this::class.simpleName == "CPPLanguage" } - protected fun getOverridingCandidates( + private fun getOverridingCandidates( possibleSubTypes: Set, declaration: FunctionDeclaration, ): Set { @@ -779,7 +771,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { * there is no valid ConstructDeclaration we will create an implicit ConstructDeclaration that * matches the ConstructExpression. */ - protected fun getConstructorDeclaration( + private fun getConstructorDeclaration( constructExpression: ConstructExpression, recordDeclaration: RecordDeclaration, ): ConstructorDeclaration? { @@ -825,19 +817,18 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } /** - * Returns a set of types in which the callee of our [call] could reside in. More concretely, it - * returns a [Pair], where the first element is the set of types and the second is our best guess. + * Returns a set of types in which the [CallExpression.callee] (which is a [Reference]) could reside + * in. More concretely, it returns a [Pair], where the first element is the set of types and the + * second is our best guess. */ -internal fun Pass<*>.getPossibleContainingTypes(call: CallExpression): Pair, Type?> { +internal fun Pass<*>.getPossibleContainingTypes(ref: Reference): Pair, Type?> { val possibleTypes = mutableSetOf() var bestGuess: Type? 
= null - if (call is MemberCallExpression) { - call.base?.let { base -> - bestGuess = base.type - possibleTypes.add(base.type) - possibleTypes.addAll(base.assignedTypes) - } - } else if (call.language is HasImplicitReceiver) { + if (ref is MemberExpression) { + bestGuess = ref.base.type + possibleTypes.add(ref.base.type) + possibleTypes.addAll(ref.base.assignedTypes) + } else if (ref.language is HasImplicitReceiver) { // This could be a member call with an implicit receiver, so let's add the current class // to the possible list scopeManager.currentRecord?.toType()?.let { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt index 113b52a0266..ebe4c53a1c8 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt @@ -287,12 +287,15 @@ internal fun Pass<*>.tryFunctionInference( // the callee is not qualified, because otherwise we are in a static call like // MyClass::doSomething() or in a namespace call (in case we do not want to explore the // base type here yet). This will change in a future PR. 
+ val callee = call.callee val (suitableBases, bestGuess) = if ( - call.callee is MemberExpression || - !call.callee.name.isQualified() && call.language is HasImplicitReceiver + callee is MemberExpression || + callee is Reference && + !call.callee.name.isQualified() && + call.language is HasImplicitReceiver ) { - getPossibleContainingTypes(call) + getPossibleContainingTypes(callee) } else { Pair(setOf(), null) } diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt index 3db5ccb11f3..ad2464e39a6 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt @@ -333,7 +333,7 @@ class ExpressionHandler(lang: JavaLanguageFrontend) : private fun handleThisExpression(expr: Expression): Reference { val thisExpr = expr.asThisExpr() val qualifiedName = frontend.scopeManager.currentRecord?.name.toString() - val type = this.objectType(qualifiedName) + var type = this.objectType(qualifiedName) var name = thisExpr.toString() // If the typeName is specified, then this a "qualified this" and we need to handle it @@ -344,6 +344,7 @@ class ExpressionHandler(lang: JavaLanguageFrontend) : val typeName = thisExpr.typeName if (typeName.isPresent) { name = "this$" + typeName.get().identifier + type = unknownType() // will be filled later by the symbol resolver } val thisExpression = newReference(name, type, rawNode = expr) return thisExpression