Skip to content

Commit 2007cdb

Browse files
authored
small perf improvements in ProtoToGraphNodeMappings and ZipArchive (#1800)
* ProtoToGraphNodeMappings: use mutable data structures internally; the immutable maps have some unnecessary overhead * ZipArchive: don't return results as a linked list
1 parent 71180f2 commit 2007cdb

File tree

2 files changed

+14
-28
lines changed

2 files changed

+14
-28
lines changed

codepropertygraph/src/main/scala/io/shiftleft/codepropertygraph/cpgloading/ProtoToGraphNodeMappings.scala

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,36 +3,21 @@ package io.shiftleft.codepropertygraph.cpgloading
33
import flatgraph.*
44
import io.shiftleft.proto.cpg.Cpg.CpgStruct
55

6+
import scala.collection.mutable
67
import scala.jdk.CollectionConverters.*
78

8-
/** Mutable datastructure to preserve mapping between proto and cpg nodes during ProtoToCpg import.
9+
/** Mutable data structure to preserve mapping between proto and cpg nodes during ProtoToCpg import.
910
*
1011
* Context: we need to run two passes: 1) add nodes and 2) set node properties and add edges (this is due to
1112
* flatgraph-specific implementation details)
1213
*
13-
* Because of that, we need to remember the mapping from proto node id to gnode. Typically that's just a plain mapping,
14-
* but there's one special case for TYPE nodes: some (parallel) frontends create duplicate TYPE nodes which we need to
15-
* deduplicate...
14+
* Because of that, we need to remember the mapping from proto node id to gnode. Typically, that's just a plain
15+
* mapping. But there's one special case for TYPE nodes: some (parallel) frontends create duplicate TYPE nodes which we
16+
* need to deduplicate...
1617
*/
1718
class ProtoToGraphNodeMappings {
18-
private var protoNodeIdToGNode = Map.empty[Long, DNode]
19-
private var typeFullNameToGNode = Map.empty[String, DNode]
20-
21-
def addAll(other: ProtoToGraphNodeMappings): Unit = {
22-
val intersection1 = this.protoNodeIdToGNode.keySet.intersect(other.protoNodeIdToGNode.keySet)
23-
val intersection2 = this.typeFullNameToGNode.keySet.intersect(other.typeFullNameToGNode.keySet)
24-
assert(
25-
intersection1.isEmpty,
26-
s"unexpected duplicate entries in protoNodeIdToGNode mappings. protoNodeIds: $intersection1"
27-
)
28-
assert(
29-
intersection2.isEmpty,
30-
s"unexpected duplicate entries in typeFullNameToGNode mappings. FullNames: $intersection2"
31-
)
32-
33-
this.protoNodeIdToGNode = this.protoNodeIdToGNode ++ other.protoNodeIdToGNode
34-
this.typeFullNameToGNode = this.typeFullNameToGNode ++ other.typeFullNameToGNode
35-
}
19+
private val protoNodeIdToGNode = mutable.LongMap.empty[DNode]
20+
private val typeFullNameToGNode = mutable.Map.empty[String, DNode]
3621

3722
def add(protoNode: CpgStruct.Node, node: DNode): Unit = {
3823
protoNodeIdToGNode += protoNode.getKey -> node
@@ -48,7 +33,7 @@ class ProtoToGraphNodeMappings {
4833
}
4934
}
5035

51-
/** This will fail hard if the DiffGraph hasn't been applied yet, which is the assumption for it's use case. In other
36+
/** This will fail hard if the DiffGraph hasn't been applied yet, which is the assumption for its use case. In other
5237
* words, we specifically don't want to invoke `find(protoNode).flatMap(_.storedRef)` here
5338
*/
5439
def findGNode(protoNode: CpgStruct.Node): Option[GNode] =

codepropertygraph/src/main/scala/io/shiftleft/codepropertygraph/cpgloading/ZipArchive.scala

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,19 @@ package io.shiftleft.codepropertygraph.cpgloading
22

33
import java.io.Closeable
44
import java.nio.file.attribute.BasicFileAttributes
5-
import java.nio.file.{FileSystem, FileSystems, FileVisitResult, Files, Path, Paths, SimpleFileVisitor}
6-
import java.util.{Collection => JCollection}
5+
import java.nio.file.{FileSystem, FileSystems, FileVisitOption, FileVisitResult, Files, Path, Paths, SimpleFileVisitor}
6+
import java.util.Collection as JCollection
7+
import scala.collection.immutable.ArraySeq
78
import scala.collection.mutable.ArrayBuffer
8-
import scala.jdk.CollectionConverters._
9+
import scala.jdk.CollectionConverters.*
910

1011
class ZipArchive(inputFile: String) extends Closeable {
1112
private val zipFileSystem: FileSystem = FileSystems.newFileSystem(Paths.get(inputFile), null: ClassLoader)
1213

1314
private def root: Path = zipFileSystem.getRootDirectories.iterator.next
1415

1516
private def walk(rootPath: Path): Seq[Path] = {
16-
val entries = ArrayBuffer[Path]()
17+
val entries = ArraySeq.newBuilder[Path]()
1718
Files.walkFileTree(
1819
rootPath,
1920
new SimpleFileVisitor[Path]() {
@@ -24,7 +25,7 @@ class ZipArchive(inputFile: String) extends Closeable {
2425
}
2526
}
2627
)
27-
entries.toSeq
28+
entries.result()
2829
}
2930

3031
def entries: Seq[Path] = walk(root)

0 commit comments

Comments
 (0)