Skip to content

Commit 638a039

Browse files
committed
basic block graph vector collection
1 parent 16973ac commit 638a039

File tree

3 files changed

+62
-2
lines changed

3 files changed

+62
-2
lines changed

idaplugin/rematch/collectors/vectors/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
from .mnemonic_hash import MnemonicHashVector
77
from .mnemonic_hist import MnemonicHistVector
88
from .basicblocksize_hist import BasicBlockSizeHistVector
9+
from .basicblockgraph import BasicBlockGraphVector
910

1011

1112
__all__ = ["Vector", "InstructionHashVector", "IdentityHashVector",
1213
"NameHashVector", "AssemblyHashVector", "MnemonicHashVector",
13-
"MnemonicHistVector", "BasicBlockSizeHistVector"]
14+
"MnemonicHistVector", "BasicBlockSizeHistVector",
15+
"BasicBlockGraphVector"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import ida_gdl
2+
import ida_funcs
3+
4+
from . import vector
5+
6+
7+
class BasicBlockGraphVector(vector.Vector):
    """Vector describing a function's basic-block graph as a list of sizes.

    The function's flow chart is walked depth-first starting at the first
    contained basic block. Every visited block contributes one token (its
    size in bytes); successors are visited ordered by their own token so
    the result does not depend on the order IDA reports them in.
    """
    type = "basicblockgraph"
    type_version = 0

    def __init__(self, *args, **kwargs):
        super(BasicBlockGraphVector, self).__init__(*args, **kwargs)

        self.func = ida_funcs.get_func(self.offset)

        # Real lists are required: data() indexes self.nodes, which fails on
        # the lazy objects filter()/map() return under Python 3.
        flowchart = ida_gdl.FlowChart(self.func)
        self.nodes = [node for node in flowchart if self.node_contained(node)]
        # NOTE(review): node_ids is not consulted anywhere in this class;
        # successors are not filtered against it -- confirm whether they
        # were meant to be.
        self.node_ids = [node.id for node in self.nodes]

        self.seen_nodes = set()

    def data(self, offset):
        """Return the serialized basic-block graph of the function.

        `offset` is accepted for interface compatibility and ignored.
        Returns a list of block tokens in depth-first order, or an empty
        list when no basic block is contained in the function.
        """
        del offset

        if not self.nodes:
            return []

        # Start from a clean slate so repeated calls yield the same result
        # instead of treating the root as already visited.
        self.seen_nodes.clear()

        # Assuming node #0 is the root node
        return self.add_node(self.nodes[0])

    def add_node(self, node):
        """Serialize the sub-graph reachable from `node`.

        Emits the token of every block in pre-order. A block that was
        already visited (tracked in `self.seen_nodes`) still contributes
        its token but is not descended into again, which terminates loops.
        An explicit stack is used instead of recursion so large functions
        cannot exhaust the interpreter's recursion limit.
        """
        serialized = []
        pending = [node]
        while pending:
            current = pending.pop()
            serialized.append(self.token(current))

            if current.id in self.seen_nodes:
                continue
            self.seen_nodes.add(current.id)

            # get successive nodes ordered by their own sizes; push them
            # reversed so the smallest successor is popped (visited) first
            sorted_succs = self.sort_nodes(current.succs())
            pending.extend(reversed(sorted_succs))
        return serialized

    def sort_nodes(self, nodes):
        """Return `nodes` as a list ordered by `sort_key`."""
        return sorted(nodes, key=self.sort_key)

    @staticmethod
    def token(node):
        """Return the value representing `node`: its size in bytes."""
        # alternatives: offset from start of function
        return node.endEA - node.startEA

    # Successors are ordered by the same value they are serialized as.
    sort_key = token

    def node_contained(self, node):
        """Return whether `node` lies entirely inside this function."""
        # make sure only nodes inside the function are accounted for
        # this solves cascaded functions (when multiple functions share
        # same ends)
        return (ida_funcs.func_contains(self.func, node.startEA) and
                ida_funcs.func_contains(self.func, node.endEA - 1))

idaplugin/rematch/instances/function.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def __init__(self, *args, **kwargs):
2222
collectors.vectors.AssemblyHashVector,
2323
collectors.vectors.MnemonicHashVector,
2424
collectors.vectors.MnemonicHistVector,
25-
collectors.vectors.BasicBlockSizeHistVector}
25+
collectors.vectors.BasicBlockSizeHistVector,
26+
collectors.vectors.BasicBlockGraphVector}
2627
self.annotations |= {collectors.annotations.AssemblyAnnotation}
2728

2829
def size(self):

0 commit comments

Comments
 (0)