from google.protobuf import text_format

from .caffe import get_caffe_resolver
from .errors import KaffeError, print_stderr
from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch
from .shapes import TensorShape


class Node(object):

    def __init__(self, name, kind, layer=None):
        self.name = name
        self.kind = kind
        self.layer = LayerAdapter(layer, kind) if layer else None
        self.parents = []
        self.children = []
        self.data = None
        self.output_shape = None
        self.metadata = {}

    def add_parent(self, parent_node):
        assert parent_node not in self.parents
        self.parents.append(parent_node)
        if self not in parent_node.children:
            parent_node.children.append(self)

    def add_child(self, child_node):
        assert child_node not in self.children
        self.children.append(child_node)
        if self not in child_node.parents:
            child_node.parents.append(self)

    def get_only_parent(self):
        if len(self.parents) != 1:
            raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' %
                             (self, len(self.parents)))
        return self.parents[0]

    @property
    def parameters(self):
        if self.layer is not None:
            return self.layer.parameters
        return None

    def __str__(self):
        return '[%s] %s' % (self.kind, self.name)

    def __repr__(self):
        return '%s (0x%x)' % (self.name, id(self))


class Graph(object):

    def __init__(self, nodes=None, name=None):
        self.nodes = nodes or []
        self.node_lut = {node.name: node for node in self.nodes}
        self.name = name

    def add_node(self, node):
        self.nodes.append(node)
        self.node_lut[node.name] = node

    def get_node(self, name):
        try:
            return self.node_lut[name]
        except KeyError:
            raise KaffeError('Layer not found: %s' % name)

    def get_input_nodes(self):
        return [node for node in self.nodes if len(node.parents) == 0]

    def get_output_nodes(self):
        return [node for node in self.nodes if len(node.children) == 0]

    def topologically_sorted(self):
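        '''Return the nodes sorted topologically (every parent precedes its
        children); raises a KaffeError if the graph contains a cycle.
        '''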
        sorted_nodes = []
        unsorted_nodes = list(self.nodes)
        temp_marked = set()
        perm_marked = set()

        def visit(node):
            if node in temp_marked:
                raise KaffeError('Graph is not a DAG.')
            if node in perm_marked:
                return
            temp_marked.add(node)
            for child in node.children:
                visit(child)
            perm_marked.add(node)
            temp_marked.remove(node)
            sorted_nodes.insert(0, node)

        while unsorted_nodes:
            visit(unsorted_nodes.pop())
        return sorted_nodes

    def compute_output_shapes(self):
        sorted_nodes = self.topologically_sorted()
        for node in sorted_nodes:
            node.output_shape = TensorShape(*NodeKind.compute_output_shape(node))

    def replaced(self, new_nodes):
        return Graph(nodes=new_nodes, name=self.name)

    def transformed(self, transformers):
        graph = self
        for transformer in transformers:
            graph = transformer(graph)
            if graph is None:
                raise KaffeError('Transformer failed: {}'.format(transformer))
            assert isinstance(graph, Graph)
        return graph

    def __contains__(self, key):
        return key in self.node_lut

    def __str__(self):
        hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output')
        s = [hdr, '-' * 94]
        for node in self.topologically_sorted():
            # If the node has learned parameters, display the first one's shape.
            # In case of convolutions, this corresponds to the weights.
            data_shape = str(node.data[0].shape) if node.data else '--'
            out_shape = str(tuple(node.output_shape)) if node.output_shape else '--'
            s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape,
                                                          out_shape))
        return '\n'.join(s)


class GraphBuilder(object):
    '''Constructs a model graph from a Caffe protocol buffer definition.'''
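    # A minimal usage sketch (the path is hypothetical):
    #
    #   builder = GraphBuilder('models/deploy.prototxt', phase='test')
    #   graph = builder.build()
    #   print(graph)  # prints a tabular per-layer summary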

    def __init__(self, def_path, phase='test'):
        '''
        def_path: Path to the model definition (.prototxt)
        phase: Either 'test' or 'train'. Used for filtering phase-specific nodes.
        '''
        self.def_path = def_path
        self.phase = phase
        self.load()

    def load(self):
        '''Load the layer definitions from the prototxt.'''
        self.params = get_caffe_resolver().NetParameter()
        with open(self.def_path, 'r') as def_file:
            text_format.Merge(def_file.read(), self.params)

    def filter_layers(self, layers):
        '''Filter out layers based on the current phase.'''
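        # Phase restrictions appear in the prototxt as include/exclude rules,
        # e.g. (a representative snippet):
        #   layer { name: "data" type: "Data" include { phase: TRAIN } }
        # caffe.proto defines Phase as TRAIN = 0, TEST = 1, hence the map below.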
        phase_map = {0: 'train', 1: 'test'}
        filtered_layer_names = set()
        filtered_layers = []
        for layer in layers:
            phase = self.phase
            if len(layer.include):
                phase = phase_map[layer.include[0].phase]
            if len(layer.exclude):
                phase = phase_map[1 - layer.exclude[0].phase]
            exclude = (phase != self.phase)
            # Dropout layers appear in a fair number of Caffe
            # test-time networks. These are just ignored. We'll
            # filter them out here.
            if (not exclude) and (phase == 'test'):
                exclude = (layer.type == LayerType.Dropout)
            if not exclude:
                filtered_layers.append(layer)
                # Guard against dupes.
                assert layer.name not in filtered_layer_names
                filtered_layer_names.add(layer.name)
        return filtered_layers

    def make_node(self, layer):
        '''Create a graph node for the given layer.'''
        kind = NodeKind.map_raw_kind(layer.type)
        if kind is None:
            raise KaffeError('Unknown layer type encountered: %s' % layer.type)
        # We want to use the layer's top names (the "output" names), rather than the
        # name attribute, which is more of a readability thing than a functional one.
        # Other layers will refer to a node by its "top name".
        return Node(layer.name, kind, layer=layer)

    def make_input_nodes(self):
        '''
        Create data input nodes.

        This method is for old-style inputs, where the input specification
        was not treated as a first-class layer in the prototxt.
        Newer models use the "Input" layer type.
        '''
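        # An old-style input spec looks like this in the prototxt (a typical
        # classification example; the values are illustrative):
        #   input: "data"
        #   input_dim: 1
        #   input_dim: 3
        #   input_dim: 224
        #   input_dim: 224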
        nodes = [Node(name, NodeKind.Data) for name in self.params.input]
        if nodes:
            input_dim = list(map(int, self.params.input_dim))
            if not input_dim:
                if len(self.params.input_shape) > 0:
                    input_dim = list(map(int, self.params.input_shape[0].dim))
                else:
                    raise KaffeError('Dimensions for input not specified.')
            for node in nodes:
                node.output_shape = tuple(input_dim)
        return nodes

    def build(self):
        '''
        Builds the graph from the Caffe layer definitions.
        '''
        # Get the layers
        layers = self.params.layers or self.params.layer
        # Filter out phase-excluded layers
        layers = self.filter_layers(layers)
        # Get any separately-specified input layers
        nodes = self.make_input_nodes()
        nodes += [self.make_node(layer) for layer in layers]
        # Initialize the graph
        graph = Graph(nodes=nodes, name=self.params.name)
        # Connect the nodes
        #
        # A note on layers and outputs:
        # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs
        # ("bottoms"). The bottoms refer to other layers' tops. A top can rewrite a bottom
        # (in the case of in-place operations). Note that the layer's name is not used for
        # establishing any connectivity. It's only used for data association. By convention,
        # a layer with a single top will often use the same name (although this is not required).
        #
        # The current implementation only supports single-output nodes (note that a node can still
        # have multiple children, since multiple child nodes can refer to the single top's name).
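        # For example (a common Caffe idiom), an in-place ReLU:
        #   layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" }
        # reads and rewrites "conv1"; the re-routing below substitutes the
        # relu1 node for later references to "conv1" instead of forming a cycle.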
        node_outputs = {}
        for layer in layers:
            node = graph.get_node(layer.name)
            for input_name in layer.bottom:
                assert input_name != layer.name
                parent_node = node_outputs.get(input_name)
                if (parent_node is None) or (parent_node == node):
                    parent_node = graph.get_node(input_name)
                node.add_parent(parent_node)
            if len(layer.top) > 1:
                raise KaffeError('Multiple top nodes are not supported.')
            for output_name in layer.top:
                if output_name == layer.name:
                    # Output is named the same as the node. No further action required.
                    continue
                # There are two possibilities here:
                #
                # Case 1: output_name refers to another node in the graph.
                # This is an "in-place operation" that overwrites an existing node.
                # This would create a cycle in the graph. We'll undo the in-placing
                # by substituting this node wherever the overwritten node is referenced.
                #
                # Case 2: output_name violates the convention layer.name == output_name.
                # Since we are working in the single-output regime, we can rename it to
                # match the layer name.
                #
                # In both cases, future references to this top are re-routed to this node.
                node_outputs[output_name] = node

        graph.compute_output_shapes()
        return graph


class NodeMapper(NodeDispatch):

    def __init__(self, graph):
        self.graph = graph

    def map(self):
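        '''Map the graph for a concrete backend: input nodes are handled
        separately, the remaining DAG is greedily decomposed into linear
        chains, each node is mapped via its kind-specific handler, and the
        mapped chains are committed (commit() is subclass-defined).
        '''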
        nodes = self.graph.topologically_sorted()
        # Remove input nodes - we'll handle them separately.
        input_nodes = self.graph.get_input_nodes()
        nodes = [t for t in nodes if t not in input_nodes]
        # Decompose the DAG into chains.
        chains = []
        for node in nodes:
            attach_to_chain = None
            if len(node.parents) == 1:
                parent = node.get_only_parent()
                for chain in chains:
                    if chain[-1] == parent:
                        # Node is part of an existing chain.
                        attach_to_chain = chain
                        break
            if attach_to_chain is None:
                # Start a new chain for this node.
                attach_to_chain = []
                chains.append(attach_to_chain)
            attach_to_chain.append(node)
        # Map each chain.
        mapped_chains = []
        for chain in chains:
            mapped_chains.append(self.map_chain(chain))
        return self.commit(mapped_chains)

    def map_chain(self, chain):
        return [self.map_node(node) for node in chain]

    def map_node(self, node):
        map_func = self.get_handler(node.kind, 'map')
        mapped_node = map_func(node)
        assert mapped_node is not None
        mapped_node.node = node
        return mapped_node

    def commit(self, mapped_chains):
        raise NotImplementedError('Must be implemented by subclass.')