1
+ #include < iostream>
2
+ #include < string>
3
+ #include < vector>
4
+ #include < stack>
5
+ #include " Util.h"
6
+ #include " newick.h"
7
+
8
+ NewickParser::Node* NewickParser::parse (const std::string& newick) {
9
+ Node* root = new Node ();
10
+ std::stack<Node*> stack;
11
+ stack.push (root); // dummy root
12
+ std::string token;
13
+
14
+ std::string parsed = " " ;
15
+ bool readingBranchLength = false ;
16
+
17
+ for (char ch : newick) {
18
+ parsed += ch;
19
+ if (readingBranchLength) {
20
+ if (ch == ' ,' || ch == ' )' || ch == ' ;' ) {
21
+ readingBranchLength = false ;
22
+ } else {
23
+ continue ;
24
+ }
25
+ }
26
+ switch (ch) {
27
+ case ' (' : // Start new node
28
+ stack.push (new Node ());
29
+ break ;
30
+ case ' )' : // End of a node
31
+ if (!token.empty ()) {
32
+ stack.top ()->children .push_back (new Node (token));
33
+ token.clear ();
34
+ }
35
+ if (!stack.empty ()) {
36
+ Node* finishedNode = stack.top ();
37
+ stack.pop ();
38
+ if (!stack.empty ()) {
39
+ stack.top ()->children .push_back (finishedNode);
40
+ }
41
+ }
42
+ break ;
43
+ case ' ,' : // Another child of the current node
44
+ if (!token.empty ()) {
45
+ stack.top ()->children .push_back (new Node (token));
46
+ token.clear ();
47
+ }
48
+ break ;
49
+ case ' :' : // Branch length portion starting; set flag to ignore
50
+ readingBranchLength = true ;
51
+ break ;
52
+ case ' ;' : // End of tree
53
+ break ;
54
+ default : // Otherwise just add to the token if alphanumeric
55
+ if (!isspace (ch)) {
56
+ token += ch;
57
+ }
58
+ break ;
59
+ }
60
+ }
61
+ Node* actualRoot = root->children .empty () ? nullptr : root->children .front ();
62
+ root->children .clear ();
63
+ delete root;
64
+ return actualRoot;
65
+ }
66
+
67
+ std::string NewickParser::toNewick (const NewickParser::Node* node) {
68
+ if (!node) return " " ;
69
+ std::string buffer;
70
+ if (!node->children .empty ()) {
71
+ // Intermediate node with children
72
+ buffer += " (" ;
73
+ for (size_t i = 0 ; i < node->children .size (); i++) {
74
+ buffer += NewickParser::toNewick (node->children [i]);
75
+ if (i < node->children .size () - 1 ) {
76
+ buffer += " ," ;
77
+ }
78
+ }
79
+ buffer += " )" ;
80
+ }
81
+ // Leaf node, just add name
82
+ buffer += node->name ;
83
+
84
+ return buffer;
85
+ }
86
+
87
+ /* *
88
+ * @brief Post-order traversal of a parsed Tree.
89
+ * Generates the merging order for structuremsa
90
+ *
91
+ * @param node Pointer to root TNode of the tree
92
+ */
93
+ void NewickParser::postOrder (NewickParser::Node *node, std::vector<std::string> *linkage) {
94
+ for (NewickParser::Node *child : node->children ) {
95
+ postOrder (child, linkage);
96
+ }
97
+ if (node->children .size () > 0 ) {
98
+ for (NewickParser::Node *child : node->children ) {
99
+ linkage->push_back (child->name );
100
+
101
+ // Propagate child name from leaf to root, so we
102
+ // always have a reference during alignment stage
103
+ node->name = child->name ;
104
+ }
105
+ }
106
+ }
107
+
108
+ /* *
109
+ * @brief Update nodeMap to point to newest root for ALL children of a Node
110
+ *
111
+ * @param node
112
+ * @param newParent
113
+ * @param nodeMap
114
+ */
115
+ void NewickParser::updateDescendants ( NewickParser::Node* node, NewickParser::Node* newParent, std::unordered_map<size_t , Node*>& nodeMap) {
116
+ nodeMap[node->id ] = newParent;
117
+ for (NewickParser::Node* child : node->children ) {
118
+ nodeMap[child->id ] = newParent;
119
+ updateDescendants (child, newParent, nodeMap);
120
+ }
121
+ }
122
+
123
+ /* *
124
+ * @brief Build a tree from a list of successive merges (i.e. with queryId/targetId)
125
+ *
126
+ * @param merges
127
+ * @return NewickParser::Node*
128
+ */
129
+ NewickParser::Node* NewickParser::buildTree (std::vector<AlnSimple> &merges) {
130
+ std::unordered_map<size_t , NewickParser::Node*> nodeMap;
131
+ NewickParser::Node* root = nullptr ;
132
+ NewickParser::Node* nodeA = nullptr ;
133
+ NewickParser::Node* nodeB = nullptr ;
134
+ for (const AlnSimple& merge : merges) {
135
+ root = new NewickParser::Node (merge.queryId );
136
+ nodeA = (nodeMap.find (merge.queryId ) == nodeMap.end ()) ? new NewickParser::Node (merge.queryId ) : nodeMap[merge.queryId ];
137
+ nodeB = (nodeMap.find (merge.targetId ) == nodeMap.end ()) ? new NewickParser::Node (merge.targetId ) : nodeMap[merge.targetId ];
138
+ root->children .push_back (nodeA);
139
+ root->children .push_back (nodeB);
140
+ updateDescendants (nodeA, root, nodeMap);
141
+ updateDescendants (nodeB, root, nodeMap);
142
+ }
143
+ return root;
144
+ }
145
+
146
+
147
+ void NewickParser::addNames (Node* root, IndexReader* headers) {
148
+ for (auto &child : root->children ) {
149
+ NewickParser::addNames (child, headers);
150
+ }
151
+ if (root->children .size () == 0 ) {
152
+ unsigned int headerId = headers->sequenceReader ->getId (root->id );
153
+ root->name = Util::parseFastaHeader (headers->sequenceReader ->getData (headerId, 0 ));
154
+ }
155
+ }
0 commit comments