Skip to content

Commit 9d271ef

Browse files
committed
[WIP] Parser: Working on the Paint Algorithm for token path.
1 parent 7bb1d89 commit 9d271ef

File tree

8 files changed

+237
-164
lines changed

8 files changed

+237
-164
lines changed

gram/create_parse_functions.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ def create_serial_parse_function(serial_idx, key, v, tag_name="TagName"):
8787
static_assert(base::kNumberOfElements == 1);
8888
serial_funcs.executed_mask( decltype(serial_funcs)::base::bitset_type(this->executed_mask_.value() ) );
8989
output = serial_funcs();
90+
if(output.work_){{
91+
context_->paint(calling_depth(), start_token, output.cur_token_, ParseFunctionKind::k{name});
92+
}}
9093
this->executed_mask_.set(serial_funcs.executed_mask().value());
9194
return output;
9295
"""
@@ -117,7 +120,7 @@ def create_serial_in_parallel_function(s_v, k, flg, idx_str, tidx):
117120
break
118121
contents_type += f""" ParseFunction,"""
119122
contents += f"""ParseFunction::
120-
create_single_token_check(context_, {opt_str},calling_depth() + 2,
123+
create_single_token_check(context_, {opt_str},calling_depth() + 1,
121124
token::details::TokenKind::{sp_tokens[s]},
122125
diag::DiagKind::{k.replace("-","_")}_expect_{sp_tokens[s]}),"""
123126

@@ -128,7 +131,7 @@ def create_serial_in_parallel_function(s_v, k, flg, idx_str, tidx):
128131
else:
129132
assert s in gram_tree
130133
contents_type += f"{camel_case(s)},"
131-
contents += f"{camel_case(s)}(context_, {opt_str}, calling_depth() + 2),"
134+
contents += f"{camel_case(s)}(context_, {opt_str}, calling_depth() + 1),"
132135
contents = contents[:-1]
133136
contents_type = contents_type[:-1]
134137
contents_type += ">,"
@@ -208,6 +211,9 @@ def create_parallel_function(serial_idx, key, v):
208211
content_op_out = f"""
209212
{content_type} {content_func_def}
210213
{content_op}
214+
if(output.work_){{
215+
context_->paint(calling_depth(), start_token, output.cur_token_, ParseFunctionKind::k{name});
216+
}}
211217
return output; """
212218
return content_op_out, content_init, content_type, "", flg
213219
else:
@@ -234,10 +240,15 @@ def create_parallel_function(serial_idx, key, v):
234240
""" + "\n}\n" + f"""
235241
diag::infos() << basic::str::from({tag_name}, "into recursive. \\n");
236242
{content_r_type} {content_r_func_def}
237-
{content_r_op}""" + """
238-
if(!output.work_){
243+
{content_r_op}""" + f"""
244+
if(!output.work_){{
245+
246+
context_->paint(calling_depth(), start_token, non_recursive_output.cur_token_, ParseFunctionKind::k{name});
247+
239248
return non_recursive_output;
240-
}
249+
}}
250+
context_->paint(calling_depth(), start_token, output.cur_token_, ParseFunctionKind::k{name});
251+
241252
return output;
242253
243254
"""
@@ -458,6 +469,7 @@ class NAME : public ParseFunction<N> { \\
458469
if (!this->valid()) {
459470
return output;
460471
}
472+
auto start_token = output.cur_token_;
461473
___CONTENT_OP___
462474
}
463475
"""

include/ast.h

+64-1
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,67 @@
2222
*/
2323
#pragma once
2424

25-
namespace lsp::ast {}
25+
#include <array>
26+
#include "basic/map.h"
27+
#include "basic/vec.h"
28+
#include "token.h"
29+
30+
namespace lps::parser::details {
31+
32+
// Identifies which parse function produced a result.
//
// Two fixed enumerators come first; the remaining ones are expanded from
// parse_function/kinds.def through the PARSE_FUNC X-macro. kNum is declared
// after the last expanded entry and is used in this header as the size of the
// name table.
enum class ParseFunctionKind : uint16_t {
  kUnknown = 0,
  kExpectedToken = 1,
#define PARSE_FUNC(FUNC) FUNC,
#include "parse_function/kinds.def"
  kNum,
};
39+
40+
namespace kind {

// (kind, printable name) pairs for every ParseFunctionKind enumerator below
// kNum.
//
// The array has kNum slots. kinds.def only expands the generated kinds, so the
// two fixed enumerators are listed explicitly; otherwise their slots would be
// value-initialised to {kUnknown, nullptr} and streaming kUnknown would hand a
// null C string to the output stream, while kExpectedToken would have no entry.
static constexpr std::array<std::pair<ParseFunctionKind, const char*>,
                            static_cast<uint16_t>(ParseFunctionKind::kNum)>
    kLists = {{
        {ParseFunctionKind::kUnknown, "kUnknown"},
        {ParseFunctionKind::kExpectedToken, "kExpectedToken"},
#define PARSE_FUNC(X) {ParseFunctionKind::X, #X},
#include "parse_function/kinds.def"
    }};

// Lookup table built from kLists; used by operator<< to print a kind's name.
static constexpr lps::basic::map::Map<ParseFunctionKind, const char*,
                                      static_cast<uint16_t>(
                                          ParseFunctionKind::kNum)>
    kMap{kLists};

}  // namespace kind
55+
56+
inline std::ostream& operator<<(std::ostream& s, ParseFunctionKind kind) {
57+
s << kind::kMap.at(kind);
58+
return s;
59+
}
60+
61+
struct Line {
62+
const token::Token* start_{nullptr};
63+
const token::Token* end_{nullptr};
64+
ParseFunctionKind kind_{ParseFunctionKind::kUnknown};
65+
token::details::TokenKind token_kind_{token::details::TokenKind::unknown};
66+
size_t len_{0};
67+
size_t calling_depth_{0};
68+
};
69+
70+
class Tree {
71+
public:
72+
struct Node {
73+
using sub_nodes_type = basic::Vector<4, Node*>;
74+
sub_nodes_type children_;
75+
Line line_;
76+
};
77+
Node* append(const Node& n) {
78+
nodes_.append(n);
79+
return &nodes_.back();
80+
}
81+
Node& root() { return root_; }
82+
83+
private:
84+
Node root_;
85+
basic::Vector<8, Node> nodes_;
86+
};
87+
88+
} // namespace lps::parser::details

include/parser.h

+125-57
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525

2626
#include <functional>
2727
#include <limits>
28+
#include <unordered_map>
2829
#include <utility>
30+
#include "ast.h"
2931
#include "basic/bitset.h"
3032
#include "basic/exception.h"
3133
#include "basic/mem.h"
@@ -37,10 +39,10 @@
3739
namespace lps::parser {
3840

3941
namespace details {
40-
4142
class Context {
4243
public:
4344
friend class ContextTrait;
45+
static constexpr basic::mem::TraceTag::tag_type kTag = "Context";
4446
using executed_func_type = std::function<void(Context*)>;
4547
void with(const executed_func_type& func) { func(this); }
4648
token::TokenLists& token_lists() { return token_lists_; }
@@ -49,10 +51,128 @@ class Context {
4951
size_t len_from_start(const token::Token& cur_token) {
5052
return token_lists_.len(start_token_, cur_token);
5153
}
54+
// Records that the parse function `kind`, running at `calling_depth`, covered
// the tokens from `start` to `end`.
//
// Both tokens must already be present in token_lists_ (asserted). Lines are
// grouped by the address of the archived start token; a line whose end and
// kind match an already recorded one is not stored a second time.
void paint(size_t calling_depth, const token::Token& start,
           const token::Token& end, ParseFunctionKind kind,
           token::details::TokenKind token_kind =
               token::details::TokenKind::unknown) {
  lps_assert(kTag, token_lists_.has(start) && token_lists_.has(end));
  const auto* first = &token_lists_.at(start);
  const auto* last = &token_lists_.at(end);

  // Duplicate check: same start (map key), same end, same kind.
  if (const auto known = path_.find(first); known != path_.end()) {
    for (const auto& recorded : known->second) {
      const bool same_span = recorded.end_ == last;
      if (same_span && recorded.kind_ == kind) {
        return;
      }
    }
  }

  path_[first].append({first, last, kind, token_kind,
                       token::TokenLists::len(first, last), calling_depth});
}
72+
73+
// Looks up the archived token for `start` and delegates to find_line with a
// "greater than" comparison and an initial length of 0.
Line longest_line(const auto& start) {
  constexpr auto kLonger = [](size_t lhs, size_t rhs) {
    return lhs > rhs;
  };
  const auto* first = &token_lists_.at(start);
  const size_t seed = 0;
  return find_line<kLonger>(first, seed);
}
81+
82+
// Looks up the archived token for `start` and delegates to find_line with a
// "less than" comparison and the largest size_t as the initial length.
Line shortest_line(const token::Token& start) {
  constexpr auto kShorter = [](size_t lhs, size_t rhs) {
    return lhs < rhs;
  };
  const auto* first = &token_lists_.at(start);
  const size_t seed = std::numeric_limits<size_t>::max();
  return find_line<kShorter>(first, seed);
}
90+
91+
// Line to tree: builds a Tree for `root_line` from the lines recorded in path_.
//
// Work in progress: the code that fills the candidate child list `ns` is still
// commented out, so no children are attached yet and the returned tree only
// contains its default root.
Tree l2t(const Line& root_line) {  // line to tree
  Tree tree;
  auto l2t_impl = [this, &tree](const Line& root_line) -> bool {
    // `func` is the lambda itself, passed in so it can recurse once the
    // commented-out block below is enabled.
    auto run = [this, &tree](Tree::Node& root, const Line& root_line,
                             auto func) -> bool {
      const auto* tmp_start = root_line.start_;
      if (!path_.contains(tmp_start)) {
        return false;
      }
      basic::Vector<8, Tree::Node> ns;
      // NOTE(review): calling_depth_ is a size_t, so `calling_depth_ - 1`
      // wraps around when the depth is 0. The generated parse functions also
      // create their sub-functions with calling_depth() + 1, while children
      // are matched here at depth - 1 — confirm the intended direction.
      for (const auto& l0 : path_[root_line.start_]) {
        if (l0.calling_depth_ == root_line.calling_depth_ - 1) {
          // Follow lines of the matching depth end-to-start until the end of
          // root_line is reached or the chain breaks.
          do {
            bool flg = false;
            for (const auto& l : path_[tmp_start]) {
              if (l.calling_depth_ == root_line.calling_depth_ - 1) {
                tmp_start = l.end_;
                flg = true;
                break;
              }
            }
            if (!flg || !path_.contains(tmp_start)) {
              break;
            }
          } while (tmp_start != root_line.end_);
        }
      }
      // for (const auto& l : path_[root_line.start_]) {
      //   if (l.calling_depth_ == root_line.calling_depth_ - 1) {
      //     Tree::Node n;
      //     n.line_ = l;
      //     do {
      //       if (func(n, l, func)) {
      //         if ((!ns.empty() && ns.back().line_.end_ == l.start_) ||
      //             ns.empty()) {
      //           ns.append(n);
      //           tmp_start = l.end_;
      //           break;
      //         }
      //       }
      //       break;
      //     } while (tmp_start != root_line.end_);
      //   }
      // }

      // `ns` may be empty (it always is while the block above is disabled);
      // calling back() on an empty vector must be avoided.
      if (!ns.empty() && ns.back().line_.end_ == root_line.end_) {
        for (const auto& n : ns) {
          root.children_.append(tree.append(n));
        }
        return true;
      }
      return false;
    };
    return run(tree.root(), root_line, run);
  };

  // The result is not acted upon yet: on failure the tree keeps its default
  // root with no children.
  l2t_impl(root_line);

  return tree;
}
52152

53153
private:
154+
// Overload that takes an already-resolved start pointer and a caller-supplied
// initial length; delegates to find_line with a "less than" comparison.
Line shortest_line(const token::Token* p_start, size_t min_l) {
  constexpr auto kShorter = [](size_t lhs, size_t rhs) {
    return lhs < rhs;
  };
  return find_line<kShorter>(p_start, min_l);
}
159+
160+
template <auto F>
161+
Line find_line(const token::archived_type* p_start, size_t the_l) {
162+
Line the_line;
163+
lps_assert(kTag, path_.contains(p_start));
164+
for (const auto& line : path_[p_start]) {
165+
if (F(line.len_, the_l)) {
166+
the_line = line;
167+
}
168+
}
169+
lps_assert(kTag, the_line.len_ > 0);
170+
return the_line;
171+
}
172+
54173
token::TokenLists token_lists_;
55174
token::Token start_token_;
175+
std::unordered_map<const token::Token*, basic::Vector<16, Line>> path_;
56176
};
57177

58178
class ContextTrait {
@@ -64,62 +184,6 @@ class ContextTrait {
64184
Context* context_;
65185
};
66186

67-
enum class ParseFunctionKind : uint16_t {
68-
kUnknown = 0,
69-
kExpectedToken = 1,
70-
#define PARSE_FUNC(FUNC) FUNC,
71-
#include "parse_function/kinds.def"
72-
kNum,
73-
};
74-
75-
namespace kind {
76-
77-
static constexpr std::array<std::pair<ParseFunctionKind, const char*>,
78-
static_cast<uint16_t>(ParseFunctionKind::kNum)>
79-
kLists = {{
80-
#define PARSE_FUNC(X) {ParseFunctionKind::X, #X},
81-
#include "parse_function/kinds.def"
82-
}};
83-
84-
static constexpr lps::basic::map::Map<ParseFunctionKind, const char*,
85-
static_cast<uint16_t>(
86-
ParseFunctionKind::kNum)>
87-
kMap{kLists};
88-
89-
} // namespace kind
90-
91-
inline std::ostream& operator<<(std::ostream& s, ParseFunctionKind kind) {
92-
s << kind::kMap.at(kind);
93-
return s;
94-
}
95-
96-
class Tree {
97-
public:
98-
static Tree& instance() {
99-
static Tree tree;
100-
return tree;
101-
}
102-
103-
struct Node {
104-
using sub_nodes_type = basic::Vector<4, Node*>;
105-
using token_pts_type = basic::Vector<8, token::archived_type*>;
106-
ParseFunctionKind kind_{ParseFunctionKind::kUnknown};
107-
sub_nodes_type sub_nodes_;
108-
token_pts_type token_pts_;
109-
token::details::TokenKind expected_token_kind_{
110-
token::details::TokenKind::unknown};
111-
};
112-
113-
Node* append(const Node& node) {
114-
nodes_.append(node);
115-
return &nodes_.back();
116-
}
117-
[[nodiscard]] size_t size() const { return nodes_.size(); }
118-
119-
private:
120-
basic::Vector<4, Node> nodes_;
121-
};
122-
123187
struct ParseFunctionOutputs {
124188
explicit ParseFunctionOutputs() = default;
125189

@@ -355,6 +419,10 @@ class ParseFunction : public ContextTrait {
355419
output.work_ = true;
356420
output.cur_token_ = next_tok;
357421
++output.len_;
422+
423+
func->context()->paint(func->calling_depth(), output.last_token_,
424+
output.cur_token_, ParseFunctionKind::kUnknown,
425+
token_kind);
358426
}
359427

360428
return output;

include/src.h

+17
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <filesystem>
2727
#include <limits>
2828
#include <map>
29+
#include "ast.h"
2930
#include "basic/exception.h"
3031
#include "basic/file.h"
3132
#include "basic/mem.h"
@@ -198,6 +199,11 @@ class TokenLists {
198199

199200
using ele_type = token::archived_type;
200201

202+
struct SavedStructure {
203+
ele_type token_;
204+
basic::Vector<8, lps::parser::details::ParseFunctionKind> colors_;
205+
};
206+
201207
bool has(uint32_t file_id, uint64_t offset) const {
202208
if (lists_.contains(file_id)) {
203209
return lists_.at(file_id).contains(offset);
@@ -270,6 +276,17 @@ class TokenLists {
270276
return l;
271277
}
272278

279+
static size_t len(const Token* p_start, const Token* p_end) {
280+
const Token* p = p_start;
281+
size_t l = 0;
282+
while (p != nullptr && p != p_end) {
283+
p = p->next();
284+
++l;
285+
}
286+
lps_assert(kTag, p != nullptr);
287+
return l;
288+
}
289+
273290
void append(const Token& tok, Info last_tok_info = {0, 0}) {
274291
auto info = Info::create(tok);
275292
if (lists_.contains(tok.file_id())) {

0 commit comments

Comments
 (0)