Skip to content

Commit 9eb73f4

Browse files
committed
parser WIP
1 parent f0cfabb commit 9eb73f4

File tree

3 files changed

+228
-10
lines changed

3 files changed

+228
-10
lines changed

src/textual/css/tokenize.py

+64-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import re
4-
from typing import TYPE_CHECKING, Iterable
4+
from typing import TYPE_CHECKING, ClassVar, Iterable
55

66
from textual.css.tokenizer import Expect, Token, Tokenizer
77

@@ -176,6 +176,43 @@
176176

177177

178178
class TokenizerState:
    """Base class for a tokenizer state machine.

    Drives a `Tokenizer` over the code, switching the expected token set
    (an `Expect`) in response to token names, with a push/pop stack so
    that states may nest.

    Class attributes:
        EXPECT: Initial (and fallback) expect state.
        STATE_MAP: Maps a token name to the state to switch to.
        STATE_PUSH: Maps a token name to a state to enter, saving the
            current state on a stack.
        STATE_POP: Maps a token name to the name of its matching push
            token; encountering it restores the saved state.
    """

    EXPECT: ClassVar[Expect] = expect_root_scope
    STATE_MAP: ClassVar[dict[str, Expect]] = {}
    STATE_PUSH: ClassVar[dict[str, Expect]] = {}
    STATE_POP: ClassVar[dict[str, str]] = {}

    def __call__(self, code: str, read_from: CSSLocation) -> Iterable[Token]:
        """Tokenize `code`, yielding tokens up to and including "eof".

        Args:
            code: The code to tokenize.
            read_from: Location the code was read from.

        Yields:
            Tokens produced by the tokenizer.
        """
        tokenizer = Tokenizer(code, read_from=read_from)
        expect = self.EXPECT
        get_token = tokenizer.get_token
        get_state = self.STATE_MAP.get
        state_stack: list[Expect] = []

        while True:
            token = get_token(expect)
            name = token.name
            if name in self.STATE_MAP:
                expect = get_state(name, expect)
            elif name in self.STATE_PUSH:
                # Fix: save the *current* state so the matching pop restores
                # it (the original pushed the new state, making pop a no-op).
                state_stack.append(expect)
                expect = self.STATE_PUSH[name]
            elif name in self.STATE_POP:
                # Fix: the original set `skip_get_token = True` when the stack
                # was empty, which re-processed the same pop token on every
                # iteration — an infinite loop. Fall back to EXPECT instead.
                expect = state_stack.pop() if state_stack else self.EXPECT
            yield token
            if name == "eof":
                break
class TCSSTokenizerState:
179216
"""State machine for the tokenizer.
180217
181218
Attributes:
@@ -232,19 +269,19 @@ def __call__(self, code: str, read_from: CSSLocation) -> Iterable[Token]:
232269
yield token
233270

234271

class DeclarationTokenizerState(TCSSTokenizerState):
    """State machine configuration for tokenizing bare declarations
    (no enclosing rule-set scope, judging by the expect names — confirm)."""

    # Start (and reset to) expecting a standalone declaration.
    EXPECT = expect_declaration_solo
    STATE_MAP = {
        # A declaration name switches to expecting its content/value.
        "declaration_name": expect_declaration_content,
        # The end of a declaration switches back to expecting the next one.
        "declaration_end": expect_declaration_solo,
    }

242279

class ValueTokenizerState(TCSSTokenizerState):
    """State machine configuration that only expects declaration content
    (values); it defines no state transitions."""

    EXPECT = expect_declaration_content_solo
245282

246283

247-
class StyleTokenizerState(TokenizerState):
284+
class StyleTokenizerState(TCSSTokenizerState):
248285
EXPECT = (
249286
Expect(
250287
"style token",
@@ -262,7 +299,7 @@ class StyleTokenizerState(TokenizerState):
262299
)
263300

264301

# Callable singletons: the public tokenize entry points for this module.
tokenize = TCSSTokenizerState()
tokenize_declarations = DeclarationTokenizerState()
tokenize_value = ValueTokenizerState()
tokenize_style = StyleTokenizerState()
@@ -283,3 +320,25 @@ def tokenize_values(values: dict[str, str]) -> dict[str, list[Token]]:
283320
for name, value in values.items()
284321
}
285322
return value_tokens
323+
324+
325+
if __name__ == "__main__":
    # Ad-hoc benchmark/debug harness for the markup tokenizer (WIP commit).
    # NOTE(review): `tokenize_markup` is not defined or imported in this
    # module as shown — confirm where it comes from, or this will NameError.
    text = "[@click=app.notify(['foo', 500])] Click me! [/] :-)"

    # text = "[@click=hello]Click"
    from rich.console import Console

    # markup=False so token reprs print verbatim, not as Rich markup.
    c = Console(markup=False)

    from textual._profile import timer

    # Time the Expect-based tokenizer...
    with timer("tokenize"):
        list(tokenize_markup(text, read_from=("", "")))

    from textual.markup import _parse

    # ...against the existing regex-based parser, for comparison.
    with timer("_parse"):
        list(_parse(text))

    for token in tokenize_markup(text, read_from=("", "")):
        c.print(repr(token))

src/textual/css/tokenizer.py

+31-3
Original file line numberDiff line numberDiff line change
@@ -128,17 +128,22 @@ def __init__(self, description: str, **tokens: str) -> None:
128128
self.search = self._regex.search
129129
self._expect_eof = False
130130
self._expect_semicolon = True
131+
self._extract_text = False
131132

    def expect_eof(self, eof: bool = True) -> Expect:
        """Expect an end of file.

        Args:
            eof: Value for the expect-EOF flag.

        Returns:
            Self, to allow chaining.
        """
        self._expect_eof = eof
        return self
136137

    def expect_semicolon(self, semicolon: bool = True) -> Expect:
        """Tokenizer expects text to be terminated with a semi-colon.

        Args:
            semicolon: Value for the expect-semicolon flag.

        Returns:
            Self, to allow chaining.
        """
        self._expect_semicolon = semicolon
        return self
141142

    def extract_text(self, extract: bool = True) -> Expect:
        """Enable extraction of text between pattern matches.

        When set, the tokenizer emits the text preceding the next match
        as a "text" token (see `Tokenizer.get_token`).

        Args:
            extract: Value for the extract-text flag.

        Returns:
            Self, to allow chaining.
        """
        self._extract_text = extract
        return self
142147
def __rich_repr__(self) -> rich.repr.Result:
143148
yield from zip(self.names, self.regexes)
144149

@@ -253,7 +258,30 @@ def get_token(self, expect: Expect) -> Token:
253258
"Unexpected end of file; did you forget a '}' ?",
254259
)
255260
line = self.lines[line_no]
256-
match = expect.match(line, col_no)
261+
preceding_text: str = ""
262+
if expect._extract_text:
263+
match = expect.search(line, col_no)
264+
if match is None:
265+
preceding_text = line[self.col_no :]
266+
self.line_no += 1
267+
else:
268+
col_no = match.start()
269+
preceding_text = line[self.col_no : col_no]
270+
if preceding_text:
271+
token = Token(
272+
"text",
273+
preceding_text,
274+
self.read_from,
275+
self.code,
276+
(line_no, col_no),
277+
referenced_by=None,
278+
)
279+
self.col_no = col_no
280+
return token
281+
282+
else:
283+
match = expect.match(line, col_no)
284+
257285
if match is None:
258286
error_line = line[col_no:].rstrip()
259287
error_message = (

src/textual/markup.py

+133-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
__all__ = ["MarkupError", "escape", "to_content"]
4+
35
import re
46
from ast import literal_eval
57
from operator import attrgetter
@@ -15,6 +17,14 @@
1517
Union,
1618
)
1719

20+
from textual.css.tokenize import (
21+
COLOR,
22+
PERCENT,
23+
TOKEN,
24+
VARIABLE_REF,
25+
Expect,
26+
TokenizerState,
27+
)
1828
from textual.style import Style
1929

2030
if TYPE_CHECKING:
@@ -25,7 +35,63 @@ class MarkupError(Exception):
2535
"""An error occurred parsing Textual markup."""
2636

2737

28-
__all__ = ["MarkupError", "escape", "to_content"]
38+
# Tag-interior state: tokens permitted between "[" (or "[/") and "]".
expect_markup_tag = Expect(
    "style token",
    end_tag=r"(?<!\\)\]",
    key=r"[@a-zA-Z_-][a-zA-Z0-9_-]*=",
    percent=PERCENT,
    color=COLOR,
    token=TOKEN,
    variable_ref=VARIABLE_REF,
    whitespace=r"\s+",
)

# Top-level markup state: only unescaped "[", "[/" and "]" are special;
# everything in between is emitted as "text" tokens (extract_text).
expect_markup = Expect(
    "markup token",
    open_closing_tag=r"(?<!\\)\[/",
    open_tag=r"(?<!\\)\[",
    end_tag=r"(?<!\\)\]",
).extract_text()

# Expression state, entered after a "key=" inside a tag.
# NOTE(review): both `end_tag` and `square_end` can match "]" here —
# confirm which wins (pattern order) when square brackets are nested.
expect_markup_expression = Expect(
    "markup",
    end_tag=r"(?<!\\)\]",
    word=r"\w+",
    period=r"\.",
    round_start=r"\(",
    round_end=r"\)",
    square_start=r"\[",
    square_end=r"\]",
    curly_start=r"\{",
    curly_end=r"\}",
    comma=",",
    whitespace=r"\s+",
    double_string=r"\".*?\"",
    single_string=r"'.*?'",
)
74+
class MarkupTokenizer(TokenizerState):
    """Tokenizes Textual markup."""

    # NOTE(review): `expect_eof(True)` mutates the shared module-level
    # `expect_markup` instance in place — confirm that's intended.
    EXPECT = expect_markup.expect_eof(True)
    STATE_MAP = {
        # Opening a tag (or closing tag) switches to the tag-interior state.
        "open_tag": expect_markup_tag,
        "open_closing_tag": expect_markup_tag,
        # "]" returns to the top-level markup state.
        "end_tag": expect_markup,
        # "key=" switches to the expression state.
        "key": expect_markup_expression,
    }
    # Opening brackets push the expression state (allows nesting).
    STATE_PUSH = {
        "round_start": expect_markup_expression,
        "square_start": expect_markup_expression,
        "curly_start": expect_markup_expression,
    }
    # Closing brackets pop back to the state saved at the matching open.
    STATE_POP = {
        "round_end": "round_start",
        "square_end": "square_start",
        "curly_end": "curly_start",
    }
2995

3096
RE_TAGS = re.compile(
3197
r"""((\\*)\[([\$a-z#/@][^[]*?)])""",
@@ -248,6 +314,68 @@ def pop_style(style_name: str) -> Tuple[int, Tag]:
248314
return content
249315

250316

317+
def to_content(markup: str, style: str | Style = "") -> Content:
    """Parse Textual markup into a `Content` instance.

    Args:
        markup: Markup text, e.g. ``"[bold]Hello[/bold]"``.
        style: Base style (currently unused in this body — TODO confirm
            intended use).

    Returns:
        A `Content` holding the tag-free text plus the styled spans
        extracted from the markup.
    """
    from textual.content import Content, Span

    tokenizer = MarkupTokenizer()
    iter_tokens = iter(tokenizer(markup, ("inline", "")))

    text: list[str] = []  # Plain text pieces (tags stripped).
    # Stack of (start position, tag body) for tags not yet closed.
    style_stack: list[tuple[int, str]] = []
    spans: list[Span] = []

    position = 0  # Current length of the accumulated plain text.
    tag_text: list[str]
    for token in iter_tokens:
        token_name = token.name
        if token_name == "text":
            text.append(token.value)
            position += len(token.value)
        elif token_name == "open_tag":
            # Gather everything up to the closing "]" as the tag body.
            tag_text = []
            for token in iter_tokens:
                if token.name == "end_tag":
                    break
                tag_text.append(token.value)
            opening_tag = "".join(tag_text)
            style_stack.append((position, opening_tag))

        elif token_name == "open_closing_tag":
            tag_text = []
            for token in iter_tokens:
                if token.name == "end_tag":
                    break
                tag_text.append(token.value)
            closing_tag = "".join(tag_text)
            if closing_tag:
                # Explicit close: finish the most recent matching open tag.
                for index, (tag_position, tag_body) in enumerate(
                    reversed(style_stack)
                ):
                    if tag_body == closing_tag:
                        # Fix: `index` counts from the end, so the list index
                        # is -(index + 1). The original `pop(-index)` removed
                        # the wrong entry — for index == 0, pop(-0) == pop(0)
                        # dropped the *oldest* open tag instead of the match.
                        style_stack.pop(-(index + 1))
                        spans.append(Span(tag_position, position, tag_body))
                        break
            else:
                # Anonymous close "[/]" finishes the most recent open tag.
                # NOTE(review): raises IndexError on an unbalanced "[/]" —
                # confirm whether this should be a MarkupError instead.
                open_position, tag = style_stack.pop()
                spans.append(Span(open_position, position, tag))

    content_text = "".join(text)
    text_length = len(content_text)
    # Any tags left open run to the end of the text.
    while style_stack:
        position, tag = style_stack.pop()
        spans.append(Span(position, text_length, tag))

    # Debug prints removed (left over from the WIP commit).
    content = Content(content_text, spans)
    return content
377+
378+
251379
if __name__ == "__main__": # pragma: no cover
252380
from rich.highlighter import ReprHighlighter
253381

@@ -296,7 +424,10 @@ def on_markup_changed(self, event: TextArea.Changed) -> None:
296424
results.update(event.text_area.text)
297425
except Exception as error:
298426
highlight = ReprHighlighter()
299-
results.update(highlight(str(error)))
427+
# results.update(highlight(str(error)))
428+
from rich.traceback import Traceback
429+
430+
results.update(Traceback())
300431
self.query_one("#results-container").add_class("-error")
301432
else:
302433
self.query_one("#results-container").remove_class("-error")

0 commit comments

Comments (0)