Skip to content

Commit f2ab30f

Browse files
committed
up lexer
1 parent 7b5a471 commit f2ab30f

File tree

6 files changed

+150
-87
lines changed

6 files changed

+150
-87
lines changed

solidity/NOTES.md

-55
Original file line numberDiff line numberDiff line change
@@ -55,58 +55,3 @@ https://pygments.org/docs/lexers/
5555

5656

5757

58-
RubyVM::AbstractSyntaxTree.parse("puts('test', )", keep_tokens: true).tokens
59-
# =>
60-
# [[0, :tIDENTIFIER, "puts", [1, 0, 1, 4]],
61-
# [1, :"(", "(", [1, 4, 1, 5]],
62-
# [2, :tSTRING_BEG, "'", [1, 5, 1, 6]],
63-
# [3, :tSTRING_CONTENT, "test", [1, 6, 1, 10]],
64-
# [4, :tSTRING_END, "'", [1, 10, 1, 11]],
65-
# [5, :",", ",", [1, 11, 1, 12]],
66-
# [6, :tSP, " ", [1, 12, 1, 13]],
67-
# [7, :")", ")", [1, 13, 1, 14]]]
68-
69-
70-
require 'ripper'
71-
require 'pp'
72-
73-
code = <<STR
74-
75-
76-
5.times do | x |
77-
puts x
78-
puts "hello"
79-
puts 'hello' ## a comment here
80-
end
81-
82-
83-
STR
84-
85-
puts code
86-
pp Ripper.lex(code)
87-
88-
89-
90-
[[[1, 0], :on_ignored_nl, "\n", BEG],
91-
[[2, 0], :on_ignored_nl, "\n", BEG],
92-
[[3, 0], :on_int, "5", END],
93-
[[3, 1], :on_period, ".", DOT],
94-
[[3, 2], :on_ident, "times", ARG],
95-
[[3, 7], :on_sp, " ", ARG],
96-
[[3, 11], :on_kw, "do", BEG],
97-
[[3, 13], :on_sp, " ", BEG],
98-
[[3, 17], :on_op, "|", BEG|LABEL],
99-
[[3, 18], :on_sp, " ", BEG|LABEL],
100-
[[3, 22], :on_ident, "x", ARG],
101-
[[3, 23], :on_sp, " ", ARG],
102-
[[3, 27], :on_op, "|", BEG|LABEL],
103-
[[3, 28], :on_ignored_nl, "\n", BEG|LABEL],
104-
[[4, 0], :on_sp, "\t", BEG|LABEL],
105-
[[4, 1], :on_ident, "puts", CMDARG],
106-
[[4, 5], :on_sp, " ", CMDARG],
107-
[[4, 6], :on_ident, "x", END|LABEL],
108-
[[4, 7], :on_nl, "\n", BEG],
109-
[[5, 0], :on_kw, "end", END],
110-
[[5, 3], :on_nl, "\n", BEG],
111-
[[6, 0], :on_ignored_nl, "\n", BEG],
112-
[[7, 0], :on_ignored_nl, "\n", BEG]]

solidity/lib/solidity/lexer.rb

+19-26
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,14 @@ def initialize( txt )
4343
## SingleQuotedStringCharacter
4444
## : ~['\r\n\\] | ('\\' .) ;
4545

46+
DOUBLE_QUOTE = %r{"
47+
( \\\\. | [^"\r\n\\] )*
48+
"}x
4649

4750
SINGLE_QUOTE = %r{'
48-
( \\\\. | [^'] )*
51+
( \\\\. | [^'\r\n\\] )*
4952
'}x
5053

51-
DOUBLE_QUOTE = %r{"
52-
( \\\\. | [^"] )*
53-
"}x
54-
5554

5655
## from the solidity grammar
5756
## > An identifier in solidity has to start with a letter,
@@ -76,40 +75,34 @@ def initialize( txt )
7675
##
7776
## COMMENT
7877
## : '/*' .*? '*/' ;
79-
##
8078
## LINE_COMMENT
8179
## : '//' ~[\r\n]* ;
8280

81+
COMMENT = %r{/\*
82+
.*?
83+
\*/}x
84+
85+
LINE_COMMENT = %r{//
86+
[^\r\n]*}x
8387

8488
def tokenize
8589
t = []
8690
s = StringScanner.new( @txt )
8791

8892
until s.eos? ## loop until hitting end-of-string (file)
89-
if s.check( /[ \t]*\/\*/ )
90-
## note: auto-slurp leading (optinal) spaces!!!! - why? why not?
91-
comment = s.scan_until( /\*\// )
92-
## print "multi-line comment:"
93-
## pp comment
94-
t << [:comment, comment.lstrip]
95-
elsif s.check( /[ \t]*\/\// )
96-
## note: auto-slurp leading (optinal) spaces!!!! - why? why not?
97-
## note: auto-remove newline AND trailing whitespace - why? why not?
98-
comment = s.scan_until( /\n|$/ ).strip
99-
## print "comment:"
100-
## pp comment
101-
t << [:comment, comment]
102-
elsif s.scan( /[ \t]+/ ) ## one or more spaces
93+
if s.scan( /[ \t]+/ ) ## one or more spaces
10394
## note: (auto-)convert tab to space - why? why not?
10495
t << [:sp, s.matched.gsub( /[\t]/, ' ') ]
10596
elsif s.scan( /\r?\n/ ) ## check for (windows) carriage return (\r) - why? why not?
10697
t << [:nl, "\n" ]
107-
elsif s.check( "'" ) ## single-quoted string
108-
str = s.scan( SINGLE_QUOTE )
109-
t << [:string, str]
110-
elsif s.check( '"' ) ## double-quoted string
111-
str = s.scan( DOUBLE_QUOTE )
112-
t << [:string, str]
98+
elsif s.scan( COMMENT )
99+
t << [:comment, s.matched]
100+
elsif s.scan( LINE_COMMENT )
101+
t << [:comment, s.matched]
102+
elsif s.scan( DOUBLE_QUOTE ) ## double-quoted string
103+
t << [:string, s.matched]
104+
elsif s.scan( SINGLE_QUOTE ) ## single-quoted string
105+
t << [:string, s.matched]
113106
elsif s.scan( NAME )
114107
name = s.matched
115108
case name

solidity/lib/solidity/parser.rb

+2-4
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,11 @@ def _quick_pass_one
2323
lex = Lexer.new( @txt )
2424

2525
until lex.eos?
26-
while lex.peek == :sp do ## note: do NOT skip newlines here; pass along blank/empty lines for now - why? why not?
27-
lex.next
28-
end
29-
3026
case lex.peek
3127
when :comment ## single or multi-line comment
3228
tree << [:comment, lex.next]
29+
## note: if next token is newline - slurp / ignore
30+
lex.next if lex.peek == :nl
3331
when :pragma
3432
code = lex.scan_until( :';',
3533
include: true )

solidity/lib/solidity/version.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
module Solidity
33
MAJOR = 0
4-
MINOR = 1
5-
PATCH = 5
4+
MINOR = 2
5+
PATCH = 0
66
VERSION = [MAJOR,MINOR,PATCH].join('.')
77

88
def self.version

solidity/sandbox/test_lexer_ruby.rb

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
###
# test ruby built-in lexers
# answer questions
#   does end-of-line comment include newline in lexeme - yes/no?
#
#   yes - e.g.:
#   - [[6, 21], :on_comment, "## a comment here\n", END],


require 'ripper'
require 'pp'

code = <<STR


5.times do | x |
  puts x
  puts "hello"
  puts 'hello' ## a comment here
  ## another comment here
  ## another here

  ## yet another here
end


STR


puts code
pp Ripper.lex(code)


puts code
## note: the keep_tokens keyword requires Ruby 3.2+ - on older rubies
##       the call raises ArgumentError (unknown keyword: :keep_tokens),
##       so guard it to keep this sandbox script runnable everywhere
if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('3.2')
  pp RubyVM::AbstractSyntaxTree.parse( code,
                                       keep_tokens: true ).tokens
else
  warn "skipping RubyVM::AbstractSyntaxTree keep_tokens demo - requires Ruby 3.2+"
end

# sample tokens output (from an earlier run with a simpler snippet):
# =>
# [[0, :tIDENTIFIER, "puts", [1, 0, 1, 4]],
#  [1, :"(", "(", [1, 4, 1, 5]],
#  [2, :tSTRING_BEG, "'", [1, 5, 1, 6]],
#  [3, :tSTRING_CONTENT, "test", [1, 6, 1, 10]],
#  [4, :tSTRING_END, "'", [1, 10, 1, 11]],
#  [5, :",", ",", [1, 11, 1, 12]],
#  [6, :tSP, " ", [1, 12, 1, 13]],
#  [7, :")", ")", [1, 13, 1, 14]]]


=begin  reference output from Ripper.lex(code), captured by hand

[[[1, 0], :on_ignored_nl, "\n", BEG],
 [[2, 0], :on_ignored_nl, "\n", BEG],
 [[3, 0], :on_int, "5", END],
 [[3, 1], :on_period, ".", DOT],
 [[3, 2], :on_ident, "times", ARG],
 [[3, 7], :on_sp, " ", ARG],
 [[3, 11], :on_kw, "do", BEG],
 [[3, 13], :on_sp, " ", BEG],
 [[3, 17], :on_op, "|", BEG|LABEL],
 [[3, 18], :on_sp, " ", BEG|LABEL],
 [[3, 22], :on_ident, "x", ARG],
 [[3, 23], :on_sp, " ", ARG],
 [[3, 27], :on_op, "|", BEG|LABEL],
 [[3, 28], :on_ignored_nl, "\n", BEG|LABEL],
 [[4, 0], :on_sp, "\t", BEG|LABEL],
 [[4, 1], :on_ident, "puts", CMDARG],
 [[4, 5], :on_sp, " ", CMDARG],
 [[4, 6], :on_ident, "x", END|LABEL],
 [[4, 7], :on_nl, "\n", BEG],
 [[5, 0], :on_sp, " ", BEG],
 [[5, 2], :on_ident, "puts", CMDARG],
 [[5, 6], :on_sp, " ", CMDARG],
 [[5, 7], :on_tstring_beg, "\"", CMDARG],
 [[5, 8], :on_tstring_content, "hello", CMDARG],
 [[5, 13], :on_tstring_end, "\"", END],
 [[5, 14], :on_nl, "\n", BEG],
 [[6, 0], :on_sp, " ", BEG],
 [[6, 2], :on_ident, "puts", CMDARG],
 [[6, 6], :on_sp, " ", CMDARG],
 [[6, 7], :on_tstring_beg, "'", CMDARG],
 [[6, 8], :on_tstring_content, "hello", CMDARG],
 [[6, 13], :on_tstring_end, "'", END],
 [[6, 14], :on_sp, " ", END],
 [[6, 21], :on_comment, "## a comment here\n", END],
 [[7, 0], :on_kw, "end", END],
 [[7, 3], :on_nl, "\n", BEG],
 [[8, 0], :on_ignored_nl, "\n", BEG],
 [[9, 0], :on_ignored_nl, "\n", BEG]]

=end

solidity/test/test_lexer.rb

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
##
2+
# to run use
3+
# ruby -I ./lib -I ./test test/test_lexer.rb
4+
5+
6+
require 'helper'
7+
8+
9+
10+
class TestLexer < MiniTest::Test
11+
12+
def _untokenize( tokens )
13+
buf = String.new('')
14+
tokens.each do |t|
15+
buf << (t.is_a?( String ) ? t : t[1])
16+
17+
## dump some token types
18+
pp t if [:comment, :string].include?( t[0] )
19+
end
20+
buf
21+
end
22+
23+
24+
def test_contracts
25+
['contract1',
26+
'contract2',
27+
'contract3'].each do |name, exp|
28+
path = "./contracts/#{name}.sol"
29+
lexer = Solidity::Lexer.read( path )
30+
31+
tokens = lexer.tokenize
32+
33+
txt = read_text( path )
34+
assert_equal txt, _untokenize( tokens )
35+
end
36+
end
37+
end ## class TestLexer

0 commit comments

Comments
 (0)