Skip to content

Commit 7d3c967

Browse files
author
KristofferC
committed
get rid of the startpos field
this makes untokenization slightly more annoying
1 parent 2e1e8e7 commit 7d3c967

File tree

2 files changed

+231
-193
lines changed

2 files changed

+231
-193
lines changed

src/tokenize.jl

+6-30
Original file line numberDiff line numberDiff line change
@@ -25,32 +25,23 @@ TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}(
2525
struct Token
2626
kind::Kind
2727
# Offsets into a string or buffer
28-
startbyte::Int # The byte where the token starts in the buffer
2928
endbyte::Int # The byte where the token ends in the buffer
3029
dotop::Bool
3130
suffix::Bool
3231
end
33-
function Token(kind::Kind, startbyte::Int, endbyte::Int)
34-
Token(kind, startbyte, endbyte, false, false)
32+
function Token(kind::Kind, endbyte::Int)
33+
Token(kind, endbyte, false, false)
3534
end
36-
Token() = Token(K"error", 0, 0, false, false)
35+
Token() = Token(K"error", 0, false, false)
3736

3837
const EMPTY_TOKEN = Token()
3938

4039
kind(t::Token) = t.kind
4140

42-
startbyte(t::Token) = t.startbyte
4341
endbyte(t::Token) = t.endbyte
4442

4543

46-
function untokenize(t::Token, str::String)
47-
String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)])
48-
end
4944

50-
function Base.show(io::IO, t::Token)
51-
print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " "))
52-
print(io, rpad(kind(t), 15, " "))
53-
end
5445

5546
#-------------------------------------------------------------------------------
5647
# Lexer
@@ -77,9 +68,7 @@ Ideally a lexer is stateless but some state is needed here for:
7768
"""
7869
mutable struct Lexer{IO_t <: IO}
7970
io::IO_t
80-
8171
token_startpos::Int
82-
8372
last_token::Kind
8473
string_states::Vector{StringState}
8574
chars::Tuple{Char,Char,Char,Char}
@@ -156,13 +145,6 @@ Return the latest `Token`'s starting position.
156145
"""
157146
startpos(l::Lexer) = l.token_startpos
158147

159-
"""
160-
startpos!(l::Lexer, i::Integer)
161-
162-
Set a new starting position.
163-
"""
164-
startpos!(l::Lexer, i::Integer) = l.token_startpos = i
165-
166148
"""
167149
peekchar(l::Lexer)
168150
@@ -171,14 +153,14 @@ Returns the next character without changing the lexer's state.
171153
peekchar(l::Lexer) = l.chars[2]
172154

173155
"""
174-
dpeekchar(l::Lexer)
156+
dpeekchar(l::Lexer)
175157
176158
Returns the next two characters without changing the lexer's state.
177159
"""
178160
dpeekchar(l::Lexer) = l.chars[2], l.chars[3]
179161

180162
"""
181-
peekchar3(l::Lexer)
163+
peekchar3(l::Lexer)
182164
183165
Returns the next three characters without changing the lexer's state.
184166
"""
@@ -198,8 +180,6 @@ Determine whether the end of the lexer's underlying buffer has been reached.
198180
"""
199181
Base.eof(l::Lexer) = eof(l.io)
200182

201-
Base.seek(l::Lexer, pos) = seek(l.io, pos)
202-
203183
"""
204184
start_token!(l::Lexer)
205185
@@ -215,9 +195,6 @@ end
215195
216196
Returns the next character and increments the current position.
217197
"""
218-
function readchar end
219-
220-
221198
function readchar(l::Lexer)
222199
c = readchar(l.io)
223200
l.chars = (l.chars[2], l.chars[3], l.chars[4], c)
@@ -271,8 +248,7 @@ function emit(l::Lexer, kind::Kind)
271248
suffix = true
272249
end
273250
end
274-
275-
tok = Token(kind, startpos(l), position(l) - 1, l.dotop, suffix)
251+
tok = Token(kind, position(l) - 1, l.dotop, suffix)
276252

277253
l.dotop = false
278254
l.last_token = kind

0 commit comments

Comments
 (0)