From ed8a8d1b72e4ba945bc89425ee0b40cf782aa6f3 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 9 Feb 2025 14:15:06 +0100 Subject: [PATCH 01/18] Add relative mode for tokenizer --- lib/elixir/src/elixir.hrl | 4 +- lib/elixir/src/elixir_interpolation.erl | 1 + lib/elixir/src/elixir_tokenizer.erl | 372 ++++++++++++++-------- lib/elixir/test/erlang/tokenizer_test.erl | 7 +- 4 files changed, 253 insertions(+), 131 deletions(-) diff --git a/lib/elixir/src/elixir.hrl b/lib/elixir/src/elixir.hrl index eee33351ea..ef661d32f7 100644 --- a/lib/elixir/src/elixir.hrl +++ b/lib/elixir/src/elixir.hrl @@ -77,5 +77,7 @@ indentation=0, column=1, mismatch_hints=[], - warnings=[] + warnings=[], + mode=absolute, + prev_pos={1, 1} }). diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 16670d5f3e..004fb96f1a 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -123,6 +123,7 @@ strip_horizontal_space(T, Buffer, Counter) -> {T, Buffer, Counter}. cursor_complete(Line, Column, Terminators) -> + % TODO handle relative position in inserted cursor lists:mapfoldl( fun({Start, _, _}, AccColumn) -> End = elixir_tokenizer:terminator(Start), diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 231466d28f..bffe4a2afa 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -5,7 +5,7 @@ -module(elixir_tokenizer). -include("elixir.hrl"). -include("elixir_tokenizer.hrl"). --export([tokenize/1, tokenize/3, tokenize/4, invalid_do_error/1, terminator/1]). +-export([tokenize/1, tokenize/3, tokenize/4, invalid_do_error/1, terminator/1, to_absolute_tokens/2]). -define(at_op(T), T =:= $@). @@ -110,6 +110,12 @@ -define(xor_op3(T1, T2, T3), T1 =:= $^, T2 =:= $^, T3 =:= $^). +%% Computes a relative position given the previous and current absolute positions. +token_position({Line, Col}, #elixir_tokenizer{mode=absolute} = Scope) -> + {{Line, Col}, Scope}; +token_position({Line, Col}, #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevCol}} = Scope) -> + {{Line - PrevLine, Col - PrevCol}, Scope#elixir_tokenizer{prev_pos={Line, Col}}}. + tokenize(String, Line, Column, #elixir_tokenizer{} = Scope) -> tokenize(String, Line, Column, Scope, []); @@ -130,6 +136,8 @@ tokenize(String, Line, Column, Opts) -> Acc#elixir_tokenizer{preserve_comments=PreserveComments}; ({unescape, Unescape}, Acc) when is_boolean(Unescape) -> Acc#elixir_tokenizer{unescape=Unescape}; + ({mode, Mode}, Acc) when Mode =:= absolute; Mode =:= relative -> + Acc#elixir_tokenizer{mode=Mode}; (_, Acc) -> Acc end, #elixir_tokenizer{identifier_tokenizer=IdentifierTokenizer, column=Column}, Opts), @@ -179,18 +187,21 @@ tokenize(("<<<<<<<" ++ _) = Original, Line, 1, Scope, Tokens) -> tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) -> {Rest, Number, OriginalRepresentation, Length} = tokenize_hex(T, [H], 1), - Token = {int, {Line, Column, Number}, OriginalRepresentation}, - tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {int, {Line1, Column1, Number}, OriginalRepresentation}, + tokenize(Rest, Line, Column + 2 + Length, Scope1, [Token | Tokens]); tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) -> {Rest, Number, OriginalRepresentation, Length} = tokenize_bin(T, [H], 1), - Token = {int, {Line, Column, Number}, OriginalRepresentation}, - tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {int, {Line1, Column1, Number}, OriginalRepresentation}, + tokenize(Rest, Line, Column + 2 + Length, Scope1, [Token | Tokens]); tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) -> {Rest, Number, OriginalRepresentation, Length} = tokenize_octal(T, [H], 1), - Token = {int, {Line, Column, Number}, OriginalRepresentation}, - tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {int, {Line1, Column1, Number}, OriginalRepresentation}, + tokenize(Rest, Line, Column + 2 + Length, Scope1, [Token | Tokens]); % Comments @@ -238,8 +249,9 @@ tokenize([$?, $\\, H | T], Line, Column, Scope, Tokens) -> Scope end, - Token = {char, {Line, Column, [$?, $\\, H]}, Char}, - tokenize(T, Line, Column + 3, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {char, {Line1, Column1, [$?, $\\, H]}, Char}, + tokenize(T, Line, Column + 3, NewScope1, [Token | Tokens]); tokenize([$?, Char | T], Line, Column, Scope, Tokens) -> NewScope = case handle_char(Char) of @@ -250,8 +262,9 @@ tokenize([$?, Char | T], Line, Column, Scope, Tokens) -> false -> Scope end, - Token = {char, {Line, Column, [$?, Char]}, Char}, - tokenize(T, Line, Column + 2, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {char, {Line1, Column1, [$?, Char]}, Char}, + tokenize(T, Line, Column + 2, NewScope1, [Token | Tokens]); % Heredocs @@ -275,72 +288,90 @@ tokenize([$' | T], Line, Column, Scope, Tokens) -> % Operator atoms tokenize(".:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, Column, nil}, '.'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 2, Scope1, [{kw_identifier, {Line1, Column1, nil}, '.'} | Tokens]); tokenize("<<>>:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 5, Scope, [{kw_identifier, {Line, Column, nil}, '<<>>'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 5, Scope1, [{kw_identifier, {Line1, Column1, nil}, '<<>>'} | Tokens]); tokenize("%{}:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, Column, nil}, '%{}'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 4, Scope1, [{kw_identifier, {Line1, Column1, nil}, '%{}'} | Tokens]); tokenize("%:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, Column, nil}, '%'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 2, Scope1, [{kw_identifier, {Line1, Column1, nil}, '%'} | Tokens]); tokenize("&:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, Column, nil}, '&'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 2, Scope1, [{kw_identifier, {Line1, Column1, nil}, '&'} | Tokens]); tokenize("{}:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 3, Scope, [{kw_identifier, {Line, Column, nil}, '{}'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 3, Scope1, [{kw_identifier, {Line1, Column1, nil}, '{}'} | Tokens]); tokenize("..//:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 5, Scope, [{kw_identifier, {Line, Column, nil}, '..//'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 5, Scope1, [{kw_identifier, {Line1, Column1, nil}, '..//'} | Tokens]); tokenize(":<<>>" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 5, Scope, [{atom, {Line, Column, nil}, '<<>>'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 5, Scope1, [{atom, {Line1, Column1, nil}, '<<>>'} | Tokens]); tokenize(":%{}" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, nil}, '%{}'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 4, Scope1, [{atom, {Line1, Column1, nil}, '%{}'} | Tokens]); tokenize(":%" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, Column, nil}, '%'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 2, Scope1, [{atom, {Line1, Column1, nil}, '%'} | Tokens]); tokenize(":{}" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, Column, nil}, '{}'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 3, Scope1, [{atom, {Line1, Column1, nil}, '{}'} | Tokens]); tokenize(":..//" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 5, Scope, [{atom, {Line, Column, nil}, '..//'} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 5, Scope1, [{atom, {Line1, Column1, nil}, '..//'} | Tokens]); % ## Three Token Operators tokenize([$:, T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3); ?arrow_op3(T1, T2, T3); ?xor_op3(T1, T2, T3); ?concat_op3(T1, T2, T3); ?ellipsis_op3(T1, T2, T3) -> - Token = {atom, {Line, Column, nil}, list_to_atom([T1, T2, T3])}, - tokenize(Rest, Line, Column + 4, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {atom, {Line1, Column1, nil}, list_to_atom([T1, T2, T3])}, + tokenize(Rest, Line, Column + 4, Scope1, [Token | Tokens]); % ## Two Token Operators tokenize([$:, $:, $: | Rest], Line, Column, Scope, Tokens) -> Message = "atom ::: must be written between quotes, as in :\"::\", to avoid ambiguity", NewScope = prepend_warning(Line, Column, Message, Scope), - Token = {atom, {Line, Column, nil}, '::'}, - tokenize(Rest, Line, Column + 3, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {atom, {Line1, Column1, nil}, '::'}, + tokenize(Rest, Line, Column + 3, NewScope1, [Token | Tokens]); tokenize([$:, T1, T2 | Rest], Line, Column, Scope, Tokens) when ?comp_op2(T1, T2); ?rel_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2); ?arrow_op(T1, T2); ?in_match_op(T1, T2); ?concat_op(T1, T2); ?power_op(T1, T2); ?stab_op(T1, T2); ?range_op(T1, T2) -> - Token = {atom, {Line, Column, nil}, list_to_atom([T1, T2])}, - tokenize(Rest, Line, Column + 3, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {atom, {Line1, Column1, nil}, list_to_atom([T1, T2])}, + tokenize(Rest, Line, Column + 3, Scope1, [Token | Tokens]); % ## Single Token Operators tokenize([$:, T | Rest], Line, Column, Scope, Tokens) when ?at_op(T); ?unary_op(T); ?capture_op(T); ?dual_op(T); ?mult_op(T); ?rel_op(T); ?match_op(T); ?pipe_op(T); T =:= $. -> - Token = {atom, {Line, Column, nil}, list_to_atom([T])}, - tokenize(Rest, Line, Column + 2, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {atom, {Line1, Column1, nil}, list_to_atom([T])}, + tokenize(Rest, Line, Column + 2, Scope1, [Token | Tokens]); % ## Stand-alone tokens tokenize("=>" ++ Rest, Line, Column, Scope, Tokens) -> - Token = {assoc_op, {Line, Column, previous_was_eol(Tokens)}, '=>'}, - tokenize(Rest, Line, Column + 2, Scope, add_token_with_eol(Token, Tokens)); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {assoc_op, {Line1, Column1, previous_was_eol(Tokens)}, '=>'}, + tokenize(Rest, Line, Column + 2, Scope1, add_token_with_eol(Scope1, Token, Tokens)); tokenize("..//" ++ Rest = String, Line, Column, Scope, Tokens) -> case strip_horizontal_space(Rest, 0) of {[$/ | _] = Remaining, Extra} -> - Token = {identifier, {Line, Column, nil}, '..//'}, - tokenize(Remaining, Line, Column + 4 + Extra, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {identifier, {Line1, Column1, nil}, '..//'}, + tokenize(Remaining, Line, Column + 4 + Extra, Scope1, [Token | Tokens]); {_, _} -> unexpected_token(String, Line, Column, Scope, Tokens) end; @@ -378,16 +409,19 @@ tokenize([T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?arrow_op3(T1, T % ## Containers + punctuation tokens tokenize([$, | Rest], Line, Column, Scope, Tokens) -> - Token = {',', {Line, Column, 0}}, - tokenize(Rest, Line, Column + 1, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {',', {Line1, Column1, 0}}, + tokenize(Rest, Line, Column + 1, Scope1, [Token | Tokens]); tokenize([$<, $< | Rest], Line, Column, Scope, Tokens) -> - Token = {'<<', {Line, Column, nil}}, - handle_terminator(Rest, Line, Column + 2, Scope, Token, Tokens); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {'<<', {Line1, Column1, nil}}, + handle_terminator(Rest, Line, Column + 2, Scope1, Token, Tokens); tokenize([$>, $> | Rest], Line, Column, Scope, Tokens) -> - Token = {'>>', {Line, Column, previous_was_eol(Tokens)}}, - handle_terminator(Rest, Line, Column + 2, Scope, Token, Tokens); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {'>>', {Line1, Column1, previous_was_eol(Tokens)}}, + handle_terminator(Rest, Line, Column + 2, Scope1, Token, Tokens); tokenize([${ | Rest], Line, Column, Scope, [{'%', _} | _] = Tokens) -> Message = @@ -398,18 +432,21 @@ tokenize([${ | Rest], Line, Column, Scope, [{'%', _} | _] = Tokens) -> error({?LOC(Line, Column), Message, [${]}, Rest, Scope, Tokens); tokenize([T | Rest], Line, Column, Scope, Tokens) when T =:= $(; T =:= ${; T =:= $[ -> - Token = {list_to_atom([T]), {Line, Column, nil}}, - handle_terminator(Rest, Line, Column + 1, Scope, Token, Tokens); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {list_to_atom([T]), {Line1, Column1, nil}}, + handle_terminator(Rest, Line, Column + 1, Scope1, Token, Tokens); tokenize([T | Rest], Line, Column, Scope, Tokens) when T =:= $); T =:= $}; T =:= $] -> - Token = {list_to_atom([T]), {Line, Column, previous_was_eol(Tokens)}}, - handle_terminator(Rest, Line, Column + 1, Scope, Token, Tokens); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {list_to_atom([T]), {Line1, Column1, previous_was_eol(Tokens)}}, + handle_terminator(Rest, Line, Column + 1, Scope1, Token, Tokens); % ## Two Token Operators tokenize([T1, T2 | Rest], Line, Column, Scope, Tokens) when ?ternary_op(T1, T2) -> Op = list_to_atom([T1, T2]), - Token = {ternary_op, {Line, Column, previous_was_eol(Tokens)}, Op}, - tokenize(Rest, Line, Column + 2, Scope, add_token_with_eol(Token, Tokens)); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {ternary_op, {Line1, Column1, previous_was_eol(Tokens)}, Op}, + tokenize(Rest, Line, Column + 2, Scope1, add_token_with_eol(Scope1, Token, Tokens)); tokenize([T1, T2 | Rest], Line, Column, Scope, Tokens) when ?power_op(T1, T2) -> handle_op(Rest, Line, Column, power_op, 2, list_to_atom([T1, T2]), Scope, Tokens); @@ -462,8 +499,9 @@ tokenize([$& | Rest], Line, Column, Scope, Tokens) -> capture_op end, - Token = {Kind, {Line, Column, nil}, '&'}, - tokenize(Rest, Line, Column + 1, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {Kind, {Line1, Column1, nil}, '&'}, + tokenize(Rest, Line, Column + 1, Scope1, [Token | Tokens]); tokenize([T | Rest], Line, Column, Scope, Tokens) when ?at_op(T) -> handle_unary_op(Rest, Line, Column, at_op, 1, list_to_atom([T]), Scope, Tokens); @@ -518,8 +556,9 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote {ok, [Part]} when is_binary(Part) -> case unsafe_to_atom(Part, Line, Column, Scope) of {ok, Atom} -> - Token = {atom_quoted, {Line, Column, H}, Atom}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {atom_quoted, {Line1, Column1, H}, Atom}, + tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -530,8 +569,9 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote true -> atom_safe; false -> atom_unsafe end, - Token = {Key, {Line, Column, H}, Unescaped}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {Key, {Line1, Column1, H}, Unescaped}, + tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -547,8 +587,9 @@ tokenize([$: | String] = Original, Line, Column, Scope, Tokens) -> {_Kind, Unencoded, Atom, Rest, Length, Ascii, _Special} -> NewScope = maybe_warn_for_ambiguous_bang_before_equals(atom, Unencoded, Rest, Line, Column, Scope), TrackedScope = track_ascii(Ascii, NewScope), - Token = {atom, {Line, Column, Unencoded}, Atom}, - tokenize(Rest, Line, Column + 1 + Length, TrackedScope, [Token | Tokens]); + {{Line1, Column1}, TrackedScope1} = token_position({Line, Column}, TrackedScope), + Token = {atom, {Line1, Column1, Unencoded}, Atom}, + tokenize(Rest, Line, Column + 1 + Length, TrackedScope1, [Token | Tokens]); empty when Scope#elixir_tokenizer.cursor_completion == false -> unexpected_token(Original, Line, Column, Scope, Tokens); empty -> @@ -586,11 +627,13 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) -> error({?LOC(Line, Column), Msg, [I]}, T, Scope, Tokens) end; {Rest, Number, Original, Length} when is_integer(Number) -> - Token = {int, {Line, Column, Number}, Original}, - tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {int, {Line1, Column1, Number}, Original}, + tokenize(Rest, Line, Column + Length, Scope1, [Token | Tokens]); {Rest, Number, Original, Length} -> - Token = {flt, {Line, Column, Number}, Original}, - tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]) + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {flt, {Line1, Column1, Number}, Original}, + tokenize(Rest, Line, Column + Length, Scope1, [Token | Tokens]) end; % Spaces @@ -602,10 +645,12 @@ tokenize([T | Rest], Line, Column, Scope, Tokens) when ?is_horizontal_space(T) - % End of line tokenize(";" ++ Rest, Line, Column, Scope, []) -> - tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, Column, 0}}]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 1, Scope1, [{';', {Line1, Column1, 0}}]); tokenize(";" ++ Rest, Line, Column, Scope, [Top | _] = Tokens) when element(1, Top) /= ';' -> - tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, Column, 0}} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(Rest, Line, Column + 1, Scope1, [{';', {Line1, Column1, 0}} | Tokens]); tokenize("\\" = Original, Line, Column, Scope, Tokens) -> error({?LOC(Line, Column), "invalid escape \\ at end of file", []}, Original, Scope, Tokens); @@ -623,10 +668,12 @@ tokenize("\\\r\n" ++ Rest, Line, _Column, Scope, Tokens) -> tokenize_eol(Rest, Line, Scope, Tokens); tokenize("\n" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize_eol(Rest, Line, Scope, eol(Line, Column, Tokens)); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize_eol(Rest, Line, Scope1, eol(Line1, Column1, Tokens)); tokenize("\r\n" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize_eol(Rest, Line, Scope, eol(Line, Column, Tokens)); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize_eol(Rest, Line, Scope1, eol(Line1, Column1, Tokens)); % Others @@ -639,11 +686,14 @@ tokenize([$%, $[ | Rest], Line, Column, Scope, Tokens) -> error(Reason, Rest, Scope, Tokens); tokenize([$%, ${ | T], Line, Column, Scope, Tokens) -> - Token = {'{', {Line, Column, nil}}, - handle_terminator(T, Line, Column + 2, Scope, Token, [{'%{}', {Line, Column, nil}} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + {{Line2, Column2}, Scope2} = token_position({Line, Column}, Scope1), + Token = {'{', {Line2, Column2, nil}}, + handle_terminator(T, Line, Column + 2, Scope2, Token, [{'%{}', {Line1, Column1, nil}} | Tokens]); tokenize([$% | T], Line, Column, Scope, Tokens) -> - tokenize(T, Line, Column + 1, Scope, [{'%', {Line, Column, nil}} | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + tokenize(T, Line, Column + 1, Scope1, [{'%', {Line1, Column1, nil}} | Tokens]); tokenize([$. | T], Line, Column, Scope, Tokens) -> tokenize_dot(T, Line, Column + 1, {Line, Column, nil}, Scope, Tokens); @@ -658,8 +708,9 @@ tokenize(String, Line, Column, OriginalScope, Tokens) -> case Rest of [$: | T] when ?is_space(hd(T)) -> - Token = {kw_identifier, {Line, Column, Unencoded}, Atom}, - tokenize(T, Line, Column + Length + 1, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {kw_identifier, {Line1, Column1, Unencoded}, Atom}, + tokenize(T, Line, Column + Length + 1, Scope1, [Token | Tokens]); [$: | T] when hd(T) =/= $: -> AtomName = atom_to_list(Atom) ++ [$:], @@ -678,8 +729,9 @@ tokenize(String, Line, Column, OriginalScope, Tokens) -> _ when Kind == identifier -> NewScope = maybe_warn_for_ambiguous_bang_before_equals(identifier, Unencoded, Rest, Line, Column, Scope), - Token = check_call_identifier(Line, Column, Unencoded, Atom, Rest), - tokenize(Rest, Line, Column + Length, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = check_call_identifier(Line1, Column1, Unencoded, Atom, Rest), + tokenize(Rest, Line, Column + Length, NewScope1, [Token | Tokens]); _ -> unexpected_token(String, Line, Column, Scope, Tokens) @@ -767,8 +819,9 @@ handle_heredocs(T, Line, Column, H, Scope, Tokens) -> {ok, NewLine, NewColumn, Parts, Rest, NewScope} -> case unescape_tokens(Parts, Line, Column, NewScope) of {ok, Unescaped} -> - Token = {heredoc_type(H), {Line, Column, nil}, NewColumn - 4, Unescaped}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {heredoc_type(H), {Line1, Column1, nil}, NewColumn - 4, Unescaped}, + tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, Scope, Tokens) @@ -808,8 +861,9 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> {ok, [Part]} when is_binary(Part) -> case unsafe_to_atom(Part, Line, Column - 1, Scope) of {ok, Atom} -> - Token = {kw_identifier, {Line, Column - 1, H}, Atom}, - tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope), + Token = {kw_identifier, {Line1, Column1, H}, Atom}, + tokenize(Rest, NewLine, NewColumn + 1, NewScope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) end; @@ -819,8 +873,9 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> true -> kw_identifier_safe; false -> kw_identifier_unsafe end, - Token = {Key, {Line, Column - 1, H}, Unescaped}, - tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope), + Token = {Key, {Line1, Column1, H}, Unescaped}, + tokenize(Rest, NewLine, NewColumn + 1, NewScope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -842,8 +897,9 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> case unescape_tokens(Parts, Line, Column, NewScope) of {ok, Unescaped} -> - Token = {string_type(H), {Line, Column - 1, nil}, Unescaped}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope), + Token = {string_type(H), {Line1, Column1, nil}, Unescaped}, + tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -851,28 +907,33 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> end. handle_unary_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) -> - Token = {kw_identifier, {Line, Column, nil}, Op}, - tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {kw_identifier, {Line1, Column1, nil}, Op}, + tokenize(Rest, Line, Column + Length + 1, Scope1, [Token | Tokens]); handle_unary_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) -> case strip_horizontal_space(Rest, 0) of {[$/ | _] = Remaining, Extra} -> - Token = {identifier, {Line, Column, nil}, Op}, - tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {identifier, {Line1, Column1, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope1, [Token | Tokens]); {Remaining, Extra} -> - Token = {Kind, {Line, Column, nil}, Op}, - tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]) + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {Kind, {Line1, Column1, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope1, [Token | Tokens]) end. handle_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) -> - Token = {kw_identifier, {Line, Column, nil}, Op}, - tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {kw_identifier, {Line1, Column1, nil}, Op}, + tokenize(Rest, Line, Column + Length + 1, Scope1, [Token | Tokens]); handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) -> case strip_horizontal_space(Rest, 0) of {[$/ | _] = Remaining, Extra} -> - Token = {identifier, {Line, Column, nil}, Op}, - tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {identifier, {Line1, Column1, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope1, [Token | Tokens]); {Remaining, Extra} -> NewScope = %% TODO: Remove these deprecations on Elixir v2.0 @@ -893,8 +954,9 @@ handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) -> Scope end, - Token = {Kind, {Line, Column, previous_was_eol(Tokens)}, Op}, - tokenize(Remaining, Line, Column + Length + Extra, NewScope, add_token_with_eol(Token, Tokens)) + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + Token = {Kind, {Line1, Column1, previous_was_eol(Tokens)}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, NewScope1, add_token_with_eol(NewScope1, Token, Tokens)) end. % ## Three Token Operators @@ -918,8 +980,10 @@ handle_dot([$., T | Rest], Line, Column, DotInfo, Scope, Tokens) when % ## Exception for .( as it needs to be treated specially in the parser handle_dot([$., $( | Rest], Line, Column, DotInfo, Scope, Tokens) -> - TokensSoFar = add_token_with_eol({dot_call_op, DotInfo, '.'}, Tokens), - tokenize([$( | Rest], Line, Column, Scope, TokensSoFar); + {DotLine, DotColumn, DotOther} = DotInfo, + {{DotLine1, DotColumn1}, Scope1} = token_position({DotLine, DotColumn}, Scope), + TokensSoFar = add_token_with_eol(Scope1, {dot_call_op, {DotLine1, DotColumn1, DotOther}, '.'}, Tokens), + tokenize([$( | Rest], Line, Column, Scope1, TokensSoFar); handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) when ?is_quote(H) -> Scope = case H == $' of @@ -950,9 +1014,14 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of {ok, Atom} -> - Token = check_call_identifier(Line, Column, H, Atom, Rest), - TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]); + {DotLine, DotColumn, DotOther} = DotInfo, + {{DotLine1, DotColumn1}, NewScope1} = token_position({DotLine, DotColumn}, NewScope), + TokensSoFar = add_token_with_eol(NewScope1, {'.', {DotLine1, DotColumn1, DotOther}}, Tokens), + + {{Line1, Column1}, NewScope2} = token_position({Line, Column}, NewScope1), + Token = check_call_identifier(Line1, Column1, H, Atom, Rest), + + tokenize(Rest, NewLine, NewColumn, NewScope2, [Token | TokensSoFar]); {error, Reason} -> error(Reason, Original, NewScope, Tokens) @@ -965,13 +1034,19 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe end; handle_dot([$. | Rest], Line, Column, DotInfo, Scope, Tokens) -> - TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), - tokenize(Rest, Line, Column, Scope, TokensSoFar). + {DotLine, DotColumn, DotOther} = DotInfo, + {{DotLine1, DotColumn1}, Scope1} = token_position({DotLine, DotColumn}, Scope), + + TokensSoFar = add_token_with_eol(Scope1, {'.', {DotLine1, DotColumn1, DotOther}}, Tokens), + tokenize(Rest, Line, Column, Scope1, TokensSoFar). handle_call_identifier(Rest, Line, Column, DotInfo, Length, UnencodedOp, Scope, Tokens) -> - Token = check_call_identifier(Line, Column, UnencodedOp, list_to_atom(UnencodedOp), Rest), - TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), - tokenize(Rest, Line, Column + Length, Scope, [Token | TokensSoFar]). + {DotLine, DotColumn, DotOther} = DotInfo, + {{DotLine1, DotColumn1}, Scope1} = token_position({DotLine, DotColumn}, Scope), + TokensSoFar = add_token_with_eol(Scope1, {'.', {DotLine1, DotColumn1, DotOther}}, Tokens), + {{Line1, Column1}, Scope2} = token_position({Line, Column}, Scope1), + Token = check_call_identifier(Line1, Column1, UnencodedOp, list_to_atom(UnencodedOp), Rest), + tokenize(Rest, Line, Column + Length, Scope2, [Token | TokensSoFar]). % ## Ambiguous unary/binary operators tokens % Keywords are not ambiguous operators @@ -982,13 +1057,15 @@ handle_space_sensitive_tokens([Sign, $:, Space | _] = String, Line, Column, Scop handle_space_sensitive_tokens([Sign, NotMarker | T], Line, Column, Scope, [{identifier, _, _} = H | Tokens]) when ?dual_op(Sign), not(?is_space(NotMarker)), NotMarker =/= Sign, NotMarker =/= $/, NotMarker =/= $> -> Rest = [NotMarker | T], - DualOpToken = {dual_op, {Line, Column, nil}, list_to_atom([Sign])}, - tokenize(Rest, Line, Column + 1, Scope, [DualOpToken, setelement(1, H, op_identifier) | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + DualOpToken = {dual_op, {Line1, Column1, nil}, list_to_atom([Sign])}, + tokenize(Rest, Line, Column + 1, Scope1, [DualOpToken, setelement(1, H, op_identifier) | Tokens]); % Handle cursor completion handle_space_sensitive_tokens([], Line, Column, #elixir_tokenizer{cursor_completion=Cursor} = Scope, [{identifier, Info, Identifier} | Tokens]) when Cursor /= false -> + % no need to update positions tokenize([$(], Line, Column+1, Scope, [{paren_identifier, Info, Identifier} | Tokens]); handle_space_sensitive_tokens(String, Line, Column, Scope, Tokens) -> @@ -1356,8 +1433,9 @@ tokenize_alias(Rest, Line, Column, Unencoded, Atom, Length, Ascii, Special, Scop error(Reason, Unencoded ++ Rest, Scope, Tokens); true -> - AliasesToken = {alias, {Line, Column, Unencoded}, Atom}, - tokenize(Rest, Line, Column + Length, Scope, [AliasesToken | Tokens]) + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + AliasesToken = {alias, {Line1, Column1, Unencoded}, Atom}, + tokenize(Rest, Line, Column + Length, Scope1, [AliasesToken | Tokens]) end. %% Check if it is a call identifier (paren | bracket | do) @@ -1369,9 +1447,19 @@ check_call_identifier(Line, Column, Info, Atom, [$[ | _]) -> check_call_identifier(Line, Column, Info, Atom, _Rest) -> {identifier, {Line, Column, Info}, Atom}. -add_token_with_eol({unary_op, _, _} = Left, T) -> [Left | T]; -add_token_with_eol(Left, [{eol, _} | T]) -> [Left | T]; -add_token_with_eol(Left, T) -> [Left | T]. +add_token_with_eol(_Scope, {unary_op, _, _} = Left, T) -> [Left | T]; +add_token_with_eol(#elixir_tokenizer{mode=absolute}, Left, [{eol, _} | T]) -> [Left | T]; +add_token_with_eol(#elixir_tokenizer{mode=relative}, Left, [{eol, {EolLineDiff, EolColumnDiff, _}} | T]) -> + LeftInfo = element(2, Left), + LeftLineDiff = element(1, LeftInfo), + LeftColumnDiff = element(2, LeftInfo), + NewLeftLineDiff = LeftLineDiff + EolLineDiff, + NewLeftColumnDiff = LeftColumnDiff + EolColumnDiff, + NewLeftInfo1 = setelement(1, LeftInfo, NewLeftLineDiff), + NewLeftInfo2 = setelement(2, NewLeftInfo1, NewLeftColumnDiff), + NewLeft = setelement(2, Left, NewLeftInfo2), + [NewLeft | T]; +add_token_with_eol(_Scope, Left, T) -> [Left | T]. previous_was_eol([{',', {_, _, Count}} | _]) when Count > 0 -> Count; previous_was_eol([{';', {_, _, Count}} | _]) when Count > 0 -> Count; @@ -1553,38 +1641,43 @@ keyword_or_unsafe_to_atom(_, Part, Line, Column, Scope) -> unsafe_to_atom(Part, Line, Column, Scope). tokenize_keyword(terminator, Rest, Line, Column, Atom, Length, Scope, Tokens) -> - case tokenize_keyword_terminator(Line, Column, Atom, Tokens) of - {ok, [Check | T]} -> - handle_terminator(Rest, Line, Column + Length, Scope, Check, T); + case tokenize_keyword_terminator(Line, Column, Atom, Scope, Tokens) of + {ok, {NewScope, [Check | T]}} -> + handle_terminator(Rest, Line, Column + Length, NewScope, Check, T); {error, Message, Token} -> error({?LOC(Line, Column), Message, Token}, Token ++ Rest, Scope, Tokens) end; tokenize_keyword(token, Rest, Line, Column, Atom, Length, Scope, Tokens) -> - Token = {Atom, {Line, Column, nil}}, - tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {Atom, {Line1, Column1, nil}}, + tokenize(Rest, Line, Column + Length, Scope1, [Token | Tokens]); tokenize_keyword(block, Rest, Line, Column, Atom, Length, Scope, Tokens) -> - Token = {block_identifier, {Line, Column, nil}, Atom}, - tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]); + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + Token = {block_identifier, {Line1, Column1, nil}, Atom}, + tokenize(Rest, Line, Column + Length, Scope1, [Token | Tokens]); tokenize_keyword(Kind, Rest, Line, Column, Atom, Length, Scope, Tokens) -> - NewTokens = + {NewScope, NewTokens} = case strip_horizontal_space(Rest, 0) of {[$/ | _], _} -> - [{identifier, {Line, Column, nil}, Atom} | Tokens]; + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + {Scope1, [{identifier, {Line1, Column1, nil}, Atom} | Tokens]}; _ -> case {Kind, Tokens} of {in_op, [{unary_op, NotInfo, 'not'} | T]} -> - add_token_with_eol({in_op, NotInfo, 'not in'}, T); + % No need to call token_position here + {Scope, add_token_with_eol(Scope, {in_op, NotInfo, 'not in'}, T)}; {_, _} -> - add_token_with_eol({Kind, {Line, Column, previous_was_eol(Tokens)}, Atom}, Tokens) + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + {Scope1, add_token_with_eol(Scope1, {Kind, {Line1, Column1, previous_was_eol(Tokens)}, Atom}, Tokens)} end end, - tokenize(Rest, Line, Column + Length, Scope, NewTokens). + tokenize(Rest, Line, Column + Length, NewScope, NewTokens). tokenize_sigil([$~ | T], Line, Column, Scope, Tokens) -> case tokenize_sigil_name(T, [], Line, Column + 1, Scope, Tokens) of @@ -1670,9 +1763,10 @@ add_sigil_token(SigilName, Line, Column, NewLine, NewColumn, Parts, Rest, Scope, case MaybeEncoded of {ok, Atom} -> {Final, Modifiers} = collect_modifiers(Rest, []), - Token = {sigil, {Line, TokenColumn, nil}, Atom, Parts, Modifiers, Indentation, Delimiter}, + {{Line1, TokenColumn1}, Scope1} = token_position({Line, TokenColumn}, Scope), + Token = {sigil, {Line1, TokenColumn1, nil}, Atom, Parts, Modifiers, Indentation, Delimiter}, NewColumnWithModifiers = NewColumn + length(Modifiers), - tokenize(Final, NewLine, NewColumnWithModifiers, Scope, [Token | Tokens]); + tokenize(Final, NewLine, NewColumnWithModifiers, Scope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, Scope, Tokens) @@ -1680,18 +1774,22 @@ add_sigil_token(SigilName, Line, Column, NewLine, NewColumn, Parts, Rest, Scope, %% Fail early on invalid do syntax. For example, after %% most keywords, after comma and so on. -tokenize_keyword_terminator(DoLine, DoColumn, do, [{identifier, {Line, Column, Meta}, Atom} | T]) -> - {ok, add_token_with_eol({do, {DoLine, DoColumn, nil}}, - [{do_identifier, {Line, Column, Meta}, Atom} | T])}; -tokenize_keyword_terminator(_Line, _Column, do, [{'fn', _} | _]) -> +tokenize_keyword_terminator(DoLine, DoColumn, do, Scope, [{identifier, {Line, Column, Meta}, Atom} | T]) -> + {{DoLine1, DoColumn1}, Scope1} = token_position({DoLine, DoColumn}, Scope), + {ok, {Scope1, add_token_with_eol(Scope1, {do, {DoLine1, DoColumn1, nil}}, + [{do_identifier, {Line, Column, Meta}, Atom} | T])}}; +tokenize_keyword_terminator(_Line, _Column, do, _Scope, [{'fn', _} | _]) -> {error, invalid_do_with_fn_error("unexpected reserved word: "), "do"}; -tokenize_keyword_terminator(Line, Column, do, Tokens) -> +tokenize_keyword_terminator(Line, Column, do, Scope, Tokens) -> case is_valid_do(Tokens) of - true -> {ok, add_token_with_eol({do, {Line, Column, nil}}, Tokens)}; + true -> + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + {ok, {Scope1, add_token_with_eol(Scope1, {do, {Line1, Column1, nil}}, Tokens)}}; false -> {error, invalid_do_error("unexpected reserved word: "), "do"} end; -tokenize_keyword_terminator(Line, Column, Atom, Tokens) -> - {ok, [{Atom, {Line, Column, nil}} | Tokens]}. +tokenize_keyword_terminator(Line, Column, Atom, Scope, Tokens) -> + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + {ok, {Scope1, [{Atom, {Line1, Column1, nil}} | Tokens]}}. is_valid_do([{Atom, _} | _]) -> case Atom of @@ -1782,6 +1880,7 @@ error(Reason, Rest, #elixir_tokenizer{warnings=Warnings}, Tokens) -> add_cursor(_Line, Column, noprune, Terminators, Tokens) -> {Column, Terminators, Tokens}; add_cursor(Line, Column, prune_and_cursor, Terminators, Tokens) -> + % TODO handle relative positions in cursor and backtrack in prune PrePrunedTokens = prune_identifier(Tokens), PrunedTokens = prune_tokens(PrePrunedTokens, []), CursorTokens = [ @@ -1861,3 +1960,18 @@ prune_tokens([_ | Tokens], Opener) -> prune_tokens(Tokens, Opener); prune_tokens([], _Opener) -> []. + +to_absolute_tokens(RelTokens, {StartLine, StartColumn}) -> + to_absolute_tokens(RelTokens, {StartLine, StartColumn}, []). +to_absolute_tokens([], _CurrentAbs, Acc) -> + lists:reverse(Acc); +to_absolute_tokens([Token | Rest], {CurrLine, CurrCol}, Acc) -> + Info = element(2, Token), + RelLine = element(1, Info), + RelCol = element(2, Info), + NewLine = CurrLine + RelLine, + NewCol = CurrCol + RelCol, + NewInfo1 = setelement(1, Info, NewLine), + NewInfo2 = setelement(2, NewInfo1, NewCol), + NewToken = setelement(2, Token, NewInfo2), + to_absolute_tokens(Rest, {NewLine, NewCol}, [NewToken | Acc]). diff --git a/lib/elixir/test/erlang/tokenizer_test.erl b/lib/elixir/test/erlang/tokenizer_test.erl index b383a74b1c..4b0753f605 100644 --- a/lib/elixir/test/erlang/tokenizer_test.erl +++ b/lib/elixir/test/erlang/tokenizer_test.erl @@ -10,7 +10,12 @@ tokenize(String) -> tokenize(String, Opts) -> {ok, _Line, _Column, _Warnings, Result, []} = elixir_tokenizer:tokenize(String, 1, Opts), - lists:reverse(Result). + ReversedResult = lists:reverse(Result), + {ok, _Line1, _Column1, _Warnings1, ResultRelative, []} = elixir_tokenizer:tokenize(String, 1, [{mode, relative} | Opts]), + ReversedResultRelative = lists:reverse(ResultRelative), + Converted = elixir_tokenizer:to_absolute_tokens(ReversedResultRelative, {1, 1}), + ?assertEqual(ReversedResult, Converted), + ReversedResult. tokenize_error(String) -> {error, Error, _, _, _} = elixir_tokenizer:tokenize(String, 1, []), From 16c55f695333c89751334642325506d4c32658c0 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 9 Feb 2025 16:06:58 +0100 Subject: [PATCH 02/18] fix newline handling --- lib/elixir/src/elixir_tokenizer.erl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index bffe4a2afa..f31920e677 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -668,12 +668,12 @@ tokenize("\\\r\n" ++ Rest, Line, _Column, Scope, Tokens) -> tokenize_eol(Rest, Line, Scope, Tokens); tokenize("\n" ++ Rest, Line, Column, Scope, Tokens) -> - {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), - tokenize_eol(Rest, Line, Scope1, eol(Line1, Column1, Tokens)); + {NewScope, NewTokens} = eol(Line, Column, Scope, Tokens), + tokenize_eol(Rest, Line, NewScope, NewTokens); tokenize("\r\n" ++ Rest, Line, Column, Scope, Tokens) -> - {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), - tokenize_eol(Rest, Line, Scope1, eol(Line1, Column1, Tokens)); + {NewScope, NewTokens} = eol(Line, Column, Scope, Tokens), + tokenize_eol(Rest, Line, NewScope, NewTokens); % Others @@ -1073,14 +1073,15 @@ handle_space_sensitive_tokens(String, Line, Column, Scope, Tokens) -> %% Helpers -eol(_Line, _Column, [{',', {Line, Column, Count}} | Tokens]) -> - [{',', {Line, Column, Count + 1}} | Tokens]; -eol(_Line, _Column, [{';', {Line, Column, Count}} | Tokens]) -> - [{';', {Line, Column, Count + 1}} | Tokens]; -eol(_Line, _Column, [{eol, {Line, Column, Count}} | Tokens]) -> - [{eol, {Line, Column, Count + 1}} | Tokens]; -eol(Line, Column, Tokens) -> - [{eol, {Line, Column, 1}} | Tokens]. +eol(_Line, _Column, Scope, [{',', {Line, Column, Count}} | Tokens]) -> + {Scope, [{',', {Line, Column, Count + 1}} | Tokens]}; +eol(_Line, _Column, Scope, [{';', {Line, Column, Count}} | Tokens]) -> + {Scope, [{';', {Line, Column, Count + 1}} | Tokens]}; +eol(_Line, _Column, Scope, [{eol, {Line, Column, Count}} | Tokens]) -> + {Scope, [{eol, {Line, Column, Count + 1}} | Tokens]}; +eol(Line, Column, Scope, Tokens) -> + {{Line1, Column1}, Scope1} = token_position({Line, Column}, Scope), + {Scope1, [{eol, {Line1, Column1, 1}} | Tokens]}. is_unnecessary_quote([Part], Scope) when is_list(Part) -> case (Scope#elixir_tokenizer.identifier_tokenizer):tokenize(Part) of From 2e7b64fb1852c5eed783b433bac91c5a6abf5ec6 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 9 Feb 2025 23:54:49 +0100 Subject: [PATCH 03/18] Make interpolation tests pass --- lib/elixir/src/elixir_interpolation.erl | 12 +++- lib/elixir/src/elixir_tokenizer.erl | 77 ++++++++++++++++++++----- lib/elixir/test/erlang/string_test.erl | 5 ++ 3 files changed, 78 insertions(+), 16 deletions(-) diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 004fb96f1a..9c79322740 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -57,12 +57,20 @@ extract([$\\, $#, ${ | Rest], Buffer, Output, Line, Column, Scope, true, Last) - extract([$#, ${ | Rest], Buffer, Output, Line, Column, Scope, true, Last) -> Output1 = build_string(Buffer, Output), - case elixir_tokenizer:tokenize(Rest, Line, Column + 2, Scope#elixir_tokenizer{terminators=[]}) of + case elixir_tokenizer:tokenize(Rest, Line, Column + 2, Scope#elixir_tokenizer{terminators=[], prev_pos={Line, Column}}) of {error, {Location, _, "}"}, [$} | NewRest], Warnings, Tokens} -> NewScope = Scope#elixir_tokenizer{warnings=Warnings}, {line, EndLine} = lists:keyfind(line, 1, Location), {column, EndColumn} = lists:keyfind(column, 1, Location), - Output2 = build_interpol(Line, Column, EndLine, EndColumn, lists:reverse(Tokens), Output1), + + {Line1, Column1, EndLine1, EndColumn1} = case Scope of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevColumn}} -> + {Line - PrevLine, Column - PrevColumn, EndLine - PrevLine, EndColumn - PrevColumn}; + _ -> + {Line, Column, EndLine, EndColumn} + end, + + Output2 = build_interpol(Line1, Column1, EndLine1, EndColumn1, lists:reverse(Tokens), Output1), extract(NewRest, [], Output2, EndLine, EndColumn + 1, NewScope, true, Last); {error, Reason, _, _, _} -> {error, Reason}; diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index f31920e677..199b7e7800 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -161,6 +161,7 @@ tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLi Hint = missing_terminator_hint(Start, End, Scope), Message = "missing terminator: ~ts", Formatted = io_lib:format(Message, [End]), + % TODO StartLine StartColumn should be converted to absolute Meta = [ {opening_delimiter, Start}, {expected_delimiter, End}, @@ -1473,6 +1474,7 @@ interpolation_error(Reason, Rest, Scope, Tokens, Extension, Args, Line, Column, error(interpolation_format(Reason, Extension, Args, Line, Column, Opening, Closing), Rest, Scope, Tokens). interpolation_format({string, EndLine, EndColumn, Message, Token}, Extension, Args, Line, Column, Opening, Closing) -> + % TODO check if lines and columns need to be converted to absolute here Meta = [ {opening_delimiter, list_to_atom(Opening)}, {expected_delimiter, list_to_atom(Closing)}, @@ -1481,6 +1483,7 @@ interpolation_format({string, EndLine, EndColumn, Message, Token}, Extension, Ar {end_line, EndLine}, {end_column, EndColumn} ], + io:format("interpolation_format meta ~p~n", [Meta]), {Meta, [Message, io_lib:format(Extension, Args)], Token}; interpolation_format({_, _, _} = Reason, _Extension, _Args, _Line, _Column, _Opening, _Closing) -> Reason. @@ -1517,10 +1520,12 @@ handle_terminator(Rest, Line, Column, Scope, Token, Tokens) -> check_terminator({Start, Meta}, Terminators, Scope) when Start == '('; Start == '['; Start == '{'; Start == '<<' -> Indentation = Scope#elixir_tokenizer.indentation, - {ok, Scope#elixir_tokenizer{terminators=[{Start, Meta, Indentation} | Terminators]}}; + PrevPos = Scope#elixir_tokenizer.prev_pos, + {ok, Scope#elixir_tokenizer{terminators=[{Start, Meta, Indentation, PrevPos} | Terminators]}}; check_terminator({Start, Meta}, Terminators, Scope) when Start == 'fn'; Start == 'do' -> Indentation = Scope#elixir_tokenizer.indentation, + PrevPos = Scope#elixir_tokenizer.prev_pos, NewScope = case Terminators of @@ -1532,9 +1537,9 @@ check_terminator({Start, Meta}, Terminators, Scope) when Start == 'fn'; Start == Scope end, - {ok, NewScope#elixir_tokenizer{terminators=[{Start, Meta, Indentation} | Terminators]}}; + {ok, NewScope#elixir_tokenizer{terminators=[{Start, Meta, Indentation, PrevPos} | Terminators]}}; -check_terminator({'end', {EndLine, _, _}}, [{'do', _, Indentation} | Terminators], Scope) -> +check_terminator({'end', {EndLine, _, _}}, [{'do', _, Indentation, _} | Terminators], Scope) -> NewScope = %% If the end is more indented than the do, it may be a missing do error! case Scope#elixir_tokenizer.indentation > Indentation of @@ -1548,18 +1553,26 @@ check_terminator({'end', {EndLine, _, _}}, [{'do', _, Indentation} | Terminators {ok, NewScope#elixir_tokenizer{terminators=Terminators}}; -check_terminator({End, {EndLine, EndColumn, _}}, [{Start, {StartLine, StartColumn, _}, _} | Terminators], Scope) +check_terminator({End, {EndLine, EndColumn, _}}, [{Start, {StartLine, StartColumn, _}, _, {StartPrevLine, StartPrevColumn}} | Terminators], Scope) when End == 'end'; End == ')'; End == ']'; End == '}'; End == '>>' -> case terminator(Start) of End -> {ok, Scope#elixir_tokenizer{terminators=Terminators}}; ExpectedEnd -> + {EndPrevLine, EndPrevColumn} = Scope#elixir_tokenizer.prev_pos, + {StartLine1, StartColumn1, EndLine1, EndColumn1} = case Scope#elixir_tokenizer.mode of + relative -> + {StartLine + StartPrevLine, StartColumn + StartPrevColumn, EndLine + EndPrevLine, EndColumn + EndPrevColumn}; + absolute -> + {StartLine, StartColumn, EndLine, EndColumn} + end, + Meta = [ - {line, StartLine}, - {column, StartColumn}, - {end_line, EndLine}, - {end_column, EndColumn}, + {line, StartLine1}, + {column, StartColumn1}, + {end_line, EndLine1}, + {end_column, EndColumn1}, {error_type, mismatched_delimiter}, {opening_delimiter, Start}, {closing_delimiter, End}, @@ -1568,7 +1581,7 @@ check_terminator({End, {EndLine, EndColumn, _}}, [{Start, {StartLine, StartColum {error, {Meta, unexpected_token_or_reserved(End), [atom_to_list(End)]}} end; -check_terminator({'end', {Line, Column, _}}, [], #elixir_tokenizer{mismatch_hints=Hints}) -> +check_terminator({'end', {Line, Column, _}}, [], #elixir_tokenizer{mismatch_hints=Hints} = Scope) -> Suffix = case lists:keyfind('end', 1, Hints) of {'end', HintLine, _Indentation} -> @@ -1577,12 +1590,23 @@ check_terminator({'end', {Line, Column, _}}, [], #elixir_tokenizer{mismatch_hint false -> "" end, + Loc = case Scope of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevColumn}} -> + ?LOC(PrevLine, PrevColumn); + _ -> + ?LOC(Line, Column) + end, + {error, {Loc, {"unexpected reserved word: ", Suffix}, "end"}}; - {error, {?LOC(Line, Column), {"unexpected reserved word: ", Suffix}, "end"}}; - -check_terminator({End, {Line, Column, _}}, [], _Scope) +check_terminator({End, {Line, Column, _}}, [], Scope) when End == ')'; End == ']'; End == '}'; End == '>>' -> - {error, {?LOC(Line, Column), "unexpected token: ", atom_to_list(End)}}; + Loc = case Scope of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevColumn}} -> + ?LOC(PrevLine, PrevColumn); + _ -> + ?LOC(Line, Column) + end, + {error, {Loc, "unexpected token: ", atom_to_list(End)}}; check_terminator(_, _, Scope) -> {ok, Scope}. @@ -1975,4 +1999,29 @@ to_absolute_tokens([Token | Rest], {CurrLine, CurrCol}, Acc) -> NewInfo1 = setelement(1, Info, NewLine), NewInfo2 = setelement(2, NewInfo1, NewCol), NewToken = setelement(2, Token, NewInfo2), - to_absolute_tokens(Rest, {NewLine, NewCol}, [NewToken | Acc]). + NewTokenWithSubtokens = case NewToken of + {Key, Meta, Unescaped} when key =:= atom_safe; Key =:= atom_unsafe; Key =:= kw_identifier_safe; Key =:= kw_identifier_unsafe; Key =:= bin_string; Key =:= list_string -> + NewUnescaped = to_absolute_interpolation(Unescaped, {NewLine, NewCol}), + {Key, Meta, NewUnescaped}; + {Key, Meta, Indentation, Unescaped} when Key =:= bin_heredoc; Key =:= list_heredoc -> + NewUnescaped = to_absolute_interpolation(Unescaped, {NewLine, NewCol}), + {Key, Meta, Indentation, NewUnescaped}; + {sigil, Meta, Atom, Parts, Modifiers, Indentation, Delimiter} -> + NewParts = to_absolute_interpolation(Parts, {NewLine, NewCol}), + {sigil, Meta, Atom, NewParts, Modifiers, Indentation, Delimiter}; + Other -> + Other + end, + to_absolute_tokens(Rest, {NewLine, NewCol}, [NewTokenWithSubtokens | Acc]). + +to_absolute_interpolation(Parts, CurrentAbs) -> + to_absolute_interpolation(Parts, CurrentAbs, []). +to_absolute_interpolation([], _CurrentAbs, Acc) -> + lists:reverse(Acc); +to_absolute_interpolation([Binary | Rest], CurrentAbs, Acc) when is_binary(Binary); is_list(Binary) -> + to_absolute_interpolation(Rest, CurrentAbs, [Binary | Acc]); +to_absolute_interpolation([{{BeginLine, BeginColumn, nil}, {EndLine, EndColumn, nil}, Tokens} | Rest], {CurrentLine, CurrentCol}, Acc) -> + NewBegin = {BeginLine + CurrentLine, BeginColumn + CurrentCol, nil}, + NewEnd = {EndLine + CurrentLine, EndColumn + CurrentCol, nil}, + NewTokens = to_absolute_tokens(Tokens, {BeginLine + CurrentLine, BeginColumn + CurrentCol}), + to_absolute_interpolation(Rest, {CurrentLine, CurrentCol}, [{NewBegin, NewEnd, NewTokens} | Acc]). diff --git a/lib/elixir/test/erlang/string_test.erl b/lib/elixir/test/erlang/string_test.erl index f74a0fae2d..c220bd0cd4 100644 --- a/lib/elixir/test/erlang/string_test.erl +++ b/lib/elixir/test/erlang/string_test.erl @@ -14,8 +14,13 @@ eval(Content) -> extract_interpolations(String) -> case elixir_interpolation:extract(1, 1, #elixir_tokenizer{}, true, String ++ [$"], $") of {error, Error} -> + {error, RelError} = elixir_interpolation:extract(1, 1, #elixir_tokenizer{mode=relative, prev_pos={1, 1}}, true, String ++ [$"], $"), + ?assertEqual(Error, RelError), Error; {_, _, Parts, _, _} -> + {_, _, PartsRel, _, _} = elixir_interpolation:extract(1, 1, #elixir_tokenizer{mode=relative, prev_pos={1, 1}}, true, String ++ [$"], $"), + [{bin_string, {1, 1, nil}, PartsRelConverted}] = elixir_tokenizer:to_absolute_tokens([{bin_string, {0, 0, nil}, PartsRel}], {1, 1}), + ?assertEqual(Parts, PartsRelConverted), Parts end. From 7da4f3bf7eec441e0ee5ff08f5a74b9f112b4da2 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 11:01:47 +0100 Subject: [PATCH 04/18] Correctly handle offsets in interpolation --- lib/elixir/src/elixir_interpolation.erl | 2 +- lib/elixir/src/elixir_tokenizer.erl | 36 ++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 9c79322740..8afee50831 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -57,7 +57,7 @@ extract([$\\, $#, ${ | Rest], Buffer, Output, Line, Column, Scope, true, Last) - extract([$#, ${ | Rest], Buffer, Output, Line, Column, Scope, true, Last) -> Output1 = build_string(Buffer, Output), - case elixir_tokenizer:tokenize(Rest, Line, Column + 2, Scope#elixir_tokenizer{terminators=[], prev_pos={Line, Column}}) of + case elixir_tokenizer:tokenize(Rest, Line, Column + 2, Scope#elixir_tokenizer{terminators=[]}) of {error, {Location, _, "}"}, [$} | NewRest], Warnings, Tokens} -> NewScope = Scope#elixir_tokenizer{warnings=Warnings}, {line, EndLine} = lists:keyfind(line, 1, Location), diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 199b7e7800..e2984d74bc 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -536,7 +536,7 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote BaseScope end, - case elixir_interpolation:extract(Line, Column + 2, Scope, true, T, H) of + case elixir_interpolation:extract(Line, Column + 2, Scope#elixir_tokenizer{prev_pos={Line, Column}}, true, T, H) of {NewLine, NewColumn, Parts, Rest, InterScope} -> NewScope = case is_unnecessary_quote(Parts, InterScope) of true -> @@ -557,7 +557,7 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote {ok, [Part]} when is_binary(Part) -> case unsafe_to_atom(Part, Line, Column, Scope) of {ok, Atom} -> - {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}), Token = {atom_quoted, {Line1, Column1, H}, Atom}, tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); @@ -570,12 +570,12 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote true -> atom_safe; false -> atom_unsafe end, - {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope), + {{Line1, Column1}, NewScope1} = token_position({Line, Column}, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}), Token = {Key, {Line1, Column1, H}, Unescaped}, tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); {error, Reason} -> - error(Reason, Rest, NewScope, Tokens) + error(Reason, Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens) end; {error, Reason} -> @@ -833,7 +833,7 @@ handle_heredocs(T, Line, Column, H, Scope, Tokens) -> end. handle_strings(T, Line, Column, H, Scope, Tokens) -> - case elixir_interpolation:extract(Line, Column, Scope, true, T, H) of + case elixir_interpolation:extract(Line, Column, Scope#elixir_tokenizer{prev_pos={Line, Column - 1}}, true, T, H) of {error, Reason} -> interpolation_error(Reason, [H | T], Scope, Tokens, " (for string starting at line ~B)", [Line], Line, Column-1, [H], [H]); @@ -874,12 +874,12 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> true -> kw_identifier_safe; false -> kw_identifier_unsafe end, - {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope), + {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}), Token = {Key, {Line1, Column1, H}, Unescaped}, tokenize(Rest, NewLine, NewColumn + 1, NewScope1, [Token | Tokens]); {error, Reason} -> - error(Reason, Rest, NewScope, Tokens) + error(Reason, Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens) end; {NewLine, NewColumn, Parts, Rest, InterScope} -> @@ -898,12 +898,12 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> case unescape_tokens(Parts, Line, Column, NewScope) of {ok, Unescaped} -> - {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope), + {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}), Token = {string_type(H), {Line1, Column1, nil}, Unescaped}, tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | Tokens]); {error, Reason} -> - error(Reason, Rest, NewScope, Tokens) + error(Reason, Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens) end end. @@ -995,7 +995,7 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe BaseScope end, - case elixir_interpolation:extract(Line, Column + 1, Scope, true, T, H) of + case elixir_interpolation:extract(Line, Column + 1, Scope#elixir_tokenizer{prev_pos={Line, Column}}, true, T, H) of {NewLine, NewColumn, [Part], Rest, InterScope} when is_list(Part) -> NewScope = case is_unnecessary_quote([Part], InterScope) of true -> @@ -1016,7 +1016,7 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of {ok, Atom} -> {DotLine, DotColumn, DotOther} = DotInfo, - {{DotLine1, DotColumn1}, NewScope1} = token_position({DotLine, DotColumn}, NewScope), + {{DotLine1, DotColumn1}, NewScope1} = token_position({DotLine, DotColumn}, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}), TokensSoFar = add_token_with_eol(NewScope1, {'.', {DotLine1, DotColumn1, DotOther}}, Tokens), {{Line1, Column1}, NewScope2} = token_position({Line, Column}, NewScope1), @@ -1025,11 +1025,11 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe tokenize(Rest, NewLine, NewColumn, NewScope2, [Token | TokensSoFar]); {error, Reason} -> - error(Reason, Original, NewScope, Tokens) + error(Reason, Original, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens) end; {_NewLine, _NewColumn, _Parts, Rest, NewScope} -> Message = "interpolation is not allowed when calling function/macro. Found interpolation in a call starting with: ", - error({?LOC(Line, Column), Message, [H]}, Rest, NewScope, Tokens); + error({?LOC(Line, Column), Message, [H]}, Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens); {error, Reason} -> interpolation_error(Reason, Original, Scope, Tokens, " (for function name starting at line ~B)", [Line], Line, Column, [H], [H]) end; @@ -1136,13 +1136,13 @@ extract_heredoc_with_interpolation(Line, Column, Scope, Interpol, T, H) -> %% We prepend a new line so we can transparently remove %% spaces later. This new line is removed by calling "tl" %% in the final heredoc body three lines below. - case elixir_interpolation:extract(Line, Column, Scope, Interpol, [$\n|Headerless], [H,H,H]) of + case elixir_interpolation:extract(Line, Column, Scope#elixir_tokenizer{prev_pos={Line, Column}}, Interpol, [$\n|Headerless], [H,H,H]) of {NewLine, NewColumn, Parts0, Rest, InterScope} -> Indent = NewColumn - 4, Fun = fun(Part, Acc) -> extract_heredoc_indent(Part, Acc, Indent) end, {Parts1, {ShouldWarn, _}} = lists:mapfoldl(Fun, {false, Line}, Parts0), Parts2 = extract_heredoc_head(Parts1), - NewScope = maybe_heredoc_warn(ShouldWarn, Column, InterScope, H), + NewScope = maybe_heredoc_warn(ShouldWarn, Column, InterScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, H), {ok, NewLine, NewColumn, tokens_to_binary(Parts2), Rest, NewScope}; {error, Reason} -> @@ -1755,10 +1755,10 @@ tokenize_sigil_contents([H, H, H | T] = Original, [S | _] = SigilName, Line, Col tokenize_sigil_contents([H | T] = Original, [S | _] = SigilName, Line, Column, Scope, Tokens) when ?is_sigil(H) -> - case elixir_interpolation:extract(Line, Column + 1, Scope, ?is_downcase(S), T, sigil_terminator(H)) of + case elixir_interpolation:extract(Line, Column + 1, Scope#elixir_tokenizer{prev_pos={Line, Column}}, ?is_downcase(S), T, sigil_terminator(H)) of {NewLine, NewColumn, Parts, Rest, NewScope} -> Indentation = nil, - add_sigil_token(SigilName, Line, Column, NewLine, NewColumn, tokens_to_binary(Parts), Rest, NewScope, Tokens, Indentation, <>); + add_sigil_token(SigilName, Line, Column, NewLine, NewColumn, tokens_to_binary(Parts), Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens, Indentation, <>); {error, Reason} -> Sigil = [$~, S, H], @@ -2023,5 +2023,5 @@ to_absolute_interpolation([Binary | Rest], CurrentAbs, Acc) when is_binary(Binar to_absolute_interpolation([{{BeginLine, BeginColumn, nil}, {EndLine, EndColumn, nil}, Tokens} | Rest], {CurrentLine, CurrentCol}, Acc) -> NewBegin = {BeginLine + CurrentLine, BeginColumn + CurrentCol, nil}, NewEnd = {EndLine + CurrentLine, EndColumn + CurrentCol, nil}, - NewTokens = to_absolute_tokens(Tokens, {BeginLine + CurrentLine, BeginColumn + CurrentCol}), + NewTokens = to_absolute_tokens(Tokens, {CurrentLine, CurrentCol}), to_absolute_interpolation(Rest, {CurrentLine, CurrentCol}, [{NewBegin, NewEnd, NewTokens} | Acc]). From 617e1bafcd0321edf053423b5bbf9c600afb93d0 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 13:59:54 +0100 Subject: [PATCH 05/18] Fix sigil positions --- lib/elixir/src/elixir_tokenizer.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index e2984d74bc..0231bac023 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -816,7 +816,7 @@ handle_char(_) -> false. %% Handlers handle_heredocs(T, Line, Column, H, Scope, Tokens) -> - case extract_heredoc_with_interpolation(Line, Column, Scope, true, T, H) of + case extract_heredoc_with_interpolation(Line, Column, Column, Scope, true, T, H) of {ok, NewLine, NewColumn, Parts, Rest, NewScope} -> case unescape_tokens(Parts, Line, Column, NewScope) of {ok, Unescaped} -> @@ -1130,13 +1130,13 @@ collect_modifiers(Rest, Buffer) -> %% Heredocs -extract_heredoc_with_interpolation(Line, Column, Scope, Interpol, T, H) -> +extract_heredoc_with_interpolation(Line, Column, TokenColumn, Scope, Interpol, T, H) -> case extract_heredoc_header(T) of {ok, Headerless} -> %% We prepend a new line so we can transparently remove %% spaces later. This new line is removed by calling "tl" %% in the final heredoc body three lines below. - case elixir_interpolation:extract(Line, Column, Scope#elixir_tokenizer{prev_pos={Line, Column}}, Interpol, [$\n|Headerless], [H,H,H]) of + case elixir_interpolation:extract(Line, Column, Scope#elixir_tokenizer{prev_pos={Line, TokenColumn}}, Interpol, [$\n|Headerless], [H,H,H]) of {NewLine, NewColumn, Parts0, Rest, InterScope} -> Indent = NewColumn - 4, Fun = fun(Part, Acc) -> extract_heredoc_indent(Part, Acc, Indent) end, @@ -1744,7 +1744,7 @@ sigil_name_error() -> tokenize_sigil_contents([H, H, H | T] = Original, [S | _] = SigilName, Line, Column, Scope, Tokens) when ?is_quote(H) -> - case extract_heredoc_with_interpolation(Line, Column, Scope, ?is_downcase(S), T, H) of + case extract_heredoc_with_interpolation(Line, Column, Column - 1 - length(SigilName), Scope, ?is_downcase(S), T, H) of {ok, NewLine, NewColumn, Parts, Rest, NewScope} -> Indentation = NewColumn - 4, add_sigil_token(SigilName, Line, Column, NewLine, NewColumn, Parts, Rest, NewScope, Tokens, Indentation, <>); @@ -1755,7 +1755,7 @@ tokenize_sigil_contents([H, H, H | T] = Original, [S | _] = SigilName, Line, Col tokenize_sigil_contents([H | T] = Original, [S | _] = SigilName, Line, Column, Scope, Tokens) when ?is_sigil(H) -> - case elixir_interpolation:extract(Line, Column + 1, Scope#elixir_tokenizer{prev_pos={Line, Column}}, ?is_downcase(S), T, sigil_terminator(H)) of + case elixir_interpolation:extract(Line, Column + 1, Scope#elixir_tokenizer{prev_pos={Line, Column - 1 - length(SigilName)}}, ?is_downcase(S), T, sigil_terminator(H)) of {NewLine, NewColumn, Parts, Rest, NewScope} -> Indentation = nil, add_sigil_token(SigilName, Line, Column, NewLine, NewColumn, tokens_to_binary(Parts), Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens, Indentation, <>); From 6c19bd0e8611531129de21b784f537804ea70c6f Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 14:53:37 +0100 Subject: [PATCH 06/18] Fix kw_identifier --- lib/elixir/src/elixir_tokenizer.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 0231bac023..6179c8f943 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -862,11 +862,11 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> {ok, [Part]} when is_binary(Part) -> case unsafe_to_atom(Part, Line, Column - 1, Scope) of {ok, Atom} -> - {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope), + {{Line1, Column1}, NewScope1} = token_position({Line, Column - 1}, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}), Token = {kw_identifier, {Line1, Column1, H}, Atom}, tokenize(Rest, NewLine, NewColumn + 1, NewScope1, [Token | Tokens]); {error, Reason} -> - error(Reason, Rest, NewScope, Tokens) + error(Reason, Rest, NewScope#elixir_tokenizer{prev_pos=Scope#elixir_tokenizer.prev_pos}, Tokens) end; {ok, Unescaped} -> From 41829785e82098be8a8f5744de0b408ab6576719 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 16:21:45 +0100 Subject: [PATCH 07/18] Fix match on terminator --- lib/elixir/src/elixir_tokenizer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 6179c8f943..b8d757db82 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -156,7 +156,7 @@ tokenize([], Line, Column, #elixir_tokenizer{cursor_completion=Cursor} = Scope, AllWarnings = maybe_unicode_lint_warnings(Ascii, Tokens, Warnings), {ok, Line, CursorColumn, AllWarnings, AccTokens, AccTerminators}; -tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLine, StartColumn, _}, _} | _]} = Scope, Tokens) -> +tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLine, StartColumn, _}, _, _} | _]} = Scope, Tokens) -> End = terminator(Start), Hint = missing_terminator_hint(Start, End, Scope), Message = "missing terminator: ~ts", From caf1ae92072bd4eea1e70513e8ee5e5e7e0a8fdc Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 17:20:41 +0100 Subject: [PATCH 08/18] Fix variable name --- lib/elixir/src/elixir_tokenizer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index b8d757db82..a09b428521 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -2000,7 +2000,7 @@ to_absolute_tokens([Token | Rest], {CurrLine, CurrCol}, Acc) -> NewInfo2 = setelement(2, NewInfo1, NewCol), NewToken = setelement(2, Token, NewInfo2), NewTokenWithSubtokens = case NewToken of - {Key, Meta, Unescaped} when key =:= atom_safe; Key =:= atom_unsafe; Key =:= kw_identifier_safe; Key =:= kw_identifier_unsafe; Key =:= bin_string; Key =:= list_string -> + {Key, Meta, Unescaped} when Key =:= atom_safe; Key =:= atom_unsafe; Key =:= kw_identifier_safe; Key =:= kw_identifier_unsafe; Key =:= bin_string; Key =:= list_string -> NewUnescaped = to_absolute_interpolation(Unescaped, {NewLine, NewCol}), {Key, Meta, NewUnescaped}; {Key, Meta, Indentation, Unescaped} when Key =:= bin_heredoc; Key =:= list_heredoc -> From d06715daf34a57232c8c44e4ac7a792fd967a890 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 23:10:21 +0100 Subject: [PATCH 09/18] Temp fix for crashes --- lib/elixir/lib/code/fragment.ex | 3 ++- lib/elixir/src/elixir_interpolation.erl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/elixir/lib/code/fragment.ex b/lib/elixir/lib/code/fragment.ex index 567f31348c..352ba964cf 100644 --- a/lib/elixir/lib/code/fragment.ex +++ b/lib/elixir/lib/code/fragment.ex @@ -1195,7 +1195,8 @@ defmodule Code.Fragment do defp reverse_tokens(line, column, tokens, terminators) do {terminators, _} = - Enum.map_reduce(terminators, column, fn {start, _, _}, column -> + Enum.map_reduce(terminators, column, fn {start, _, _, _}, column -> + # TODO handle relative positions atom = :elixir_tokenizer.terminator(start) {{atom, {line, column, nil}}, column + length(Atom.to_charlist(atom))} diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 8afee50831..5aaaa7c1e2 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -133,7 +133,7 @@ strip_horizontal_space(T, Buffer, Counter) -> cursor_complete(Line, Column, Terminators) -> % TODO handle relative position in inserted cursor lists:mapfoldl( - fun({Start, _, _}, AccColumn) -> + fun({Start, _, _, _StartPos}, AccColumn) -> End = elixir_tokenizer:terminator(Start), {{End, {Line, AccColumn, nil}}, AccColumn + length(erlang:atom_to_list(End))} end, From 0c8e39b14bb40aaafd661183729bb86d2f506757 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 23:10:52 +0100 Subject: [PATCH 10/18] Correct error meta --- lib/elixir/src/elixir_tokenizer.erl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index a09b428521..c44f30b8ac 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -156,17 +156,24 @@ tokenize([], Line, Column, #elixir_tokenizer{cursor_completion=Cursor} = Scope, AllWarnings = maybe_unicode_lint_warnings(Ascii, Tokens, Warnings), {ok, Line, CursorColumn, AllWarnings, AccTokens, AccTerminators}; -tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLine, StartColumn, _}, _, _} | _]} = Scope, Tokens) -> +tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLine, StartColumn, _}, _, {StartPrevLine, StartPrevColumn}} | _]} = Scope, Tokens) -> End = terminator(Start), Hint = missing_terminator_hint(Start, End, Scope), Message = "missing terminator: ~ts", Formatted = io_lib:format(Message, [End]), - % TODO StartLine StartColumn should be converted to absolute + + {StartLine1, StartColumn1} = case Scope#elixir_tokenizer.mode of + relative -> + {StartPrevLine, StartPrevColumn}; + absolute -> + {StartLine, StartColumn} + end, + Meta = [ {opening_delimiter, Start}, {expected_delimiter, End}, - {line, StartLine}, - {column, StartColumn}, + {line, StartLine1}, + {column, StartColumn1}, {end_line, EndLine}, {end_column, EndColumn} ], From cacf7f14a7e482e0c392d654011e869d9de8188b Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Mon, 10 Feb 2025 23:55:14 +0100 Subject: [PATCH 11/18] Fix position in errors --- lib/elixir/src/elixir_tokenizer.erl | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index c44f30b8ac..2fc6dc45c2 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -1481,7 +1481,6 @@ interpolation_error(Reason, Rest, Scope, Tokens, Extension, Args, Line, Column, error(interpolation_format(Reason, Extension, Args, Line, Column, Opening, Closing), Rest, Scope, Tokens). interpolation_format({string, EndLine, EndColumn, Message, Token}, Extension, Args, Line, Column, Opening, Closing) -> - % TODO check if lines and columns need to be converted to absolute here Meta = [ {opening_delimiter, list_to_atom(Opening)}, {expected_delimiter, list_to_atom(Closing)}, @@ -1490,7 +1489,6 @@ interpolation_format({string, EndLine, EndColumn, Message, Token}, Extension, Ar {end_line, EndLine}, {end_column, EndColumn} ], - io:format("interpolation_format meta ~p~n", [Meta]), {Meta, [Message, io_lib:format(Extension, Args)], Token}; interpolation_format({_, _, _} = Reason, _Extension, _Args, _Line, _Column, _Opening, _Closing) -> Reason. @@ -1510,8 +1508,13 @@ handle_terminator(Rest, _, _, Scope, {'(', {Line, Column, _}}, [{alias, _, Alias "Unexpected token: ", [Alias] ), - - error({?LOC(Line, Column), Reason, ["("]}, atom_to_list(Alias) ++ [$( | Rest], Scope, Tokens); + Loc = case Scope of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevColumn}} -> + ?LOC(PrevLine, PrevColumn); + _ -> + ?LOC(Line, Column) + end, + error({Loc, Reason, ["("]}, atom_to_list(Alias) ++ [$( | Rest], Scope, Tokens); handle_terminator(Rest, Line, Column, #elixir_tokenizer{terminators=none} = Scope, Token, Tokens) -> tokenize(Rest, Line, Column, Scope, [Token | Tokens]); handle_terminator(Rest, Line, Column, Scope, Token, Tokens) -> @@ -1537,7 +1540,7 @@ check_terminator({Start, Meta}, Terminators, Scope) when Start == 'fn'; Start == NewScope = case Terminators of %% If the do is indented equally or less than the previous do, it may be a missing end error! - [{Start, _, PreviousIndentation} = Previous | _] when Indentation =< PreviousIndentation -> + [{Start, _, PreviousIndentation, _} = Previous | _] when Indentation =< PreviousIndentation -> Scope#elixir_tokenizer{mismatch_hints=[Previous | Scope#elixir_tokenizer.mismatch_hints]}; _ -> @@ -1551,7 +1554,13 @@ check_terminator({'end', {EndLine, _, _}}, [{'do', _, Indentation, _} | Terminat %% If the end is more indented than the do, it may be a missing do error! case Scope#elixir_tokenizer.indentation > Indentation of true -> - Hint = {'end', EndLine, Scope#elixir_tokenizer.indentation}, + EndLine1 = case Scope of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, _PrevColumn}} -> + PrevLine; + _ -> + EndLine + end, + Hint = {'end', EndLine1, Scope#elixir_tokenizer.indentation}, Scope#elixir_tokenizer{mismatch_hints=[Hint | Scope#elixir_tokenizer.mismatch_hints]}; false -> @@ -1570,7 +1579,7 @@ check_terminator({End, {EndLine, EndColumn, _}}, [{Start, {StartLine, StartColum {EndPrevLine, EndPrevColumn} = Scope#elixir_tokenizer.prev_pos, {StartLine1, StartColumn1, EndLine1, EndColumn1} = case Scope#elixir_tokenizer.mode of relative -> - {StartLine + StartPrevLine, StartColumn + StartPrevColumn, EndLine + EndPrevLine, EndColumn + EndPrevColumn}; + {StartPrevLine, StartPrevColumn, EndPrevLine, EndPrevColumn}; absolute -> {StartLine, StartColumn, EndLine, EndColumn} end, From c28c98c84d94e86353d76414ea2b6eedbc86c51c Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Thu, 13 Feb 2025 00:16:28 +0100 Subject: [PATCH 12/18] handle cursor --- lib/elixir/src/elixir_tokenizer.erl | 154 +++++++++++++++------------- 1 file changed, 84 insertions(+), 70 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 2fc6dc45c2..3e4be1ceb0 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -151,7 +151,7 @@ tokenize([], Line, Column, #elixir_tokenizer{cursor_completion=Cursor} = Scope, #elixir_tokenizer{ascii_identifiers_only=Ascii, terminators=Terminators, warnings=Warnings} = Scope, {CursorColumn, AccTerminators, AccTokens} = - add_cursor(Line, Column, Cursor, Terminators, Tokens), + add_cursor(Line, Column, Cursor, Scope, Terminators, Tokens), AllWarnings = maybe_unicode_lint_warnings(Ascii, Tokens, Warnings), {ok, Line, CursorColumn, AllWarnings, AccTokens, AccTerminators}; @@ -1918,89 +1918,103 @@ error(Reason, Rest, #elixir_tokenizer{warnings=Warnings}, Tokens) -> %% Cursor handling -add_cursor(_Line, Column, noprune, Terminators, Tokens) -> +add_cursor(_Line, Column, noprune, _Scope, Terminators, Tokens) -> {Column, Terminators, Tokens}; -add_cursor(Line, Column, prune_and_cursor, Terminators, Tokens) -> - % TODO handle relative positions in cursor and backtrack in prune - PrePrunedTokens = prune_identifier(Tokens), - PrunedTokens = prune_tokens(PrePrunedTokens, []), - CursorTokens = [ - {')', {Line, Column + 11, nil}}, - {'(', {Line, Column + 10, nil}}, - {paren_identifier, {Line, Column, nil}, '__cursor__'} - | PrunedTokens - ], +add_cursor(Line, Column, prune_and_cursor, Scope, Terminators, Tokens) -> + {Scope1, PrePrunedTokens} = prune_identifier(Scope, Tokens), + {Scope2, PrunedTokens} = prune_tokens(PrePrunedTokens, [], Scope1), + CursorTokens = case Scope2 of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevColumn}} -> [ + {')', {0, 1, nil}}, + {'(', {0, 10, nil}}, + {paren_identifier, {Line - PrevLine, Column - PrevColumn, nil}, '__cursor__'} + | PrunedTokens + ]; + _ -> [ + {')', {Line, Column + 11, nil}}, + {'(', {Line, Column + 10, nil}}, + {paren_identifier, {Line, Column, nil}, '__cursor__'} + | PrunedTokens + ] + end, {Column + 12, Terminators, CursorTokens}. -prune_identifier([{identifier, _, _} | Tokens]) -> Tokens; -prune_identifier(Tokens) -> Tokens. +revert_prev_pos(Token, #elixir_tokenizer{prev_pos={PrevLine, PrevColumn, mode=relative}} = Scope) -> + Info = element(2, Token), + Line = element(1, Info), + Column = element(2, Info), + Scope#elixir_tokenizer{prev_pos={PrevLine - Line, PrevColumn - Column}}; +revert_prev_pos(_Token, Scope) -> Scope. + +prune_identifier(Scope, [{identifier, _, _} = Token | Tokens]) -> {revert_prev_pos(Token, Scope), Tokens}; +prune_identifier(Scope, Tokens) -> {Scope, Tokens}. %%% Any terminator needs to be closed -prune_tokens([{'end', _} | Tokens], Opener) -> - prune_tokens(Tokens, ['end' | Opener]); -prune_tokens([{')', _} | Tokens], Opener) -> - prune_tokens(Tokens, [')' | Opener]); -prune_tokens([{']', _} | Tokens], Opener) -> - prune_tokens(Tokens, [']' | Opener]); -prune_tokens([{'}', _} | Tokens], Opener) -> - prune_tokens(Tokens, ['}' | Opener]); -prune_tokens([{'>>', _} | Tokens], Opener) -> - prune_tokens(Tokens, ['>>' | Opener]); +prune_tokens([{'end', _} = Token | Tokens], Opener, Scope) -> + prune_tokens(Tokens, ['end' | Opener], revert_prev_pos(Token, Scope)); +prune_tokens([{')', _} = Token | Tokens], Opener, Scope) -> + prune_tokens(Tokens, [')' | Opener], revert_prev_pos(Token, Scope)); +prune_tokens([{']', _} = Token | Tokens], Opener, Scope) -> + prune_tokens(Tokens, [']' | Opener], revert_prev_pos(Token, Scope)); +prune_tokens([{'}', _} = Token | Tokens], Opener, Scope) -> + prune_tokens(Tokens, ['}' | Opener], revert_prev_pos(Token, Scope)); +prune_tokens([{'>>', _} = Token | Tokens], Opener, Scope) -> + prune_tokens(Tokens, ['>>' | Opener], revert_prev_pos(Token, Scope)); %%% Close opened terminators -prune_tokens([{'fn', _} | Tokens], ['end' | Opener]) -> - prune_tokens(Tokens, Opener); -prune_tokens([{'do', _} | Tokens], ['end' | Opener]) -> - prune_tokens(Tokens, Opener); -prune_tokens([{'(', _} | Tokens], [')' | Opener]) -> - prune_tokens(Tokens, Opener); -prune_tokens([{'[', _} | Tokens], [']' | Opener]) -> - prune_tokens(Tokens, Opener); -prune_tokens([{'{', _} | Tokens], ['}' | Opener]) -> - prune_tokens(Tokens, Opener); -prune_tokens([{'<<', _} | Tokens], ['>>' | Opener]) -> - prune_tokens(Tokens, Opener); +prune_tokens([{'fn', _} = Token | Tokens], ['end' | Opener], Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); +prune_tokens([{'do', _} = Token | Tokens], ['end' | Opener], Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); +prune_tokens([{'(', _} = Token | Tokens], [')' | Opener], Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); +prune_tokens([{'[', _} = Token | Tokens], [']' | Opener], Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); +prune_tokens([{'{', _} = Token | Tokens], ['}' | Opener], Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); +prune_tokens([{'<<', _} = Token | Tokens], ['>>' | Opener], Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); %%% or it is time to stop... -prune_tokens([{';', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'eol', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{',', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'fn', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'do', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'(', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'[', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'{', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{'<<', _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{identifier, _, _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{block_identifier, _, _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{kw_identifier, _, _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{kw_identifier_safe, _, _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{kw_identifier_unsafe, _, _} | _] = Tokens, []) -> - Tokens; -prune_tokens([{OpType, _, _} | _] = Tokens, []) +prune_tokens([{';', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'eol', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{',', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'fn', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'do', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'(', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'[', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'{', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{'<<', _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{identifier, _, _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{block_identifier, _, _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{kw_identifier, _, _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{kw_identifier_safe, _, _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{kw_identifier_unsafe, _, _} | _] = Tokens, [], Scope) -> + {Scope, Tokens}; +prune_tokens([{OpType, _, _} | _] = Tokens, [], Scope) when OpType =:= comp_op; OpType =:= at_op; OpType =:= unary_op; OpType =:= and_op; OpType =:= or_op; OpType =:= arrow_op; OpType =:= match_op; OpType =:= in_op; OpType =:= in_match_op; OpType =:= type_op; OpType =:= dual_op; OpType =:= mult_op; OpType =:= power_op; OpType =:= concat_op; OpType =:= range_op; OpType =:= xor_op; OpType =:= pipe_op; OpType =:= stab_op; OpType =:= when_op; OpType =:= assoc_op; OpType =:= rel_op; OpType =:= ternary_op; OpType =:= capture_op; OpType =:= ellipsis_op -> - Tokens; + {Scope, Tokens}; %%% or we traverse until the end. -prune_tokens([_ | Tokens], Opener) -> - prune_tokens(Tokens, Opener); -prune_tokens([], _Opener) -> - []. +prune_tokens([Token | Tokens], Opener, Scope) -> + prune_tokens(Tokens, Opener, revert_prev_pos(Token, Scope)); +prune_tokens([], _Opener, Scope) -> + {Scope, []}. to_absolute_tokens(RelTokens, {StartLine, StartColumn}) -> to_absolute_tokens(RelTokens, {StartLine, StartColumn}, []). From 80e17f87710dee68d1950d4ea0e002d4a286e9fa Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sat, 15 Feb 2025 21:31:21 +0100 Subject: [PATCH 13/18] fix confusables warning --- lib/elixir/src/elixir_tokenizer.erl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 3e4be1ceb0..ade40a3a15 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -153,7 +153,10 @@ tokenize([], Line, Column, #elixir_tokenizer{cursor_completion=Cursor} = Scope, {CursorColumn, AccTerminators, AccTokens} = add_cursor(Line, Column, Cursor, Scope, Terminators, Tokens), - AllWarnings = maybe_unicode_lint_warnings(Ascii, Tokens, Warnings), + AllWarnings = maybe_unicode_lint_warnings(Ascii, case Scope of + #elixir_tokenizer{mode=relative} -> lists:reverse(to_absolute_tokens(lists:reverse(Tokens), {1, 1})); + _ -> Tokens + end, Warnings), {ok, Line, CursorColumn, AllWarnings, AccTokens, AccTerminators}; tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLine, StartColumn, _}, _, {StartPrevLine, StartPrevColumn}} | _]} = Scope, Tokens) -> @@ -181,7 +184,10 @@ tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLi tokenize([], Line, Column, #elixir_tokenizer{} = Scope, Tokens) -> #elixir_tokenizer{ascii_identifiers_only=Ascii, warnings=Warnings} = Scope, - AllWarnings = maybe_unicode_lint_warnings(Ascii, Tokens, Warnings), + AllWarnings = maybe_unicode_lint_warnings(Ascii, case Scope of + #elixir_tokenizer{mode=relative} -> lists:reverse(to_absolute_tokens(lists:reverse(Tokens), {1, 1})); + _ -> Tokens + end, Warnings), {ok, Line, Column, AllWarnings, Tokens, []}; % VC merge conflict From 5be62d71a6c39739d72abd2db2b575d9204811cf Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 16 Feb 2025 08:41:18 +0100 Subject: [PATCH 14/18] fix invalid match --- lib/elixir/src/elixir_tokenizer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index ade40a3a15..5f1c89e79f 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -1945,7 +1945,7 @@ add_cursor(Line, Column, prune_and_cursor, Scope, Terminators, Tokens) -> end, {Column + 12, Terminators, CursorTokens}. -revert_prev_pos(Token, #elixir_tokenizer{prev_pos={PrevLine, PrevColumn, mode=relative}} = Scope) -> +revert_prev_pos(Token, #elixir_tokenizer{prev_pos={PrevLine, PrevColumn}, mode=relative} = Scope) -> Info = element(2, Token), Line = element(1, Info), Column = element(2, Info), From 377e7c88a1e6c472ba2c3179b77ad5a54af774d1 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 16 Feb 2025 08:41:46 +0100 Subject: [PATCH 15/18] add helper --- lib/elixir/src/elixir_tokenizer.erl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 5f1c89e79f..549dda25dd 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -5,7 +5,7 @@ -module(elixir_tokenizer). -include("elixir.hrl"). -include("elixir_tokenizer.hrl"). --export([tokenize/1, tokenize/3, tokenize/4, invalid_do_error/1, terminator/1, to_absolute_tokens/2]). +-export([tokenize/1, tokenize/3, tokenize/4, invalid_do_error/1, terminator/1, to_absolute_tokens/2, to_absolute_terminators/1]). -define(at_op(T), T =:= $@). @@ -2061,3 +2061,9 @@ to_absolute_interpolation([{{BeginLine, BeginColumn, nil}, {EndLine, EndColumn, NewEnd = {EndLine + CurrentLine, EndColumn + CurrentCol, nil}, NewTokens = to_absolute_tokens(Tokens, {CurrentLine, CurrentCol}), to_absolute_interpolation(Rest, {CurrentLine, CurrentCol}, [{NewBegin, NewEnd, NewTokens} | Acc]). + +to_absolute_terminators(List) -> to_absolute_terminators(List, []). + +to_absolute_terminators([], Acc) -> lists:reverse(Acc); +to_absolute_terminators([{Terminator, {_Line, _Column, nil}, X, {PrevLine, PrevColumn}} | Rest], Acc) -> + to_absolute_terminators(Rest, [{Terminator, {PrevLine, PrevColumn, nil}, X, {1, 1}} | Acc]). From c25cfc36dd6d8edb9224e58a33293a2a1ee3332a Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 16 Feb 2025 08:52:39 +0100 Subject: [PATCH 16/18] handle cursor --- lib/elixir/src/elixir_interpolation.erl | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 5aaaa7c1e2..6109ea88c6 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -76,8 +76,14 @@ extract([$#, ${ | Rest], Buffer, Output, Line, Column, Scope, true, Last) -> {error, Reason}; {ok, EndLine, EndColumn, Warnings, Tokens, Terminators} when Scope#elixir_tokenizer.cursor_completion /= false -> NewScope = Scope#elixir_tokenizer{warnings=Warnings, cursor_completion=noprune}, - {CursorTerminators, _} = cursor_complete(EndLine, EndColumn, Terminators), - Output2 = build_interpol(Line, Column, EndLine, EndColumn, lists:reverse(Tokens, CursorTerminators), Output1), + {CursorTerminators, _} = cursor_complete(EndLine, EndColumn, Terminators, NewScope), + {Line1, Column1, EndLine1, EndColumn1} = case Scope of + #elixir_tokenizer{mode=relative, prev_pos={PrevLine, PrevColumn}} -> + {Line - PrevLine, Column - PrevColumn, EndLine - PrevLine, EndColumn - PrevColumn}; + _ -> + {Line, Column, EndLine, EndColumn} + end, + Output2 = build_interpol(Line1, Column1, EndLine1, EndColumn1, lists:reverse(Tokens, CursorTerminators), Output1), extract([], [], Output2, EndLine, EndColumn, NewScope, true, Last); {ok, _, _, _, _, _} -> {error, {string, Line, Column, "missing interpolation terminator: \"}\"", []}} @@ -130,14 +136,19 @@ strip_horizontal_space([H | T], Buffer, Counter) when H =:= $\s; H =:= $\t -> strip_horizontal_space(T, Buffer, Counter) -> {T, Buffer, Counter}. -cursor_complete(Line, Column, Terminators) -> - % TODO handle relative position in inserted cursor +cursor_complete(Line, Column, Terminators, Scope) -> lists:mapfoldl( - fun({Start, _, _, _StartPos}, AccColumn) -> + fun({Start, _, _, _StartPos}, {AccColumn, PrevLength}) -> End = elixir_tokenizer:terminator(Start), - {{End, {Line, AccColumn, nil}}, AccColumn + length(erlang:atom_to_list(End))} + Meta = case Scope of + #elixir_tokenizer{mode=relative} -> + {0, PrevLength, nil}; + _ -> {Line, AccColumn, nil} + end, + Length = length(erlang:atom_to_list(End)), + {{End, Meta}, {AccColumn + Length, Length}} end, - Column, + {Column, 1}, Terminators ). From 7e2c313a74c08d41e6ebbdbb7367aab51fa967b2 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 16 Feb 2025 12:43:19 +0100 Subject: [PATCH 17/18] handle relative tokens in fragment --- lib/elixir/lib/code/fragment.ex | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/elixir/lib/code/fragment.ex b/lib/elixir/lib/code/fragment.ex index 352ba964cf..fe0a4a5c37 100644 --- a/lib/elixir/lib/code/fragment.ex +++ b/lib/elixir/lib/code/fragment.ex @@ -1138,12 +1138,13 @@ defmodule Code.Fragment do {:ok, Macro.t()} | {:error, {location :: keyword, binary | {binary, binary}, binary}} def container_cursor_to_quoted(fragment, opts \\ []) do {trailing_fragment, opts} = Keyword.pop(opts, :trailing_fragment) - opts = Keyword.take(opts, [:columns, :token_metadata, :literal_encoder]) + opts = Keyword.take(opts, [:columns, :token_metadata, :literal_encoder, :mode]) opts = [check_terminators: {:cursor, []}, emit_warnings: false] ++ opts file = Keyword.get(opts, :file, "nofile") line = Keyword.get(opts, :line, 1) column = Keyword.get(opts, :column, 1) + mode = Keyword.get(opts, :mode, :absolute) case :elixir_tokenizer.tokenize(to_charlist(fragment), line, column, opts) do {:ok, line, column, _warnings, rev_tokens, rev_terminators} @@ -1158,7 +1159,7 @@ defmodule Code.Fragment do _ -> {rev_tokens, rev_terminators} end - tokens = reverse_tokens(line, column, rev_tokens, rev_terminators) + tokens = reverse_tokens(line, column, rev_tokens, rev_terminators, mode) :elixir.tokens_to_quoted(tokens, file, opts) {:ok, line, column, _warnings, rev_tokens, rev_terminators} -> @@ -1171,7 +1172,7 @@ defmodule Code.Fragment do {:error, {meta, _, ~c"end"}, _rest, _warnings, trailing_rev_tokens} <- :elixir_tokenizer.tokenize(to_charlist(trailing_fragment), line, column, opts) do trailing_tokens = - reverse_tokens(meta[:line], meta[:column], trailing_rev_tokens, after_start) + reverse_tokens(meta[:line], meta[:column], trailing_rev_tokens, after_start, mode) # If the cursor has its own line, then we do not trim new lines trailing tokens. # Otherwise we want to drop any newline so we drop the next tokens after eol. @@ -1183,7 +1184,7 @@ defmodule Code.Fragment do Enum.reverse(rev_tokens, drop_tokens(trailing_tokens, 0)) else - _ -> reverse_tokens(line, column, rev_tokens, rev_terminators) + _ -> reverse_tokens(line, column, rev_tokens, rev_terminators, mode) end :elixir.tokens_to_quoted(tokens, file, opts) @@ -1193,13 +1194,16 @@ defmodule Code.Fragment do end end - defp reverse_tokens(line, column, tokens, terminators) do + defp reverse_tokens(line, column, tokens, terminators, mode) do {terminators, _} = - Enum.map_reduce(terminators, column, fn {start, _, _, _}, column -> - # TODO handle relative positions + Enum.map_reduce(terminators, {column, 1}, fn {start, _, _, _}, {column, prev_length} -> atom = :elixir_tokenizer.terminator(start) - - {{atom, {line, column, nil}}, column + length(Atom.to_charlist(atom))} + meta = case mode do + :relative -> {0, prev_length, nil} + _ -> {line, column, nil} + end + length = length(Atom.to_charlist(atom)) + {{atom, meta}, {column + length, length}} end) Enum.reverse(tokens, terminators) From 14d0e1223d67e0e5aa03e1a0e8963d0e97190367 Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Sun, 16 Feb 2025 15:22:06 +0100 Subject: [PATCH 18/18] format --- lib/elixir/lib/code/fragment.ex | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/elixir/lib/code/fragment.ex b/lib/elixir/lib/code/fragment.ex index fe0a4a5c37..41b0d24390 100644 --- a/lib/elixir/lib/code/fragment.ex +++ b/lib/elixir/lib/code/fragment.ex @@ -1198,10 +1198,13 @@ defmodule Code.Fragment do {terminators, _} = Enum.map_reduce(terminators, {column, 1}, fn {start, _, _, _}, {column, prev_length} -> atom = :elixir_tokenizer.terminator(start) - meta = case mode do - :relative -> {0, prev_length, nil} - _ -> {line, column, nil} - end + + meta = + case mode do + :relative -> {0, prev_length, nil} + _ -> {line, column, nil} + end + length = length(Atom.to_charlist(atom)) {{atom, meta}, {column + length, length}} end)