1- function align_fst! (fst:: FST , opts:: Options )
1+ """
2+ align_fst!(fst::FST, doc::Document, opts::Options)
3+
4+ Walk `fst` and apply the alignment passes enabled by `opts`.
5+
6+ `doc` is the original source document and is used to translate parser line offsets into
7+ display columns before padding is adjusted. This keeps alignment stable for source text
8+ containing combining and other zero-width characters.
9+ """
10+ function align_fst! (fst:: FST , doc:: Document , opts:: Options )
211 if is_leaf (fst)
312 return
413 end
@@ -9,15 +18,15 @@ function align_fst!(fst::FST, opts::Options)
918 if is_leaf (n)
1019 continue
1120 elseif opts. align_struct_field && (n. typ === Struct || n. typ === Mutable)
12- align_struct! (n)
21+ align_struct! (doc, n)
1322 elseif opts. align_conditional && n. typ === Conditional
14- align_conditional! (n)
23+ align_conditional! (doc, n)
1524 elseif opts. align_matrix && (
1625 n. typ === Vcat || n. typ === TypedVcat || n. typ === Ncat || n. typ === TypedNcat
1726 )
18- align_matrix! (n)
27+ align_matrix! (doc, n)
1928 else
20- align_fst! (n, opts)
29+ align_fst! (n, doc, opts)
2130 end
2231
2332 if opts. align_assignment && (is_assignment (n) || n. typ === Kw)
@@ -29,8 +38,8 @@ function align_fst!(fst::FST, opts::Options)
2938 end
3039 end
3140
32- align_binaryopcalls! (fst, assignment_inds)
33- align_binaryopcalls! (fst, pair_arrow_inds)
41+ align_binaryopcalls! (doc, fst, assignment_inds)
42+ align_binaryopcalls! (doc, fst, pair_arrow_inds)
3443end
3544
3645"""
@@ -40,8 +49,8 @@ Group of FST node indices and required metadata to potentially align them.
4049
4150- `node_inds`. Indices of FST nodes affected by alignment.
4251- `nodes`. FST nodes affected by alignment.
43- - `line_offsets`. Line offset of the character nodes may be aligned to
44- in the source file.
52+ - `line_offsets`. Display column, in the source file, of the character that
53+ nodes may be aligned to.
4554- `lens`. Length of the FST node prior to the alignment character. Used
4655 to calculate extra whitespace padding.
4756- `whitespaces`. Number of whitespaces between the alignment character and
@@ -57,6 +66,28 @@ struct AlignGroup
5766end
5867AlignGroup () = AlignGroup (FST[], Int[], Int[], Int[], Int[])
5968
"""
    source_display_line_offset(doc::Document, line::Int, line_offset::Int) -> Int

Convert a 1-based source line offset into a 1-based display column for `line` in `doc`.

`line_offset` is interpreted as a code-unit offset from the first code unit of `line`: it is
added to the line's entry in `doc.srcfile.line_starts` and used as an index into the source
text. The part of the line that precedes that position is measured with `textwidth`, so
combining marks and other zero-width codepoints do not consume extra padding.

Special cases:

- `line_offset <= 1` always maps to column `1`.
- If `line` is not a valid index into `doc.srcfile.line_starts`, the position cannot be
  located in the source and `line_offset` is returned unchanged.
- A position past the end of the source text is clamped to the end of the text.

NOTE(review): this assumes the offsets stored on FST nodes are code-unit based. If they are
character columns instead, the index arithmetic below is off for multi-byte text; confirm
against the code that records `line_offset`.
"""
function source_display_line_offset(doc::Document, line::Int, line_offset::Int)
    line_offset <= 1 && return 1

    srcfile = doc.srcfile
    line_starts = srcfile.line_starts

    # Without a matching source line there is nothing to measure; keep the raw offset
    # rather than throwing a `BoundsError` in the middle of an alignment pass.
    checkbounds(Bool, line_starts, line) || return line_offset

    code = JuliaSyntax.sourcetext(srcfile)
    line_start = line_starts[line]

    # `prevind` only accepts indices in `1:ncodeunits(code)+1`, so clamp to that upper bound.
    offset = min(line_start + line_offset - 1, ncodeunits(code) + 1)
    prefix_end = prevind(code, offset)

    # `SubString` measures the prefix without copying it; an empty range yields width 0.
    return textwidth(SubString(code, line_start, prefix_end)) + 1
end
90+
6091function Base. push! (g:: AlignGroup , n:: FST , ind:: Int , line_offset:: Int , len:: Int , ws:: Int )
6192 push! (g. nodes, n)
6293 push! (g. node_inds, ind)
@@ -113,10 +144,9 @@ function align_binaryopcall!(fst::FST, diff::Int)
113144 end
114145end
115146
116- # we need to use ncodeunits
117147function node_align_length (n:: FST )
118148 if is_leaf (n)
119- return ncodeunits (n. val)
149+ return textwidth (n. val)
120150 end
121151 margin = 0
122152 for nn in n. nodes
@@ -134,14 +164,15 @@ function node_align_length(nodes::Vector{FST})
134164end
135165
136166"""
137- align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
167+ align_binaryopcalls!(doc::Document, fst::FST, op_inds::Vector{Int})
138168
139169Aligns binary operator expressions.
140170
141- Additionally handles the case where a keyword such as `const` is used
142- prior to the binary op call.
171+ Additionally handles the case where a keyword such as `const` is used prior to the binary op
172+ call. `doc` is used to compare source locations in display columns rather than raw source
173+ offsets.
143174"""
144- function align_binaryopcalls! (fst:: FST , op_inds:: Vector{Int} )
175+ function align_binaryopcalls! (doc :: Document , fst:: FST , op_inds:: Vector{Int} )
145176 if ! (length (op_inds) > 1 )
146177 return
147178 end
@@ -158,7 +189,11 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
158189
159190 binop, nlen, ws = if n. typ === Binary || n. typ === Kw
160191 nlen = node_align_length (n[1 ])
161- n, nlen, (n[3 ]. line_offset - n. line_offset) - nlen
192+ start_offset = source_display_line_offset (doc, n. startline, n. line_offset)
193+ op_offset =
194+ source_display_line_offset (doc, n[3 ]. startline, n[3 ]. line_offset)
195+
196+ n, nlen, op_offset - start_offset - nlen
162197 else
163198 binop:: Union{FST,Nothing} = nothing
164199 sn = n
@@ -175,7 +210,16 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
175210 else
176211 binop = (sn. nodes:: Vector{FST} )[binop_ind]
177212 nlen += node_align_length (binop[1 ])
178- ws = (binop[3 ]. line_offset - n. line_offset) - nlen
213+ start_offset =
214+ source_display_line_offset (doc, n. startline, n. line_offset)
215+
216+ op_offset = source_display_line_offset (
217+ doc,
218+ binop[3 ]. startline,
219+ binop[3 ]. line_offset,
220+ )
221+
222+ ws = op_offset - start_offset - nlen
179223 break
180224 end
181225 end
@@ -189,7 +233,10 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
189233
190234 push! (g. nodes, binop)
191235 push! (g. node_inds, i)
192- push! (g. line_offsets, binop[3 ]. line_offset)
236+ push! (
237+ g. line_offsets,
238+ source_display_line_offset (doc, binop[3 ]. startline, binop[3 ]. line_offset),
239+ )
193240 push! (g. lens, nlen)
194241 push! (g. whitespaces, ws)
195242
@@ -214,11 +261,14 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
214261end
215262
216263"""
217- align_struct!(fst::FST)
264+ align_struct!(doc::Document, fst::FST)
218265
219266Aligns struct fields.
267+
268+ `doc` supplies the original source text so field and type operator columns can be measured
269+ using display width.
220270"""
221- function align_struct! (fst:: FST )
271+ function align_struct! (doc :: Document , fst:: FST )
222272 ind = findfirst (n -> n. typ === Block, fst. nodes)
223273 if isnothing (ind) || length (fst[ind]) == 0
224274 return
@@ -248,11 +298,14 @@ function align_struct!(fst::FST)
248298 continue
249299 end
250300
251- ws = n[ind]. line_offset - (n. line_offset + nlen)
301+ start_offset = source_display_line_offset (doc, n. startline, n. line_offset)
302+ op_offset =
303+ source_display_line_offset (doc, n[ind]. startline, n[ind]. line_offset)
304+ ws = op_offset - start_offset - nlen
252305
253306 push! (g. nodes, n)
254307 push! (g. node_inds, i)
255- push! (g. line_offsets, n[ind] . line_offset )
308+ push! (g. line_offsets, op_offset )
256309 push! (g. lens, nlen)
257310 push! (g. whitespaces, ws)
258311
@@ -270,11 +323,17 @@ function align_struct!(fst::FST)
270323 if ind === nothing
271324 continue
272325 end
273- ws = binop[ind]. line_offset - (n. line_offset + nlen)
326+ start_offset = source_display_line_offset (doc, n. startline, n. line_offset)
327+ op_offset = source_display_line_offset (
328+ doc,
329+ binop[ind]. startline,
330+ binop[ind]. line_offset,
331+ )
332+ ws = op_offset - start_offset - nlen
274333
275334 push! (g. nodes, binop)
276335 push! (g. node_inds, i)
277- push! (g. line_offsets, binop[ind] . line_offset )
336+ push! (g. line_offsets, op_offset )
278337 push! (g. lens, nlen)
279338 push! (g. whitespaces, ws)
280339
@@ -297,11 +356,14 @@ function align_struct!(fst::FST)
297356end
298357
299358"""
300- align_conditional!(fst::FST)
359+ align_conditional!(doc::Document, fst::FST)
301360
302361Aligns a conditional expression.
362+
363+ `doc` supplies the original source text so `?` and `:` alignment is computed using display
364+ columns.
303365"""
304- function align_conditional! (fst:: FST )
366+ function align_conditional! (doc :: Document , fst:: FST )
305367 nodes = flatten_conditionalopcall (fst)
306368
307369 cond_group = AlignGroup ()
@@ -314,23 +376,35 @@ function align_conditional!(fst::FST)
314376 if n. typ === OPERATOR && n. val == " ?"
315377 if cond_prev_endline != n. endline
316378 nlen = node_align_length (nodes[i- 2 ])
317- ws = n. line_offset - (nodes[i- 2 ]. line_offset + nlen)
379+ start_offset = source_display_line_offset (
380+ doc,
381+ nodes[i- 2 ]. startline,
382+ nodes[i- 2 ]. line_offset,
383+ )
384+ op_offset = source_display_line_offset (doc, n. startline, n. line_offset)
385+ ws = op_offset - start_offset - nlen
318386
319387 push! (cond_group. nodes, n)
320388 push! (cond_group. node_inds, i)
321- push! (cond_group. line_offsets, n . line_offset )
389+ push! (cond_group. line_offsets, op_offset )
322390 push! (cond_group. lens, nlen)
323391 push! (cond_group. whitespaces, ws)
324392 end
325393 cond_prev_endline = n. endline
326394 elseif n. typ === OPERATOR && n. val == " :"
327395 if colon_prev_endline != n. endline
328396 nlen = node_align_length (nodes[i- 2 ])
329- ws = n. line_offset - (nodes[i- 2 ]. line_offset + nlen)
397+ start_offset = source_display_line_offset (
398+ doc,
399+ nodes[i- 2 ]. startline,
400+ nodes[i- 2 ]. line_offset,
401+ )
402+ op_offset = source_display_line_offset (doc, n. startline, n. line_offset)
403+ ws = op_offset - start_offset - nlen
330404
331405 push! (colon_group. nodes, n)
332406 push! (colon_group. node_inds, i)
333- push! (colon_group. line_offsets, n . line_offset )
407+ push! (colon_group. line_offsets, op_offset )
334408 push! (colon_group. lens, nlen)
335409 push! (colon_group. whitespaces, ws)
336410 end
@@ -376,24 +450,30 @@ function align_conditional!(fst::FST)
376450end
377451
378452"""
379- Adjust whitespace in between matrix elements such that it's the same as the original source file.
453+ align_matrix!(doc::Document, fst::FST)
454+
455+ Adjust whitespace between matrix elements so it matches the original source layout.
456+
457+ `doc` is used to recover the source display columns for each element, which avoids
458+ overpadding when the source contains zero-width characters.
380459"""
381- function align_matrix! (fst:: FST )
460+ function align_matrix! (doc :: Document , fst:: FST )
382461 rows = filter (n -> n. typ === Row, fst. nodes:: Vector{FST} )
383462 if length (rows) == 0
384463 return
385464 end
386465
387466 min_offset = minimum (map (rows) do r
388- r[1 ]. line_offset
467+ source_display_line_offset (doc, r[1 ]. startline, r[ 1 ] . line_offset)
389468 end )
390469
391470 line = 0
392471 # add whitespace prior to initial element if elements are aligned to the right and
393472 # it's the first row on that line.
394473 for r in rows
395- if r[1 ]. line_offset > min_offset && line != r. startline
396- diff = r[1 ]. line_offset - min_offset
474+ row_offset = source_display_line_offset (doc, r[1 ]. startline, r[1 ]. line_offset)
475+ if row_offset > min_offset && line != r. startline
476+ diff = row_offset - min_offset
397477 if diff > 0
398478 insert! (r. nodes:: Vector{FST} , 1 , Whitespace (diff))
399479 end
@@ -408,7 +488,10 @@ function align_matrix!(fst::FST)
408488 n1 = r[i- 1 ]
409489 n2 = r[i+ 1 ]
410490
411- diff = n2. line_offset - n1. line_offset - node_align_length (n1)
491+ diff =
492+ source_display_line_offset (doc, n2. startline, n2. line_offset) -
493+ source_display_line_offset (doc, n1. startline, n1. line_offset) -
494+ node_align_length (n1)
412495
413496 # fix #694 and #713
414497 if diff > 0
0 commit comments