Skip to content

Commit 5a05d49

Browse files
authored
Merge pull request #976 from ronisbr/zero_width_char_alignment
Add support for alignment in lines with zero-width characters
2 parents a91aef1 + bd6912c commit 5a05d49

3 files changed

Lines changed: 127 additions & 37 deletions

File tree

src/JuliaFormatter.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ function format_text(node::JuliaSyntax.GreenNode, style::AbstractStyle, s::State
237237
end
238238

239239
if needs_alignment(s.opts)
240-
align_fst!(fst, s.opts)
240+
align_fst!(fst, s.doc, s.opts)
241241
end
242242

243243
nest!(style, fst, s)

src/align.jl

Lines changed: 119 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
function align_fst!(fst::FST, opts::Options)
1+
"""
2+
align_fst!(fst::FST, doc::Document, opts::Options)
3+
4+
Walk `fst` and apply the alignment passes enabled by `opts`.
5+
6+
`doc` is the original source document and is used to translate parser line offsets into
7+
display columns before padding is adjusted. This keeps alignment stable for source text
8+
containing combining and other zero-width characters.
9+
"""
10+
function align_fst!(fst::FST, doc::Document, opts::Options)
211
if is_leaf(fst)
312
return
413
end
@@ -9,15 +18,15 @@ function align_fst!(fst::FST, opts::Options)
918
if is_leaf(n)
1019
continue
1120
elseif opts.align_struct_field && (n.typ === Struct || n.typ === Mutable)
12-
align_struct!(n)
21+
align_struct!(doc, n)
1322
elseif opts.align_conditional && n.typ === Conditional
14-
align_conditional!(n)
23+
align_conditional!(doc, n)
1524
elseif opts.align_matrix && (
1625
n.typ === Vcat || n.typ === TypedVcat || n.typ === Ncat || n.typ === TypedNcat
1726
)
18-
align_matrix!(n)
27+
align_matrix!(doc, n)
1928
else
20-
align_fst!(n, opts)
29+
align_fst!(n, doc, opts)
2130
end
2231

2332
if opts.align_assignment && (is_assignment(n) || n.typ === Kw)
@@ -29,8 +38,8 @@ function align_fst!(fst::FST, opts::Options)
2938
end
3039
end
3140

32-
align_binaryopcalls!(fst, assignment_inds)
33-
align_binaryopcalls!(fst, pair_arrow_inds)
41+
align_binaryopcalls!(doc, fst, assignment_inds)
42+
align_binaryopcalls!(doc, fst, pair_arrow_inds)
3443
end
3544

3645
"""
@@ -40,8 +49,8 @@ Group of FST node indices and required metadata to potentially align them.
4049
4150
- `node_inds`. Indices of FST nodes affected by alignment.
4251
- `nodes`. FST nodes affected by alignment.
43-
- `line_offsets`. Line offset of the character nodes may be aligned to
44-
in the source file.
52+
- `line_offsets`. Display-column offset, in the source file, of the character
53+
that nodes may be aligned to.
4554
- `lens`. Length of the FST node prior to the alignment character. Used
4655
to calculate extra whitespace padding.
4756
- `whitespaces`. Number of whitespaces between the alignment character and
@@ -57,6 +66,28 @@ struct AlignGroup
5766
end
5867
AlignGroup() = AlignGroup(FST[], Int[], Int[], Int[], Int[])
5968

69+
"""
70+
source_display_line_offset(doc::Document, line::Int, line_offset::Int) -> Int
71+
72+
Convert a 1-based source line offset into a 1-based display column for `line` in `doc`.
73+
74+
`line_offset` follows the offsets recorded in the parsed source and therefore advances
75+
through the original text representation. Alignment, however, needs display width semantics
76+
so that combining marks and other zero-width codepoints do not consume extra padding. This
77+
helper measures the source line prefix with `textwidth` and returns the corresponding
78+
display column.
79+
"""
80+
function source_display_line_offset(doc::Document, line::Int, line_offset::Int)
81+
line_offset <= 1 && return 1
82+
83+
code = JuliaSyntax.sourcetext(doc.srcfile)
84+
line_start = doc.srcfile.line_starts[line]
85+
offset = line_start + line_offset - 1
86+
prefix_end = prevind(code, offset)
87+
88+
return textwidth(code[line_start:prefix_end]) + 1
89+
end
90+
6091
function Base.push!(g::AlignGroup, n::FST, ind::Int, line_offset::Int, len::Int, ws::Int)
6192
push!(g.nodes, n)
6293
push!(g.node_inds, ind)
@@ -113,10 +144,9 @@ function align_binaryopcall!(fst::FST, diff::Int)
113144
end
114145
end
115146

116-
# we need to use ncodeunits
117147
function node_align_length(n::FST)
118148
if is_leaf(n)
119-
return ncodeunits(n.val)
149+
return textwidth(n.val)
120150
end
121151
margin = 0
122152
for nn in n.nodes
@@ -134,14 +164,15 @@ function node_align_length(nodes::Vector{FST})
134164
end
135165

136166
"""
137-
align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
167+
align_binaryopcalls!(doc::Document, fst::FST, op_inds::Vector{Int})
138168
139169
Aligns binary operator expressions.
140170
141-
Additionally handles the case where a keyword such as `const` is used
142-
prior to the binary op call.
171+
Additionally handles the case where a keyword such as `const` is used prior to the binary op
172+
call. `doc` is used to compare source locations in display columns rather than raw source
173+
offsets.
143174
"""
144-
function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
175+
function align_binaryopcalls!(doc::Document, fst::FST, op_inds::Vector{Int})
145176
if !(length(op_inds) > 1)
146177
return
147178
end
@@ -158,7 +189,11 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
158189

159190
binop, nlen, ws = if n.typ === Binary || n.typ === Kw
160191
nlen = node_align_length(n[1])
161-
n, nlen, (n[3].line_offset - n.line_offset) - nlen
192+
start_offset = source_display_line_offset(doc, n.startline, n.line_offset)
193+
op_offset =
194+
source_display_line_offset(doc, n[3].startline, n[3].line_offset)
195+
196+
n, nlen, op_offset - start_offset - nlen
162197
else
163198
binop::Union{FST,Nothing} = nothing
164199
sn = n
@@ -175,7 +210,16 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
175210
else
176211
binop = (sn.nodes::Vector{FST})[binop_ind]
177212
nlen += node_align_length(binop[1])
178-
ws = (binop[3].line_offset - n.line_offset) - nlen
213+
start_offset =
214+
source_display_line_offset(doc, n.startline, n.line_offset)
215+
216+
op_offset = source_display_line_offset(
217+
doc,
218+
binop[3].startline,
219+
binop[3].line_offset,
220+
)
221+
222+
ws = op_offset - start_offset - nlen
179223
break
180224
end
181225
end
@@ -189,7 +233,10 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
189233

190234
push!(g.nodes, binop)
191235
push!(g.node_inds, i)
192-
push!(g.line_offsets, binop[3].line_offset)
236+
push!(
237+
g.line_offsets,
238+
source_display_line_offset(doc, binop[3].startline, binop[3].line_offset),
239+
)
193240
push!(g.lens, nlen)
194241
push!(g.whitespaces, ws)
195242

@@ -214,11 +261,14 @@ function align_binaryopcalls!(fst::FST, op_inds::Vector{Int})
214261
end
215262

216263
"""
217-
align_struct!(fst::FST)
264+
align_struct!(doc::Document, fst::FST)
218265
219266
Aligns struct fields.
267+
268+
`doc` supplies the original source text so field and type operator columns can be measured
269+
using display width.
220270
"""
221-
function align_struct!(fst::FST)
271+
function align_struct!(doc::Document, fst::FST)
222272
ind = findfirst(n -> n.typ === Block, fst.nodes)
223273
if isnothing(ind) || length(fst[ind]) == 0
224274
return
@@ -248,11 +298,14 @@ function align_struct!(fst::FST)
248298
continue
249299
end
250300

251-
ws = n[ind].line_offset - (n.line_offset + nlen)
301+
start_offset = source_display_line_offset(doc, n.startline, n.line_offset)
302+
op_offset =
303+
source_display_line_offset(doc, n[ind].startline, n[ind].line_offset)
304+
ws = op_offset - start_offset - nlen
252305

253306
push!(g.nodes, n)
254307
push!(g.node_inds, i)
255-
push!(g.line_offsets, n[ind].line_offset)
308+
push!(g.line_offsets, op_offset)
256309
push!(g.lens, nlen)
257310
push!(g.whitespaces, ws)
258311

@@ -270,11 +323,17 @@ function align_struct!(fst::FST)
270323
if ind === nothing
271324
continue
272325
end
273-
ws = binop[ind].line_offset - (n.line_offset + nlen)
326+
start_offset = source_display_line_offset(doc, n.startline, n.line_offset)
327+
op_offset = source_display_line_offset(
328+
doc,
329+
binop[ind].startline,
330+
binop[ind].line_offset,
331+
)
332+
ws = op_offset - start_offset - nlen
274333

275334
push!(g.nodes, binop)
276335
push!(g.node_inds, i)
277-
push!(g.line_offsets, binop[ind].line_offset)
336+
push!(g.line_offsets, op_offset)
278337
push!(g.lens, nlen)
279338
push!(g.whitespaces, ws)
280339

@@ -297,11 +356,14 @@ function align_struct!(fst::FST)
297356
end
298357

299358
"""
300-
align_conditional!(fst::FST)
359+
align_conditional!(doc::Document, fst::FST)
301360
302361
Aligns a conditional expression.
362+
363+
`doc` supplies the original source text so `?` and `:` alignment is computed using display
364+
columns.
303365
"""
304-
function align_conditional!(fst::FST)
366+
function align_conditional!(doc::Document, fst::FST)
305367
nodes = flatten_conditionalopcall(fst)
306368

307369
cond_group = AlignGroup()
@@ -314,23 +376,35 @@ function align_conditional!(fst::FST)
314376
if n.typ === OPERATOR && n.val == "?"
315377
if cond_prev_endline != n.endline
316378
nlen = node_align_length(nodes[i-2])
317-
ws = n.line_offset - (nodes[i-2].line_offset + nlen)
379+
start_offset = source_display_line_offset(
380+
doc,
381+
nodes[i-2].startline,
382+
nodes[i-2].line_offset,
383+
)
384+
op_offset = source_display_line_offset(doc, n.startline, n.line_offset)
385+
ws = op_offset - start_offset - nlen
318386

319387
push!(cond_group.nodes, n)
320388
push!(cond_group.node_inds, i)
321-
push!(cond_group.line_offsets, n.line_offset)
389+
push!(cond_group.line_offsets, op_offset)
322390
push!(cond_group.lens, nlen)
323391
push!(cond_group.whitespaces, ws)
324392
end
325393
cond_prev_endline = n.endline
326394
elseif n.typ === OPERATOR && n.val == ":"
327395
if colon_prev_endline != n.endline
328396
nlen = node_align_length(nodes[i-2])
329-
ws = n.line_offset - (nodes[i-2].line_offset + nlen)
397+
start_offset = source_display_line_offset(
398+
doc,
399+
nodes[i-2].startline,
400+
nodes[i-2].line_offset,
401+
)
402+
op_offset = source_display_line_offset(doc, n.startline, n.line_offset)
403+
ws = op_offset - start_offset - nlen
330404

331405
push!(colon_group.nodes, n)
332406
push!(colon_group.node_inds, i)
333-
push!(colon_group.line_offsets, n.line_offset)
407+
push!(colon_group.line_offsets, op_offset)
334408
push!(colon_group.lens, nlen)
335409
push!(colon_group.whitespaces, ws)
336410
end
@@ -376,24 +450,30 @@ function align_conditional!(fst::FST)
376450
end
377451

378452
"""
379-
Adjust whitespace in between matrix elements such that it's the same as the original source file.
453+
align_matrix!(doc::Document, fst::FST)
454+
455+
Adjust whitespace between matrix elements so it matches the original source layout.
456+
457+
`doc` is used to recover the source display columns for each element, which avoids
458+
overpadding when the source contains zero-width characters.
380459
"""
381-
function align_matrix!(fst::FST)
460+
function align_matrix!(doc::Document, fst::FST)
382461
rows = filter(n -> n.typ === Row, fst.nodes::Vector{FST})
383462
if length(rows) == 0
384463
return
385464
end
386465

387466
min_offset = minimum(map(rows) do r
388-
r[1].line_offset
467+
source_display_line_offset(doc, r[1].startline, r[1].line_offset)
389468
end)
390469

391470
line = 0
392471
# add whitespace prior to initial element if elements are aligned to the right and
393472
# it's the first row on that line.
394473
for r in rows
395-
if r[1].line_offset > min_offset && line != r.startline
396-
diff = r[1].line_offset - min_offset
474+
row_offset = source_display_line_offset(doc, r[1].startline, r[1].line_offset)
475+
if row_offset > min_offset && line != r.startline
476+
diff = row_offset - min_offset
397477
if diff > 0
398478
insert!(r.nodes::Vector{FST}, 1, Whitespace(diff))
399479
end
@@ -408,7 +488,10 @@ function align_matrix!(fst::FST)
408488
n1 = r[i-1]
409489
n2 = r[i+1]
410490

411-
diff = n2.line_offset - n1.line_offset - node_align_length(n1)
491+
diff =
492+
source_display_line_offset(doc, n2.startline, n2.line_offset) -
493+
source_display_line_offset(doc, n1.startline, n1.line_offset) -
494+
node_align_length(n1)
412495

413496
# fix #694 and #713
414497
if diff > 0

test/options.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,6 +1211,13 @@
12111211
"""
12121212
@test fmt(str, 4, 1; align_assignment = true) == str
12131213

1214+
str = """
1215+
eclipse = true
1216+
s̄_b = SVector{3,T}(0, 0, 0)
1217+
css_axes = :css_axes_eclipse
1218+
"""
1219+
@test fmt(str; align_assignment = true) == str
1220+
12141221
str = """
12151222
vcat(X::T...) where {T} = T[X[i] for i = 1:length(X)]
12161223
vcat(X::T...) where {T<:Number} = T[X[i] for i = 1:length(X)]

0 commit comments

Comments
 (0)