Skip to content

Commit b61153d

Browse files
committed
fix: Handle margin cutting when encountering multibyte chars
1 parent 08c6692 commit b61153d

File tree

2 files changed

+71
-31
lines changed

2 files changed

+71
-31
lines changed

src/renderer/mod.rs

+56-16
Original file line numberDiff line numberDiff line change
@@ -954,12 +954,19 @@ impl Renderer {
954954
let line_offset = buffer.num_lines();
955955

956956
// Left trim
957-
let left = margin.left(source_string.len());
957+
let left = margin.left(str_width(&source_string));
958958

959959
// FIXME: This looks fishy. See #132860.
960960
// Account for unicode characters of width !=0 that were removed.
961-
let left = source_string.chars().take(left).map(char_width).sum();
961+
let mut taken = 0;
962+
source_string.chars().for_each(|ch| {
963+
let next = char_width(ch);
964+
if taken + next <= left {
965+
taken += next;
966+
}
967+
});
962968

969+
let left = taken;
963970
self.draw_line(
964971
buffer,
965972
&source_string,
@@ -2018,48 +2025,81 @@ impl Renderer {
20182025
) {
20192026
// Tabs are assumed to have been replaced by spaces in calling code.
20202027
debug_assert!(!source_string.contains('\t'));
2021-
let line_len = source_string.len();
2028+
let line_len = str_width(source_string);
20222029
// Create the source line we will highlight.
20232030
let left = margin.left(line_len);
20242031
let right = margin.right(line_len);
20252032
// FIXME: The following code looks fishy. See #132860.
20262033
// On long lines, we strip the source line, accounting for unicode.
20272034
let mut taken = 0;
2035+
let mut skipped = 0;
20282036
let code: String = source_string
20292037
.chars()
2030-
.skip(left)
2038+
.skip_while(|ch| {
2039+
skipped += char_width(*ch);
2040+
skipped <= left
2041+
})
20312042
.take_while(|ch| {
20322043
// Make sure that the trimming on the right will fall within the terminal width.
2033-
let next = char_width(*ch);
2034-
if taken + next > right - left {
2035-
return false;
2036-
}
2037-
taken += next;
2038-
true
2044+
taken += char_width(*ch);
2045+
taken <= (right - left)
20392046
})
20402047
.collect();
20412048

20422049
buffer.puts(line_offset, code_offset, &code, ElementStyle::Quotation);
20432050
let placeholder = self.margin();
2044-
if margin.was_cut_left() {
2051+
let padding = str_width(placeholder);
2052+
let (width_taken, bytes_taken) = if margin.was_cut_left() {
20452053
// We have stripped some code/whitespace from the beginning, make it clear.
2054+
let mut bytes_taken = 0;
2055+
let mut width_taken = 0;
2056+
for ch in code.chars() {
2057+
width_taken += char_width(ch);
2058+
bytes_taken += ch.len_utf8();
2059+
2060+
if width_taken >= padding {
2061+
break;
2062+
}
2063+
}
20462064
buffer.puts(
20472065
line_offset,
20482066
code_offset,
2049-
placeholder,
2067+
&format!("{placeholder:>width_taken$}"),
20502068
ElementStyle::LineNumber,
20512069
);
2052-
}
2070+
(width_taken, bytes_taken)
2071+
} else {
2072+
(0, 0)
2073+
};
2074+
2075+
buffer.puts(
2076+
line_offset,
2077+
code_offset + width_taken,
2078+
&code[bytes_taken..],
2079+
ElementStyle::Quotation,
2080+
);
2081+
20532082
if margin.was_cut_right(line_len) {
2054-
let padding = str_width(placeholder);
2055-
// We have stripped some code after the rightmost span end, make it clear we did so.
2083+
// We have stripped some code after the rightmost span end, make it clear we did so.
2084+
let mut char_taken = 0;
2085+
let mut width_taken_inner = 0;
2086+
for ch in code.chars().rev() {
2087+
width_taken_inner += char_width(ch);
2088+
char_taken += 1;
2089+
2090+
if width_taken_inner >= padding {
2091+
break;
2092+
}
2093+
}
2094+
20562095
buffer.puts(
20572096
line_offset,
2058-
code_offset + taken - padding,
2097+
code_offset + width_taken + code[bytes_taken..].chars().count() - char_taken,
20592098
placeholder,
20602099
ElementStyle::LineNumber,
20612100
);
20622101
}
2102+
20632103
buffer.puts(
20642104
line_offset,
20652105
0,

tests/formatter.rs

+15-15
Original file line numberDiff line numberDiff line change
@@ -2136,7 +2136,7 @@ error: title
21362136
2 | # Ensure that the spans from toml handle utf-8 correctly
21372137
3 | authors = [
21382138
| ___________^
2139-
4 | | { name = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯...A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘", email = 1 }
2139+
4 | | { name = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘", email = 1 }
21402140
5 | | ]
21412141
| |_^ annotation
21422142
"#]];
@@ -2162,8 +2162,8 @@ fn unicode_cut_handling2() {
21622162
let expected = str![[r#"
21632163
error: expected item, found `?`
21642164
|
2165-
1.|....
2166-
|^ expected item
2165+
1 | ...的。这是宽的。这是宽的。这是宽的。这是宽的。这是宽的。*/?
2166+
| ^ expected item
21672167
= note: for a full list of items that can appear in modules, see <https://doc.rust-lang.org/reference/items.html>
21682168
"#]];
21692169

@@ -2189,8 +2189,8 @@ fn unicode_cut_handling3() {
21892189
let expected = str![[r#"
21902190
error: expected item, found `?`
21912191
|
2192-
1 | ...。这是宽的。*/? ...
2193-
^ | expected item
2192+
1 | ...。这是宽的。这是宽的。这是宽的...
2193+
| ^^ expected item
21942194
= note: for a full list of items that can appear in modules, see <https://doc.rust-lang.org/reference/items.html>
21952195
"#]];
21962196

@@ -2256,10 +2256,10 @@ fn main() {
22562256
error[E0308]: mismatched types
22572257
--> $DIR/non-whitespace-trimming-unicode.rs:4:415
22582258
|
2259-
LL | ...♰♱♲♳♴♵♶♷♸♹♺♻♼♽♾♿⚀⚁⚂⚃⚄⚅⚆⚈⚉4"; let _: () = 42; let _: &str = "🦀☀☁☂☃☄★☆☇☈☉☊☋☌☍☎☏☐☑☒☓ ☖☗☘☙☚☛☜☝☞☟☠☡☢☣☤☥☦☧☨☩☪☫☬☭☮☯☰☱☲☳☴☵☶☷☸☹☺☻☼☽☾☿♀♁♂...
2260-
| -- ^^ expected `()`, found integer
2261-
| |
2262-
| expected due to this
2259+
LL | ...♧♨♩♪♫♬♭♮♯♰♱♲♳♴♵♶♷♸♹♺♻♼♽♾♿⚀⚁⚂⚃⚄⚅⚆⚈⚉4"; let _: () = 42; let _: &str = "🦀☀☁☂☃☄★☆☇☈☉☊☋☌☍☎☏☐☑☒☓ ☖☗☘☙☚☛☜☝☞☟☠☡☢☣☤☥☦☧☨☩☪☫☬☭☮☯☰☱☲☳☴☵☶☷...
2260+
| -- ^^ expected `()`, found integer
2261+
| |
2262+
| expected due to this
22632263
"#]];
22642264

22652265
let renderer = Renderer::plain().anonymized_line_numbers(true);
@@ -2315,11 +2315,11 @@ fn main() {
23152315
error[E0369]: cannot add `&str` to `&str`
23162316
╭▸ $DIR/non-1-width-unicode-multiline-label.rs:7:260
23172317
2318-
LL │ …ཽཾཿ྄ཱྀྀྂྃ྅྆྇ྈྉྊྋྌྍྎྏྐྑྒྒྷྔྕྖྗ྘ྙྚྛྜྜྷྞྟྠྡྡྷྣྤྥྦྦྷྨྩྪྫྫྷྭྮྯྰྱྲླྴྵྶྷྸྐྵྺྻྼ྽྾྿࿀࿁࿂࿃࿄࿅࿆࿇࿈࿉࿊࿋࿍࿎࿏࿐࿑࿒࿓࿔࿕࿖࿗࿘࿙࿚"; let _a = unicode_is_fun + " really fun!";
2319-
┬───────────── ┯ ────────────── &str
2320-
│ │
2321-
│ `+` cannot be used to concatenate two `&str` strings
2322-
&str
2318+
LL │ …࿆࿇࿈࿉࿊࿋࿍࿎࿏࿐࿑࿒࿓࿔࿕࿖࿗࿘࿙࿚"; let _a = unicode_is_fun + " really fun!";
2319+
│ ┬───────────── ┯ ────────────── &str
2320+
│ │ │
2321+
│ │ `+` cannot be used to concatenate two `&str` strings
2322+
│ &str
23232323
23242324
╰ note: string concatenation requires an owned `String` on the left
23252325
help: create an owned `String` from a string reference
@@ -2377,7 +2377,7 @@ LL | include!("not-utf8.bin");
23772377
note: byte `193` is not valid utf-8
23782378
--> $DIR/not-utf8.bin:1:1
23792379
|
2380-
LL | �|�␂!5�cc␕␂�Ӻi��WWj�ȥ�'�}�␒�J�ȉ��W�␞O�@����␜w�V���LO����␔[ ␃_�'���SQ�~ذ��ų&��- ��lN~��!@␌ _#���kQ��h�␝�:�...
2380+
LL | �|�␂!5�cc␕␂�Ӻi��WWj�ȥ�'�}�␒�J�ȉ��W�␞O�@����␜w�V���LO����␔[ ␃_�'���SQ�~ذ��ų&��- ��lN~��!@␌ _#���kQ��h�␝�:�␜␇�
23812381
| ^
23822382
= note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
23832383
"#]];

0 commit comments

Comments
 (0)