@@ -49,6 +49,10 @@ function onPaste(event: ClipboardEvent) {
49
49
const parser = new DOMParser ( )
50
50
const doc = parser . parseFromString ( textHTMLClean , 'text/html' )
51
51
52
+ // Replace all line-break elements with line break characters that will appear in `textContent`
53
+ for ( const br of doc . querySelectorAll ( 'br' ) ) br . replaceWith ( '\n' )
54
+ doc . normalize ( )
55
+
52
56
const markdown = convertToMarkdown ( plaintext , doc )
53
57
54
58
// If no changes made by transforming
@@ -163,11 +167,20 @@ function hasHTML(transfer: DataTransfer): boolean {
163
167
164
168
/**
 * Collapse whitespace in HTML text to normalize it with the plain-text representation, and convert
 * non-breaking spaces (nbsp) into regular spaces.
 *
 * The problem is that the HTML is not actually rendered onto the page, so the browser does not do the normal
 * whitespace normalizing. This means `textContent` and `innerText` both just return the raw text of the node,
 * ignoring `br` tags. So to be able to compare the parsed HTML with the plain-text variant, we need to make the
 * whitespace in the HTML match what it would look like when rendered.
 *
 * We don't need to handle block breaks like `p` tags since we will work across those as separate nodes.
 *
 * @param text - Raw text taken from the parsed (unrendered) HTML.
 * @returns The text with runs of spaces, tabs and line breaks collapsed to a single space, every nbsp replaced
 * by a regular space, and leading/trailing whitespace removed.
 */
function normalizeHtmlWhitespace(text: string): string {
  return (
    text
      // Collapse whitespace that would be collapsed if rendered
      .replace(/[ \t\n\r]+/g, ' ')
      // Replace non-breaking space (nbsp) with regular space. This runs after collapsing, so consecutive nbsp
      // characters each become their own space instead of being merged into one.
      // Only U+00A0 is matched: JavaScript strings are UTF-16, so the UTF-8 byte pair C2 A0 never shows up as a
      // `\uC2A0` code unit. That code point is a Hangul syllable and must be left untouched.
      .replace(/\u00A0/g, ' ')
      .trim()
  )
}
172
185
173
186
// Makes markdown link from a link element, avoiding special GitHub links
0 commit comments