Skip to content

Commit 3ca15b1

Browse files
authored
Handle br tags
1 parent 1ae93ac commit 3ca15b1

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

src/paste-markdown-html.ts

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ function onPaste(event: ClipboardEvent) {
4949
const parser = new DOMParser()
5050
const doc = parser.parseFromString(textHTMLClean, 'text/html')
5151

52+
// Replace all line-break elements with line break characters that will appear in `textContent`
53+
for (const br of doc.querySelectorAll('br')) br.replaceWith('\n')
54+
doc.normalize()
55+
5256
const markdown = convertToMarkdown(plaintext, doc)
5357

5458
// If no changes made by transforming
@@ -163,11 +167,20 @@ function hasHTML(transfer: DataTransfer): boolean {
163167

164168
/**
 * Collapse whitespace in HTML text to normalize it with the plain-text representation, and convert
 * non-breaking spaces into regular spaces.
 *
 * The HTML is parsed but never rendered onto the page, so the browser does not perform its usual
 * whitespace normalization and `textContent` returns the raw text of the node. To compare the parsed
 * HTML with the plain-text variant, the whitespace has to be made to match what rendering would produce.
 * Block breaks such as `p` tags are not handled here.
 *
 * @param text - Raw text taken from the parsed HTML.
 * @returns The text with each run of tab / newline / carriage return / space collapsed to a single
 *   space, every non-breaking space (U+00A0) replaced by a regular space, and leading and trailing
 *   whitespace removed.
 */
function normalizeHtmlWhitespace(text: string): string {
  return (
    text
      // Collapse whitespace that would be collapsed if rendered
      .replace(/[\t\n\r ]+/g, ' ')
      // Replace non-breaking space (nbsp) with regular space. Only U+00A0 is matched: in a JavaScript
      // string `\uC2A0` is the Hangul syllable U+C2A0, not the UTF-8 byte pair C2 A0 that encodes nbsp,
      // so matching it would corrupt Korean text.
      .replace(/\u00A0/g, ' ')
      .trim()
  )
}
172185

173186
// Makes markdown link from a link element, avoiding special GitHub links

0 commit comments

Comments
 (0)