Skip to content

Commit c66a565

Browse files
Feat: Add blacklist param for latex and mathml detection (#995)
* fix: change the latex and mathml detection * feat: Add a blacklist for the latex node selector and a node size limit * fix: Remove console.log statement in latex.ts file * feat: Delete the node size limit for the input node * refactor: Update || for ?? Co-authored-by: Jose González <[email protected]> * refactor: Add performance improvements, error handling, and optimizations * feat: Update ignored_latex_containers parameter name in latex.ts and properties.ts * feat: Add blacklist for MathML node selectors * feat: fused mathml and latex ignore containers --------- Co-authored-by: Jose González <[email protected]>
1 parent 60b03e2 commit c66a565

File tree

3 files changed

+86
-26
lines changed

3 files changed

+86
-26
lines changed

packages/viewer/src/latex.ts

+51-24
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export async function renderLatex(properties: Properties, root: HTMLElement) {
1515
if (properties.viewer !== "image" && properties.viewer !== "latex") {
1616
return;
1717
}
18-
const latexNodes = findLatexTextNodes(root);
18+
const latexNodes = findLatexTextNodes(root, properties.ignored_containers);
1919

2020
for (const latexNode of latexNodes) {
2121
await replaceLatexInTextNode(properties, latexNode);
@@ -36,55 +36,82 @@ async function replaceLatexInTextNode(properties: Properties, node: Node) {
3636
if (nextLatexPosition) {
3737
// Get left non LaTeX text.
3838
const leftText: string = textContent.substring(pos, nextLatexPosition.start);
39-
const leftTextNode = document.createTextNode(leftText);
40-
// Create a node with left text.
41-
node.parentNode?.insertBefore(leftTextNode, node);
42-
node.nodeValue = node.nodeValue?.substring(pos, nextLatexPosition.start) ?? "";
39+
if (leftText) {
40+
const leftTextNode = document.createTextNode(leftText);
41+
// Create a node with left text.
42+
node.parentNode?.insertBefore(leftTextNode, node);
43+
}
4344

4445
// Get LaTeX text.
4546
const latex = textContent.substring(nextLatexPosition.start + "$$".length, nextLatexPosition.end);
46-
// Convert LaTeX to mathml.
47+
// Convert LaTeX to MathML.
4748
const response = await latexToMathml(latex, properties.editorServicesRoot, properties.editorServicesExtension);
48-
// Insert mathml node.
49+
// Insert MathML node.
4950
const fragment = document.createRange().createContextualFragment(response.text);
50-
5151
node.parentNode?.insertBefore(fragment, node);
52-
node.nodeValue = node.nodeValue.substring(nextLatexPosition.start, nextLatexPosition.end);
5352

53+
// Update pos to search for next LaTeX instance.
5454
pos = nextLatexPosition.end + "$$".length;
5555
} else {
56-
// No more LaTeX node found.
57-
const text = textContent.substring(pos);
58-
const textNode = document.createTextNode(text);
59-
node.parentNode?.insertBefore(textNode, node);
60-
node.nodeValue = "";
61-
pos = textContent.length;
56+
// If no more LaTeX found, append the rest of the text as a new text node and break the loop.
57+
const remainingText = textContent.substring(pos);
58+
if (remainingText) {
59+
const remainingTextNode = document.createTextNode(remainingText);
60+
node.parentNode?.insertBefore(remainingTextNode, node);
61+
}
62+
break; // Exit the loop as we've processed all text.
6263
}
6364
}
64-
65-
// Delete original text node.
65+
// Remove the original node after processing.
6666
node.parentNode?.removeChild(node);
6767
}
6868

6969
/**
70-
* Returns an array with all HTML LaTeX nodes.
71-
* @param {HTMLElement} root - Any DOM element that can contain LaTeX.
72-
* @returns {Node[]} Array with all HTML LaTeX nodes inside root.
70+
* Finds and returns an array of text nodes containing LaTeX expressions.
71+
*
72+
* @param root - The root element to search within.
73+
* @param ignored_latex_containers - CSS selector matching containers under root whose text nodes are excluded from the result.
74+
* @returns An array of text nodes containing LaTeX expressions.
7375
*/
74-
function findLatexTextNodes(root: any): Node[] {
75-
const nodeIterator: NodeIterator = document.createNodeIterator(root, NodeFilter.SHOW_TEXT, (node) =>
76-
/(\$\$)(.*)(\$\$)/.test(node.nodeValue || "") ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT,
77-
);
76+
function findLatexTextNodes(root: ParentNode, ignored_latex_containers: string | null): Node[] {
  const nodeIterator: NodeIterator = createNodeIterator(root);

  // Resolve the ignored containers once, before walking the text nodes.
  // `querySelectorAll` never returns null, but it coerces a `null` argument to the
  // selector "null" and throws a SyntaxError for an empty or malformed selector, so
  // it is only called when there is an actual selector to evaluate.
  let blackListedNodes: NodeListOf<Element> | null = null;
  const selector = ignored_latex_containers?.trim();
  if (selector) {
    try {
      blackListedNodes = root.querySelectorAll(selector);
    } catch (error: unknown) {
      // A bad selector must not abort rendering; fall back to ignoring no container.
      console.warn(`Invalid ignored containers selector "${selector}"; no container will be ignored.`, error);
    }
  }

  const latexNodes: Node[] = [];

  let currentNode: Node | null;
  while ((currentNode = nodeIterator.nextNode())) {
    // Skip text nodes that live inside one of the ignored containers.
    if (blackListedNodes !== null && blackListedNodes.length > 0 && isNodeBlacklisted(currentNode, blackListedNodes)) {
      continue;
    }
    latexNodes.push(currentNode);
  }

  return latexNodes;
}
8791

92+
/**
93+
* Creates a NodeIterator to find text nodes containing LaTeX expressions.
94+
*
95+
* @param root - The root element to search within.
96+
* @returns A NodeIterator for text nodes containing LaTeX expressions.
97+
*/
98+
function createNodeIterator(root: Node): NodeIterator {
  // Built once per iterator rather than once per visited text node. The pattern is
  // not global, so `test` keeps no state between calls.
  // NOTE(review): `.` does not match line breaks, so a `$$…$$` pair that spans
  // several lines inside a single text node is rejected — confirm this is intended.
  const latexPattern = /\$\$.*\$\$/;

  return document.createNodeIterator(root, NodeFilter.SHOW_TEXT, (node) =>
    latexPattern.test(node.nodeValue ?? "") ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT,
  );
}
103+
104+
/**
105+
* Checks if a node or any of its ancestors are in the blacklist.
106+
*
107+
* @param node - The node to check.
108+
* @param blackListedNodes - The list of blacklisted nodes.
109+
* @returns True if the node or any of its ancestors are blacklisted, false otherwise.
110+
*/
111+
function isNodeBlacklisted(node: Node, blackListedNodes: NodeListOf<Element>): boolean {
  // Walk the list directly instead of copying it into a new array on every call:
  // the caller invokes this once per candidate text node.
  for (const blackListedNode of blackListedNodes) {
    // `contains` is true for the element itself and for any of its descendants.
    if (blackListedNode.contains(node)) {
      return true;
    }
  }
  return false;
}
114+
88115
/**
89116
* Returns an object {start, end} with the start and end latex position.
90117
* @param {number} pos - Current position inside the text.

packages/viewer/src/mathml.ts

+19-2
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,16 @@ function findSafeMathMLTextNodes(root: HTMLElement): Node[] {
3535
* Parse the DOM looking for «math» formulas and replace them with the corresponding rendered images within the given element.
3636
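* @param {HTMLElement} root - Any DOM element that can contain MathML.
* @param {string | null} ignored_mathml_containers - CSS selector matching containers under root whose safe MathML text nodes are skipped instead of decoded.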
3737
*/
38-
function decodeSafeMathML(root: HTMLElement) {
38+
function decodeSafeMathML(root: any, ignored_mathml_containers: string | null) {
3939
const safeNodes = findSafeMathMLTextNodes(root);
40+
const blackListedNodes = root.querySelectorAll(ignored_mathml_containers) ?? [];
4041

4142
for (const safeNode of safeNodes) {
43+
if (blackListedNodes.length > 0 && isNodeBlacklisted(safeNode, blackListedNodes)) {
44+
console.log("Node is blacklisted");
45+
continue;
46+
}
47+
4248
const mathml = MathML.safeXmlDecode(safeNode.textContent ?? "");
4349
// Insert mathml node.
4450
const fragment = document.createRange().createContextualFragment(mathml);
@@ -48,6 +54,17 @@ function decodeSafeMathML(root: HTMLElement) {
4854
}
4955
}
5056

57+
/**
58+
* Checks if a node or any of its ancestors are in the blacklist.
59+
*
60+
* @param node - The node to check.
61+
* @param blackListedNodes - The list of blacklisted nodes.
62+
* @returns True if the node or any of its ancestors are blacklisted, false otherwise.
63+
*/
64+
function isNodeBlacklisted(node: Node, blackListedNodes: NodeListOf<Element>): boolean {
  // Walk the list directly instead of copying it into a new array on every call:
  // the caller invokes this once per safe MathML text node.
  for (const blackListedNode of blackListedNodes) {
    // `contains` is true for the element itself and for any of its descendants.
    if (blackListedNode.contains(node)) {
      return true;
    }
  }
  return false;
}
67+
5168
/**
5269
* Parse the DOM looking for <math> elements and replace them with the corresponding rendered images within the given element.
5370
* @param {Properties} properties - Properties of the viewer.
@@ -58,7 +75,7 @@ export async function renderMathML(properties: Properties, root: HTMLElement): P
5875
return;
5976
}
6077

61-
decodeSafeMathML(root);
78+
decodeSafeMathML(root, properties.ignored_containers);
6279

6380
for (const mathElement of [...root.getElementsByTagName("math")]) {
6481
const mml = serializeHtmlToXml(mathElement.outerHTML);

packages/viewer/src/properties.ts

+16
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export interface Config {
2121
lang: string;
2222
viewer: Viewer;
2323
zoom: number;
24+
ignored_containers: string | null;
2425
}
2526

2627
/**
@@ -39,6 +40,7 @@ const defaultValues: Config = {
3940
lang: "en",
4041
viewer: "none",
4142
zoom: 1,
43+
ignored_containers: null,
4244
};
4345

4446
/**
@@ -143,6 +145,11 @@ export class Properties {
143145
if (zoom !== null && zoom !== undefined) {
144146
instance.config.zoom = +zoom;
145147
}
148+
149+
const ignored_containers = urlParams.get("ignored_containers");
150+
if (ignored_containers !== null && ignored_containers !== undefined) {
151+
instance.config.ignored_containers = ignored_containers;
152+
}
146153
}
147154

148155
/**
@@ -301,4 +308,13 @@ export class Properties {
301308
this.config.backendConfig.wiriseditormathmlattribute = wiriseditormathmlattribute;
302309
this.render();
303310
}
311+
312+
/**
 * CSS selector for the containers that LaTeX and MathML detection must skip.
 * Falls back to the default value (`null`) when nothing is configured.
 */
get ignored_containers(): string | null {
  return this.config.ignored_containers ?? defaultValues.ignored_containers;
}

/**
 * Stores the selector for the containers to skip and calls `render()`.
 * Accepts `null` so the setting can be cleared again; this also keeps the
 * setter type in line with what the getter returns.
 */
set ignored_containers(ignored_containers: string | null) {
  this.config.ignored_containers = ignored_containers;
  this.render();
}
304320
}

0 commit comments

Comments
 (0)