@@ -1393,6 +1393,17 @@ class DoclingDocument(BaseModel):
1393
1393
math annotation {
1394
1394
display: none;
1395
1395
}
1396
+ .formula-not-decoded {
1397
+ background: repeating-linear-gradient(
1398
+ 45deg, /* Angle of the stripes */
1399
+ LightGray, /* First color */
1400
+ LightGray 10px, /* Length of the first color */
1401
+ White 10px, /* Second color */
1402
+ White 20px /* Length of the second color */
1403
+ );
1404
+ margin: 0;
1405
+ text-align: center;
1406
+ }
1396
1407
</style>
1397
1408
</head>"""
1398
1409
@@ -2216,11 +2227,18 @@ def _append_text(text: str, do_escape_html=True, do_escape_underscores=True):
2216
2227
2217
2228
elif isinstance (item , TextItem ) and item .label in [DocItemLabel .FORMULA ]:
2218
2229
in_list = False
2219
- _append_text (
2220
- f"$${ item .text } $$\n " ,
2221
- do_escape_underscores = False ,
2222
- do_escape_html = False ,
2223
- )
2230
+ if item .text != "" :
2231
+ _append_text (
2232
+ f"$${ item .text } $$\n " ,
2233
+ do_escape_underscores = False ,
2234
+ do_escape_html = False ,
2235
+ )
2236
+ elif item .orig != "" :
2237
+ _append_text (
2238
+ "<!-- formula-not-decoded -->\n " ,
2239
+ do_escape_underscores = False ,
2240
+ do_escape_html = False ,
2241
+ )
2224
2242
2225
2243
elif isinstance (item , TextItem ) and item .label in labels :
2226
2244
in_list = False
@@ -2467,9 +2485,27 @@ def _prepare_tag_content(
2467
2485
math_formula = _prepare_tag_content (
2468
2486
item .text , do_escape_html = False , do_replace_newline = False
2469
2487
)
2470
- if formula_to_mathml :
2471
- # Building a math equation in MathML format
2472
- # ref https://www.w3.org/TR/wai-aria-1.1/#math
2488
+ text = ""
2489
+
2490
+ # If the formula is not processed correctly, use its image
2491
+ if (
2492
+ item .text == ""
2493
+ and item .orig != ""
2494
+ and image_mode == ImageRefMode .EMBEDDED
2495
+ and len (item .prov ) > 0
2496
+ ):
2497
+ item_image = item .get_image (doc = self )
2498
+ if item_image is not None :
2499
+ img_ref = ImageRef .from_pil (item_image , dpi = 72 )
2500
+ text = (
2501
+ "<figure>"
2502
+ f'<img src="{ img_ref .uri } " alt="{ item .orig } " />'
2503
+ "</figure>"
2504
+ )
2505
+
2506
+ # Building a math equation in MathML format
2507
+ # ref https://www.w3.org/TR/wai-aria-1.1/#math
2508
+ elif formula_to_mathml :
2473
2509
mathml_element = latex2mathml .converter .convert_to_element (
2474
2510
math_formula , display = "block"
2475
2511
)
@@ -2480,9 +2516,15 @@ def _prepare_tag_content(
2480
2516
mathml = unescape (tostring (mathml_element , encoding = "unicode" ))
2481
2517
text = f"<div>{ mathml } </div>"
2482
2518
2483
- else :
2519
+ elif math_formula != "" :
2484
2520
text = f"<pre>{ math_formula } </pre>"
2485
- html_texts .append (text )
2521
+
2522
+ if text != "" :
2523
+ html_texts .append (text )
2524
+ else :
2525
+ html_texts .append (
2526
+ '<div class="formula-not-decoded">Formula not decoded</div>'
2527
+ )
2486
2528
2487
2529
elif isinstance (item , ListItem ):
2488
2530
0 commit comments