File tree Expand file tree Collapse file tree 2 files changed +53
-2
lines changed Expand file tree Collapse file tree 2 files changed +53
-2
lines changed Original file line number Diff line number Diff line change 6666 LAYOUT_TABLE ,
6767 LAYOUT_KEY_VALUE ,
6868)
69+ from textractor .utils .legacy_utils import converter
6970
7071THRESHOLD = 0.95
7172
@@ -1542,7 +1543,6 @@ def parser_analyze_expense_response(response):
15421543 document .response = response
15431544 return document
15441545
1545-
15461546def parse (response : dict ) -> Document :
15471547 """
15481548 Ingests response data and API Call Mode and calls the appropriate function for it.
@@ -1559,4 +1559,4 @@ def parse(response: dict) -> Document:
15591559 if "ExpenseDocuments" in response :
15601560 return parser_analyze_expense_response (response )
15611561 else :
1562- return parse_document_api_response (response )
1562+ return parse_document_api_response (converter ( response ) )
Original file line number Diff line number Diff line change 1+ from textractor .data .constants import (
2+ LAYOUT_FIGURE ,
3+ LAYOUT_LIST ,
4+ LAYOUT_TABLE ,
5+ LAYOUT_KEY_VALUE ,
6+ LAYOUT_TEXT ,
7+ LAYOUT_TITLE ,
8+ LAYOUT_HEADER ,
9+ LAYOUT_FOOTER ,
10+ LAYOUT_SECTION_HEADER ,
11+ LAYOUT_PAGE_NUMBER ,
12+ )
13+
def converter(response):
    """Normalize the layout blocks of a Textract-style response in place.

    Three rewrites are applied to ``response["Blocks"]``:

    * blocks whose ``BlockType`` starts with ``"LAYOUT_FIGURE_"`` are
      relabelled as ``LAYOUT_TEXT``;
    * blocks whose ``BlockType`` starts with ``"LAYOUT_"`` but is not one of
      the layout types imported by this module are relabelled as
      ``LAYOUT_FIGURE``;
    * ``LAYOUT_FIGURE`` blocks carrying ``"CONTAINER"`` in ``EntityTypes``
      are removed from the block list, and their ids are removed from the
      ``CHILD`` relationships of every ``PAGE`` block.

    NOTE(review): presumably this exists so responses containing newer layout
    block types can still be read by the existing parser -- confirm intent.

    Args:
        response: Response dictionary; expected to hold a ``"Blocks"`` list
            of block dictionaries.

    Returns:
        The same ``response`` object, mutated in place. A response without
        blocks is returned untouched.
    """
    blocks = response.get("Blocks")
    if not blocks:
        # Nothing to convert; let the downstream parser decide how to treat
        # a response that has no blocks.
        return response

    known_layout_types = (
        LAYOUT_TEXT,
        LAYOUT_TITLE,
        LAYOUT_HEADER,
        LAYOUT_FOOTER,
        LAYOUT_SECTION_HEADER,
        LAYOUT_PAGE_NUMBER,
        LAYOUT_LIST,
        LAYOUT_FIGURE,
        LAYOUT_TABLE,
        LAYOUT_KEY_VALUE,
    )

    page_blocks = []
    container_indices = set()
    container_ids = set()
    for index, block in enumerate(blocks):
        # Tolerate a missing or null BlockType instead of crashing on it.
        block_type = block.get("BlockType") or ""
        if block_type == "PAGE":
            # Keep every page: a multi-page response has one PAGE block per
            # page, and a removed figure may be referenced by any of them.
            page_blocks.append(block)
        elif block_type.startswith("LAYOUT_FIGURE_"):
            block["BlockType"] = LAYOUT_TEXT
        elif (
            block_type.startswith("LAYOUT_")
            and block_type not in known_layout_types
        ):
            block["BlockType"] = LAYOUT_FIGURE
        elif (
            block_type == LAYOUT_FIGURE
            and "CONTAINER" in (block.get("EntityTypes") or ())
        ):
            container_indices.add(index)
            block_id = block.get("Id")
            if block_id is not None:
                container_ids.add(block_id)

    if not container_indices:
        return response

    # Rebuild the list in place (slice assignment) so any other reference to
    # the same list observes the removal, as the original ``del`` did.
    blocks[:] = [
        block
        for index, block in enumerate(blocks)
        if index not in container_indices
    ]

    # Drop the dangling ids from the pages' CHILD relationships. Pages that
    # lack relationships, or never referenced the figure, are simply skipped.
    for page_block in page_blocks:
        for relationship in page_block.get("Relationships") or ():
            if relationship.get("Type") != "CHILD":
                continue
            child_ids = relationship.get("Ids")
            if child_ids:
                child_ids[:] = [
                    child_id
                    for child_id in child_ids
                    if child_id not in container_ids
                ]

    return response
You can’t perform that action at this time.
0 commit comments