@@ -27,9 +27,13 @@ def appmetadata() -> AppMetadata:
27
27
'https://pypi.org/project/python-doctr . The model is capable of detecting text regions in the '
28
28
'input image and recognizing text in the regions. The text-localized regions are organized '
29
29
'hierarchically by the model into "pages" > "blocks" > "lines" > "words", and this CLAMS app '
30
- 'translated into `TextDocument`, `Paragraphs`, `Sentence`, and `Token` annotations that represent '
31
- 'recognized text contents, then aligned to `BoundingBox` annotations that represent the detected '
32
- 'geometries.' ,
30
+ 'translates them into `TextDocument`, `Paragraph`, `Sentence`, and `Token` annotations to '
31
+ 'represent recognized text contents, then aligns them to `BoundingBox` annotations that represent '
32
+ 'the detected geometries. This hierarchical structure is also represented in the `TextDocument` '
33
+ 'annotation output as two newlines (`\\n\\n`) between "paragraphs", one newline (`\\n`) between '
34
+ 'the "lines", and one space (" ") between the "words". For the text recognition, the model is '
35
+ 'internally configured to use the "parseq" recognition model, and only works with English text '
36
+ 'at the moment.' ,
33
37
app_license = "Apache 2.0" ,
34
38
identifier = "doctr-wrapper" ,
35
39
url = "https://github.com/clamsproject/app-doctr-wrapper" ,
@@ -42,17 +46,23 @@ def appmetadata() -> AppMetadata:
42
46
'`representatives` property is present, the app will process video still frames at the '
43
47
'underlying time point annotations that are referred to by the `representatives` property. '
44
48
'Otherwise, the app will process the middle frame of the video segment.' )
45
- metadata .add_output (DocumentTypes .TextDocument )
46
- out_sent = metadata .add_output (at_type = Uri .SENTENCE )
47
- out_sent .add_description ('Translation of the recognized "text lines" in the processed input images' )
48
- out_para = metadata .add_output (at_type = Uri .PARAGRAPH )
49
- out_para .add_description ('Translation of the recognized "text blocks" in the processed input images' )
50
- out_tkn = metadata .add_output (at_type = Uri .TOKEN )
51
- out_tkn .add_description ('Translation of the recognized "text words" in the processed input images' )
49
+ out_td = metadata .add_output (DocumentTypes .TextDocument , ** {'@lang' : 'en' })
50
+ out_td .add_description ('Fully serialized text content of the recognized text in the input images. Serialization is '
51
+ 'done by concatenating `text` values of `Paragraph` annotations with two newline characters.' )
52
+ out_tkn = metadata .add_output (at_type = Uri .TOKEN , text = '*' , word = '*' )
53
+ out_tkn .add_description ('Translation of the recognized docTR "words" in the input images. `text` and `word` '
54
+ 'properties store the string values of the recognized text. The duplication is for keeping '
55
+ 'backward compatibility and consistency with `Paragraph` and `Sentence` annotations.' )
56
+ out_sent = metadata .add_output (at_type = Uri .SENTENCE , text = '*' )
57
+ out_sent .add_description ('Translation of the recognized docTR "lines" in the input images. `text` property stores '
58
+ 'the string value of space-joined words.' )
59
+ out_para = metadata .add_output (at_type = Uri .PARAGRAPH , text = '*' )
60
+ out_para .add_description ('Translation of the recognized docTR "blocks" in the input images. `text` property stores '
61
+ 'the string value of newline-joined sentences.' )
52
62
out_ali = metadata .add_output (AnnotationTypes .Alignment )
53
63
out_ali .add_description ('Alignments between 1) `TimePoint` <-> `TextDocument`, 2) `TimePoint` <-> '
54
64
'`Token`/`Sentence`/`Paragraph`, 3) `BoundingBox` <-> `Token`/`Sentence`/`Paragraph`' )
55
- out_bbox = metadata .add_output (AnnotationTypes .BoundingBox )
65
+ out_bbox = metadata .add_output (AnnotationTypes .BoundingBox , label = 'text' )
56
66
out_bbox .add_description ('Bounding boxes of the detected text regions in the input images. No corresponding box '
57
67
'for the entire image (`TextDocument`) region' )
58
68
0 commit comments