Skip to content

Commit 41848ef

Browse files
committed
more descriptions in app metadata
1 parent 83d07ea commit 41848ef

File tree

1 file changed

+21
-11
lines changed

1 file changed

+21
-11
lines changed

metadata.py

+21-11
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@ def appmetadata() -> AppMetadata:
2727
'https://pypi.org/project/python-doctr . The model is capable of detecting text regions in the '
2828
'input image and recognizing text in the regions. The text-localized regions are organized '
2929
'hierarchically by the model into "pages" > "blocks" > "lines" > "words", and this CLAMS app '
30-
'translated into `TextDocument`, `Paragraphs`, `Sentence`, and `Token` annotations that represent '
31-
'recognized text contents, then aligned to `BoundingBox` annotations that represent the detected '
32-
'geometries.',
30+
'translates them into `TextDocument`, `Paragraph`, `Sentence`, and `Token` annotations to '
31+
'represent recognized text contents, then aligns them to `BoundingBox` annotations that represent '
32+
'the detected geometries. This hierarchical structure is also represented in the `TextDocument` '
33+
'annotation output as two newlines (`\\n\\n`) between "paragraphs", one newline (`\\n`) between '
34+
'the "lines", and one space (" ") between the "words". For the text recognition, the model is '
35+
'internally configured to use the "parseq" recognition model, and only works with English text '
36+
'at the moment.',
3337
app_license="Apache 2.0",
3438
identifier="doctr-wrapper",
3539
url="https://github.com/clamsproject/app-doctr-wrapper",
@@ -42,17 +46,23 @@ def appmetadata() -> AppMetadata:
4246
'`representatives` property is present, the app will process videos still frames at the '
4347
'underlying time point annotations that are referred to by the `representatives` property. '
4448
'Otherwise, the app will process the middle frame of the video segment.')
45-
metadata.add_output(DocumentTypes.TextDocument)
46-
out_sent = metadata.add_output(at_type=Uri.SENTENCE)
47-
out_sent.add_description('Translation of the recognized "text lines" in the processed input images')
48-
out_para = metadata.add_output(at_type=Uri.PARAGRAPH)
49-
out_para.add_description('Translation of the recognized "text blocks" in the processed input images')
50-
out_tkn = metadata.add_output(at_type=Uri.TOKEN)
51-
out_tkn.add_description('Translation of the recognized "text words" in the processed input images')
49+
out_td = metadata.add_output(DocumentTypes.TextDocument, **{'@lang': 'en'})
50+
out_td.add_description('Fully serialized text content of the recognized text in the input images. Serialization is '
51+
'done by concatenating `text` values of `Paragraph` annotations with two newline characters.')
52+
out_tkn = metadata.add_output(at_type=Uri.TOKEN, text='*', word='*')
53+
out_tkn.add_description('Translation of the recognized docTR "words" in the input images. `text` and `word` '
54+
'properties store the string values of the recognized text. The duplication is for keeping '
55+
'backward compatibility and consistency with `Paragraph` and `Sentence` annotations.')
56+
out_sent = metadata.add_output(at_type=Uri.SENTENCE, text='*')
57+
out_sent.add_description('Translation of the recognized docTR "lines" in the input images. `text` property stores '
58+
'the string value of space-joined words.')
59+
out_para = metadata.add_output(at_type=Uri.PARAGRAPH, text='*')
60+
out_para.add_description('Translation of the recognized docTR "blocks" in the input images. `text` property stores '
61+
'the string value of newline-joined sentences.')
5262
out_ali = metadata.add_output(AnnotationTypes.Alignment)
5363
out_ali.add_description('Alignments between 1) `TimePoint` <-> `TextDocument`, 2) `TimePoint` <-> '
5464
'`Token`/`Sentence`/`Paragraph`, 3) `BoundingBox` <-> `Token`/`Sentence`/`Paragraph`')
55-
out_bbox = metadata.add_output(AnnotationTypes.BoundingBox)
65+
out_bbox = metadata.add_output(AnnotationTypes.BoundingBox, label='text')
5666
out_bbox.add_description('Bounding boxes of the detected text regions in the input images. No corresponding box '
5767
'for the entire image (`TextDocument`) region')
5868

0 commit comments

Comments
 (0)