Skip to content

Commit 9b3ae27

Browse files
committed
Merge commit 'c2973e2be49b3a9cb7cd27467f31ce65bfc8b7a6'
2 parents 6623cf9 + c2973e2 commit 9b3ae27

File tree

8 files changed

+219
-113
lines changed

8 files changed

+219
-113
lines changed

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The application also includes tailored visualizations depending on the annotatio
1414
| [WebVTT](https://www.w3.org/TR/webvtt1/) for showing alignments of video captions. | [Whisper](https://github.com/clamsproject/app-whisper-wrapper), [Kaldi](https://github.com/clamsproject/app-aapb-pua-kaldi-wrapper) |
1515
| Javascript bounding boxes for image and OCR annotations. | [Tesseract](https://github.com/clamsproject/app-tesseractocr-wrapper), [EAST](https://github.com/clamsproject/app-east-textdetection) |
1616
| Named entity annotations with [displaCy.](https://explosion.ai/demos/displacy-ent) | [SPACY](https://github.com/clamsproject/app-spacy-wrapper) | |
17+
| Screenshots & HTML5 video navigation of TimeFrames | [Chyron text recognition](https://github.com/clamsproject/app-chyron-text-recognition), [Slate detection](https://github.com/clamsproject/app-slatedetection), [Bars detection](https://github.com/clamsproject/app-barsdetection) |
1718

1819

1920

@@ -65,14 +66,16 @@ With this, the mounted directory `/data` in the container is accessable from ins
6566

6667

6768

68-
## Running the server without Docker/Podman
69+
## Running the server locally
6970

7071
First install the python dependencies listed in `requirements.txt`:
7172

7273
````bash
7374
$ pip install -r requirements.txt
7475
````
7576

77+
You will also need to install opencv-python if you are not running within a container (`pip install opencv-python`).
78+
7679
Let's again assume that the data are in a local directory `/Users/Shared/archive` with subdirectories `audio`, `image`, `text` and `video`. You need to copy, symlink, or mount that local directory into the `static` directory. Note that the `static/data` symbolic link that is in the repository is set up to work with the Docker containers: if you keep it in that form, your data need to be in `/data`; otherwise you need to change the link to fit your needs. For example, you could remove the symbolic link and replace it with one that uses your local directory:
7780

7881
```bash

app.py

+6-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import os
2+
import pathlib
23
import sys
34
import secrets
4-
import html
5-
import datetime
6-
import ast
75

86
from flask import request, render_template, flash, redirect, send_from_directory
97
from werkzeug.utils import secure_filename
@@ -17,12 +15,11 @@ def index():
1715

1816
@app.route('/ocrpage', methods=['POST'])
1917
def ocrpage():
20-
data = request.form
18+
data = request.json
2119
try:
22-
frames_pages = eval(html.unescape(data['frames_pages']))
23-
page_number = int(data['page_number'])
24-
25-
return (render_ocr(data['vid_path'], frames_pages, page_number))
20+
page_number = data["page_number"]
21+
view_id = data["view_id"]
22+
return (render_ocr(data['vid_path'], data["view_id"], page_number))
2623
except Exception as e:
2724
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
2825
pass
@@ -68,15 +65,9 @@ def render_mmif(mmif_str):
6865
mmif=mmif, media=media, annotations=annotations)
6966

7067

71-
# Not sure what this was for, it had a route /display, but that did not work
72-
# def display_file():
73-
# mmif_str = requests.get(request.args["file"]).text
74-
# return display_mmif(mmif_str)
75-
76-
7768
if __name__ == '__main__':
7869
# Make path for temp files
79-
tmp_path = '/app/static/tmp'
70+
tmp_path = pathlib.Path(__file__).parent /'static'/'tmp'
8071
if not os.path.exists(tmp_path):
8172
os.makedirs(tmp_path)
8273

iiif_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import pathlib
34
import tempfile
45
from typing import Dict
56

@@ -90,7 +91,6 @@ def build_document_url(document):
9091
symlink is correctly set.
9192
'''
9293
location = document.location
93-
print("LOCATION IS", location, "----------------------")
9494
if location.startswith("file://"):
9595
location = document.location[7:]
9696
file_path = os.path.join("data", "video", os.path.basename(location))
@@ -107,7 +107,7 @@ def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict):
107107

108108
def save_manifest(iiif_json: Dict) -> str:
    """Write a IIIF manifest to a new JSON file and return the file's path.

    The file is created in the ``static/tmp`` directory next to this module
    and is kept on disk after the function returns (``delete=False``).

    :param iiif_json: the manifest as a JSON-serializable dictionary
    :return: path of the manifest file that was written
    """
    tmp_dir = pathlib.Path(__file__).parent / 'static' / 'tmp'
    # Create the directory on demand so the function also works when the
    # application entry point has not created it yet.
    tmp_dir.mkdir(parents=True, exist_ok=True)
    # The context manager closes (and therefore flushes) the file, so the
    # manifest is complete on disk by the time the path is handed back.
    with tempfile.NamedTemporaryFile(
            'w', dir=str(tmp_dir), suffix='.json', delete=False) as manifest:
        json.dump(iiif_json, manifest, indent=4)
    return manifest.name
113113

ocr.py

+138-48
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,110 @@
11
import datetime
2+
import pathlib
3+
24
import cv2
35
import tempfile
46
import json
5-
6-
from flask import render_template
7-
8-
9-
def add_bounding_box(anno, frames, fps):
10-
frame_num = anno.properties.get("frame") or anno.properties.get("timePoint")
11-
box_id = anno.properties["id"]
12-
boxType = anno.properties["boxType"]
13-
coordinates = anno.properties["coordinates"]
14-
x = coordinates[0][0]
15-
y = coordinates[0][1]
16-
w = coordinates[3][0] - x
17-
h = coordinates[3][1] - y
18-
box = [box_id, boxType, [x, y, w, h]]
19-
if frame_num in frames.keys():
20-
frames[frame_num]["boxes"].append(box)
21-
frames[frame_num]["bb_ids"].append(box_id)
7+
import re
8+
import html
9+
10+
from flask import render_template, session
11+
# from utils import app
12+
13+
14+
class OCRFrame:
    """An (aligned or otherwise) set of OCR annotations for a single frame.

    Attributes set by the constructor and filled in by ``update``:

    - ``text``: list of text strings collected from TextDocument annotations
    - ``boxes``: list of ``[box_id, boxType, [x, y, w, h]]`` entries
    - ``anno_ids``: identifiers of the bounding boxes added so far
    - ``frame_num``, ``secs``, ``timestamp``: position taken from a BoundingBox
    - ``range``, ``sec_range``, ``timestamp_range``: span taken from a TimeFrame
    - ``repeat``: initialised to False; not modified inside this class
    """

    def __init__(self, anno, fps):
        """Create a frame from a first annotation.

        :param anno: annotation exposing ``at_type.shortname`` and ``properties``
        :param fps: frames per second used to convert frame numbers to
            seconds; when falsy, no second/timestamp values are computed
        """
        self.text = []
        self.fps = fps
        self.boxes = []
        self.anno_ids = []
        self.timestamp = None
        self.secs = None
        self.repeat = False
        self.frame_num = None
        self.range = None
        self.timestamp_range = None
        self.sec_range = None

        self.update(anno)

    def update(self, anno):
        """Merge another annotation into this frame, dispatching on its type.

        Annotations whose type is not BoundingBox, TimeFrame or TextDocument
        are ignored.
        """
        shortname = anno.at_type.shortname
        if shortname == "BoundingBox":
            self.add_bounding_box(anno)
        elif shortname == "TimeFrame":
            self.add_timeframe(anno)
        elif shortname == "TextDocument":
            text = anno.properties.get("text_value")
            if not text:
                # The "text" property may be absent altogether; fall back to
                # None instead of raising AttributeError on ``None.value``.
                text = getattr(anno.properties.get("text"), "value", None)
            if text:
                # Insert a space after slashes, backslashes, pipes and quotes.
                self.text.append(re.sub(r'([\\\/\|\"\'])', r'\1 ', text))

    def add_bounding_box(self, anno):
        """Record a bounding box and the frame position it belongs to."""
        # Use explicit None checks: frame number 0 is a valid position and
        # must not fall through to "timePoint" the way ``or`` would make it.
        frame_num = anno.properties.get("frame")
        if frame_num is None:
            frame_num = anno.properties.get("timePoint")
        self.frame_num = frame_num

        box_id = anno.properties["id"]
        box_type = anno.properties["boxType"]
        coordinates = anno.properties["coordinates"]
        # Width and height are measured from the first to the fourth point.
        x, y = coordinates[0][0], coordinates[0][1]
        w = coordinates[3][0] - x
        h = coordinates[3][1] - y
        self.boxes.append([box_id, box_type, [x, y, w, h]])
        self.anno_ids.append(box_id)

        # Without both a frame rate and a frame number there is nothing to
        # convert, so the time fields keep their previous values.
        if self.fps and self.frame_num is not None:
            secs = int(self.frame_num / self.fps)
            self.timestamp = str(datetime.timedelta(seconds=secs))
            self.secs = secs

    def add_timeframe(self, anno):
        """Record the start/end span of a TimeFrame annotation."""
        start, end = anno.properties.get('start'), anno.properties.get('end')
        self.range = (start, end)
        # Only convert to seconds when a frame rate and both ends are known.
        if self.fps and start is not None and end is not None:
            start_secs, end_secs = int(start / self.fps), int(end / self.fps)
            self.timestamp_range = (
                str(datetime.timedelta(seconds=start_secs)),
                str(datetime.timedelta(seconds=end_secs)),
            )
            self.sec_range = (start_secs, end_secs)
69+
70+
71+
def find_annotation(anno_id, view, mmif):
    """Look up an annotation by identifier.

    When ``anno_id`` contains ``mmif.id_delimiter`` the part before the first
    delimiter is taken as a view identifier and the annotation is looked up
    in that view; otherwise it is looked up in ``view``.

    :param anno_id: annotation identifier, optionally prefixed with a view id
    :param view: view to search when the identifier carries no view prefix
    :param mmif: object providing ``id_delimiter`` and ``get_view_by_id``
    :return: whatever ``get_annotation_by_id`` returns for the resolved view
    """
    delimiter = mmif.id_delimiter
    if delimiter in anno_id:
        # Split once only, so an identifier holding further delimiters does
        # not break the two-name unpacking with a ValueError.
        view_id, anno_id = anno_id.split(delimiter, 1)
        view = mmif.get_view_by_id(view_id)
    return view.get_annotation_by_id(anno_id)
76+
77+
78+
def get_ocr_frames(view, mmif, fps):
    """Collect the annotations of a view into ``OCRFrame`` objects.

    Frames are keyed by their frame number or, when no frame number is set,
    by their ``(start, end)`` range. If the view's metadata lists an
    Alignment type, each alignment's source and target annotations are merged
    into one frame; otherwise every annotation in the view is merged into the
    frame that shares its key.

    :param view: view whose annotations are collected
    :param mmif: the enclosing MMIF object, used to resolve annotation ids
    :param fps: frames per second, passed through to ``OCRFrame``
    :return: dictionary mapping frame key to ``OCRFrame``
    """
    frames = {}
    alignment_types = [
        at_type for at_type in view.metadata.contains
        if at_type.shortname == "Alignment"]

    if alignment_types:
        for alignment in view.get_annotations(alignment_types[0]):
            source = find_annotation(alignment.properties["source"], view, mmif)
            target = find_annotation(alignment.properties["target"], view, mmif)
            frame = OCRFrame(source, fps)
            key = frame.frame_num if frame.frame_num is not None else frame.range
            if key is None:
                # The source gave neither a frame number nor a range; skip it
                # (as the non-alignment branch does) rather than lumping
                # unrelated annotations together under a None key.
                continue
            if key in frames:
                frames[key].update(source)
                frames[key].update(target)
            else:
                frame.update(target)
                frames[key] = frame
    else:
        for annotation in view.get_annotations():
            frame = OCRFrame(annotation, fps)
            key = frame.frame_num if frame.frame_num is not None else frame.range
            if key is None:
                continue
            if key in frames:
                frames[key].update(annotation)
            else:
                frames[key] = frame

    return frames
30106

31107

32-
def align_annotations(frames_list, alignments, text_docs):
33-
"""Link alignments with frames"""
34-
prev_frame = None
35-
for frame_num, frame in frames_list:
36-
for box_id in frame["bb_ids"]:
37-
text_id = alignments[box_id]
38-
frame["text"].append(text_docs[text_id])
39-
if is_duplicate_ocr_frame(frame, prev_frame):
40-
frame["repeat"] = True
41-
prev_frame = frame
42-
return frames_list
43-
44-
45108
def paginate(frames_list):
46109
"""Generate pages from a list of frames"""
47110
pages = [[]]
@@ -56,25 +119,43 @@ def paginate(frames_list):
56119
if not frame["repeat"]:
57120
n_frames_on_page += 1
58121

59-
return pages
122+
return {i: page for (i, page) in enumerate(pages)}
60123

61-
def render_ocr(vid_path, view_id, page_number):
    """Render one page of OCR frames with the ``ocr.html`` template.

    The paginated frames are read from the JSON file whose path is stored in
    the session under ``"<view_id>-page-file"``. For every frame on the
    requested page a still image is grabbed from the video, written to a
    temporary ``.jpg`` file, and the file's name is stored as the frame's
    ``"id"``.

    :param vid_path: path of the video to grab frames from
    :param view_id: identifier of the view, used to find the page file
    :param page_number: page to render; converted to a string for the lookup
    :return: the rendered template
    """
    # Close the page file as soon as it has been parsed.
    with open(session[f"{view_id}-page-file"]) as pages_file:
        frames_pages = json.load(pages_file)
    # JSON object keys are strings, hence the str() conversion.
    page = frames_pages[str(page_number)]

    # NOTE(review): this is passed as a file-name *prefix*, not as a
    # directory, so the images are presumably created next to (not inside)
    # static/tmp with names starting "tmp". Left unchanged because the
    # template that consumes frame["id"] is not visible here - confirm.
    image_prefix = str(pathlib.Path(__file__).parent / 'static' / 'tmp')

    cv2_vid = cv2.VideoCapture(vid_path)
    try:
        for frame_num, frame in page:
            # If index is range instead of frame, use the middle of the range
            if frame.get("range"):
                frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2
            cv2_vid.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            _, frame_cap = cv2_vid.read()
            with tempfile.NamedTemporaryFile(
                    prefix=image_prefix, suffix=".jpg", delete=False) as tf:
                cv2.imwrite(tf.name, frame_cap)
                # "id" is just the name of the temp image file
                frame["id"] = pathlib.Path(tf.name).name
    finally:
        # Always give the video handle back, even if a frame fails.
        cv2_vid.release()

    return render_template('ocr.html',
                           vid_path=vid_path,
                           view_id=view_id,
                           page=page,
                           n_pages=len(frames_pages),
                           page_number=str(page_number))
149+
150+
151+
def find_duplicates(frames_list):
    """Flag frames that duplicate the frame immediately before them.

    Walks ``frames_list`` (pairs of frame number and frame) in order and sets
    ``frame["repeat"]`` to True whenever ``is_duplicate_ocr_frame`` reports
    the frame as a duplicate of its predecessor. The list is modified in
    place and also returned.
    """
    previous = None
    for _, current in frames_list:
        duplicate = is_duplicate_ocr_frame(current, previous)
        if duplicate:
            current["repeat"] = True
        previous = current
    return frames_list
78159

79160

80161
def is_duplicate_ocr_frame(frame, prev_frame):
@@ -86,6 +167,7 @@ def is_duplicate_ocr_frame(frame, prev_frame):
86167
return True
87168
return False
88169

170+
89171
def round_boxes(boxes):
90172
# To account for jittery bounding boxes in OCR annotations
91173
rounded_boxes = []
@@ -96,11 +178,19 @@ def round_boxes(boxes):
96178
rounded_boxes.append(rounded_box)
97179
return rounded_boxes
98180

181+
99182
def get_ocr_views(mmif):
    """Return the views that were produced by one of the known OCR-style apps.

    A view is selected when its ``metadata.app`` string contains one of the
    app names listed below.

    :param mmif: object exposing an iterable ``views`` attribute
    :return: list of matching views, in their original order
    """
    ocr_apps = ("east-textdetection", "tesseract", "chyron-text-recognition",
                "slatedetection", "barsdetection", "parseq-wrapper")
    # Substring membership is used on purpose: ``str.find`` returns -1 (which
    # is truthy) when the name is absent, so it cannot be used as a test.
    return [view for view in mmif.views
            if any(ocr_app in view.metadata.app for ocr_app in ocr_apps)]
190+
191+
def save_json(dict, view_id):
    """Dump a dictionary to a JSON file and remember its path in the session.

    The file is created in the ``static/tmp`` directory next to this module
    and kept after the function returns; its path is stored in the session
    under ``"<view_id>-page-file"``.

    :param dict: JSON-serializable dictionary to save (the parameter name
        shadows the builtin and is kept only for backward compatibility)
    :param view_id: identifier used to build the session key
    """
    tmp_dir = pathlib.Path(__file__).parent / 'static' / 'tmp'
    tmp_dir.mkdir(parents=True, exist_ok=True)
    # Pass the directory as ``dir`` (as a ``prefix`` it would only become the
    # start of the file name) and write through the one handle, which the
    # context manager closes and flushes.
    with tempfile.NamedTemporaryFile(
            "w", dir=str(tmp_dir), suffix=".json", delete=False) as tf:
        json.dump(dict, tf)
    session[f"{view_id}-page-file"] = tf.name

requirements.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
spacy==2.3.2
2-
clams-python==1.0.0
2+
clams-python==1.0.*
3+
flask-session
4+
opencv-python==4.*

static/tmp/.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore

0 commit comments

Comments
 (0)