Skip to content

Commit 2b33af4

Browse files
Further OCR/Frame bugfixes
1 parent 5c0326e commit 2b33af4

File tree

7 files changed

+23
-14
lines changed

7 files changed

+23
-14
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,16 @@ With this, the mounted directory `/data` in the container is accessible from ins
6666

6767

6868

69-
## Running the server without Docker/Podman
69+
## Running the server locally
7070

7171
First install the Python dependencies listed in `requirements.txt`:
7272

7373
````bash
7474
$ pip install -r requirements.txt
7575
````
7676

77+
You will also need to install opencv-python if you are not running within a container (`pip install opencv-python`).
78+
7779
Let's again assume that the data are in a local directory `/Users/Shared/archive` with subdirectories `audio`, `image`, `text` and `video`. You need to copy, symlink, or mount that local directory into the `static` directory. Note that the `static/data` symbolic link in the repository is set up to work with the Docker containers: if you keep it in that form, your data need to be in `/data`; otherwise you need to change the link to fit your needs. For example, you could remove the symbolic link and replace it with one that uses your local directory:
7880

7981
```bash

app.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def ocrpage():
2121
data = request.json
2222
try:
2323
page_number = data["page_number"]
24-
return (render_ocr(data['vid_path'], page_number))
24+
view_id = data["view_id"]
25+
return (render_ocr(data['vid_path'], data["view_id"], page_number))
2526
except Exception as e:
2627
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
2728
pass

ocr.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,11 @@ def paginate(frames_list):
114114

115115
return {i: page for (i, page) in enumerate(pages)}
116116

117-
def render_ocr(vid_path, page_number):
117+
def render_ocr(vid_path, view_id, page_number):
118118
"""Iterate through frames and display the contents/alignments."""
119119
# Path for storing temporary images generated by cv2
120120
cv2_vid = cv2.VideoCapture(vid_path)
121-
f = open(session["frames_pages"])
121+
f = open(session[f"{view_id}-page-file"])
122122
frames_pages = json.load(f)
123123
page = frames_pages[str(page_number)]
124124
for frame_num, frame in page:
@@ -135,6 +135,7 @@ def render_ocr(vid_path, page_number):
135135

136136
return render_template('ocr.html',
137137
vid_path=vid_path,
138+
view_id=view_id,
138139
page=page,
139140
n_pages=len(frames_pages),
140141
page_number=str(page_number))
@@ -180,10 +181,10 @@ def get_ocr_views(mmif):
180181
views.append(view)
181182
return views
182183

183-
def save_json(dict):
184+
def save_json(dict, view_id):
184185
# jsonified_pages = json.dumps(dict)
185186
with tempfile.NamedTemporaryFile(
186187
prefix="/app/static/tmp/", suffix=".json", delete=False) as tf:
187188
pages_json = open(tf.name, "w")
188189
json.dump(dict, pages_json)
189-
session["frames_pages"] = tf.name
190+
session[f"{view_id}-page-file"] = tf.name

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
spacy==2.3.2
22
clams-python==1.0.0
3-
flask-session
3+
flask-session==0.5.0

static/tmp/.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore

templates/ocr.html

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<div id="ocr_tab">
1+
<div id="ocr_tab_{{view_id}}">
22
{% for frame_num, frame in page %}
33
{% set filename = frame["id"] %}
44
{% set id = frame["id"] %}
@@ -37,7 +37,7 @@ <h4>
3737
</div>
3838
{% endfor %}
3939
{% if n_pages > 1 %}
40-
<div id="page-buttons">
40+
<div class="page-buttons">
4141
</div>
4242
{% endif %}
4343
</div>
@@ -119,6 +119,7 @@ <h4>
119119
function changePage(page) {
120120
var data = {
121121
"vid_path": "{{vid_path}}",
122+
"view_id": "{{view_id}}",
122123
"page_number": parseInt("{{page_number}}")
123124
}
124125
if (page == BACKWARD) {
@@ -137,7 +138,7 @@ <h4>
137138
contentType: "application/json",
138139
data: JSON.stringify(data),
139140
success: function(res_html){
140-
$('#ocr_tab').parent().html(res_html);
141+
$('#ocr_tab_{{view_id}}').parent().html(res_html);
141142
}
142143
})
143144
}
@@ -173,7 +174,7 @@ <h4>
173174
})
174175
})
175176

176-
$("#page-buttons").each(function() {
177+
$("#ocr_tab_{{view_id}} .page-buttons").each(function() {
177178
page_number = parseInt("{{page_number}}")
178179
n_pages = parseInt("{{n_pages}}")
179180

@@ -199,5 +200,5 @@ <h4>
199200

200201
$(this).append(`<input id="page-input" onKeyPress="if(event.keyCode==13) enterPageNumber();" />`)
201202
})
202-
})
203+
})
203204
</script>

utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -371,5 +371,5 @@ def prepare_ocr_visualization(mmif, view):
371371
find_duplicates(frames_list)
372372
frames_pages = paginate(frames_list)
373373
# Save page list as temp file
374-
save_json(frames_pages)
375-
return render_ocr(vid_path, 0)
374+
save_json(frames_pages, view.id)
375+
return render_ocr(vid_path, view.id, 0)

0 commit comments

Comments
 (0)