Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate from function-based to class-based rendering #40

Merged
merged 1 commit into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

from flask import Flask, request, render_template, flash, send_from_directory, redirect
from mmif.serialize import Mmif
from mmif.vocabulary import DocumentTypes

import cache
from cache import set_last_access, cleanup
import traceback
from render import render_documents, render_annotations, prepare_and_render_ocr, render_ocr_page
from render import render_documents, render_annotations, prepare_ocr, render_ocr_page

# these two static folder-related params are important, do not remove
app = Flask(__name__, static_folder='static', static_url_path='')
Expand All @@ -26,9 +27,11 @@ def index():
@app.route('/ocr', methods=['POST'])
def ocr():
    """
    Serves one page of the OCR tab.

    When the posted JSON has no "page_number" key, the OCR data is prepared
    first via build_ocr_tab and page 0 is requested. Otherwise the requested
    page is served directly.

    :return: whatever serve_ocr_page returns for the request payload, or the
             error snippet produced by build_ocr_tab when preparation failed
    """
    payload = request.json
    if "page_number" not in payload:
        # build_ocr_tab returns None on success and an HTML error snippet on
        # failure; surface the error instead of trying to serve a page that
        # was never prepared.
        error = build_ocr_tab(payload)
        if error is not None:
            return error
        payload["page_number"] = 0
    return serve_ocr_page(payload)


@app.route('/upload', methods=['GET', 'POST'])
Expand Down Expand Up @@ -102,17 +105,20 @@ def render_mmif(mmif_str, viz_id):
annotations=rendered_annotations)


def build_ocr_tab(data):
    """
    Prepares OCR (at load time, due to lazy loading)

    Reads the cached MMIF file named by the request's "mmif_id", looks up the
    view named by "view_id", hands both to prepare_ocr, and stores the
    location path of the first video document in request.json["vid_path"].

    :param data: request payload; NOTE(review): it is immediately replaced by
                 a copy of request.json, so the argument itself is not read
    :return: None on success, or an HTML error snippet (str) if anything
             raised while preparing
    """
    try:
        data = dict(request.json)
        mmif_path = cache.get_cache_root() / data["mmif_id"] / "file.mmif"
        # Context manager so the file handle is closed even if read() fails
        with open(mmif_path) as mmif_file:
            mmif_str = mmif_file.read()
        mmif = Mmif(mmif_str)
        ocr_view = mmif.get_view_by_id(data["view_id"])
        prepare_ocr(mmif, ocr_view, data["mmif_id"])
        video_docs = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
        request.json["vid_path"] = video_docs[0].location_path()

    except Exception as e:
        app.logger.error(f"{e}\n{traceback.format_exc()}")
        # Closing tag now matches the opening <p> (was </h1>)
        return f'<p class="error">Error: {e} Check the server log for more information.</p>'
Expand Down
15 changes: 15 additions & 0 deletions ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,21 @@ def add_text_document(self, anno):
[text_val] if text_val not in self.text else self.text


def prepare_ocr(mmif, view, viz_id):
    """
    Builds the paginated list of OCR frames for a view and saves it.

    The frames come from get_ocr_frames, are flattened to (key, attribute
    dict) pairs, passed through find_duplicates and paginate, and the
    resulting pages are written out with save_json under the view id and
    viz_id. Nothing is returned.
    """
    frames_by_key = get_ocr_frames(view, mmif)

    # vars() turns each frame object into a plain attribute dict
    frame_entries = []
    for key, frame in frames_by_key.items():
        frame_entries.append((key, vars(frame)))

    # Paging is necessary to reduce IO cost
    pages = paginate(find_duplicates(frame_entries))

    # Save page list as temp file
    save_json(pages, view.id, viz_id)


def find_annotation(anno_id, mmif):
if mmif.id_delimiter in anno_id:
view_id, anno_id = anno_id.split(mmif.id_delimiter)
Expand Down
Loading
Loading