Merge pull request #9 from clamsproject/sdk-1.2.x-update

keighrim · web-flow · commit ce6c317f040b · 2024-06-23T08:15:02.000-04:00
releasing 1.2.4
diff --git a/Containerfile b/Containerfile
@@ -1,5 +1,5 @@
 # Use the same base image version as the clams-python python library version
-FROM ghcr.io/clamsproject/clams-python-opencv4:1.0.9
+FROM ghcr.io/clamsproject/clams-python-opencv4:1.2.4
 # See https://github.com/orgs/clamsproject/packages?tab=packages&q=clams-python for more base images
 # IF you want to automatically publish this image to the clamsproject organization, 
 # 1. you should have generated this template without --no-github-actions flag
@@ -19,6 +19,14 @@ ENV CLAMS_APP_VERSION ${CLAMS_APP_VERSION}
 # install more system packages as needed using the apt manager
 ################################################################################
 
+# https://github.com/openai/whisper/blob/ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab/whisper/__init__.py#L130
+ENV XDG_CACHE_HOME='/cache'  
+# https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#hfhome
+ENV HF_HOME="/cache/huggingface"
+# https://pytorch.org/docs/stable/hub.html#where-are-my-downloaded-models-saved
+ENV TORCH_HOME="/cache/torch"
+
+# RUN mkdir /cache && rm -rf /root/.cache && ln -s /cache /root/.cache
 ################################################################################
 # main app installation
 COPY ./ /app
diff --git a/app.py b/app.py
@@ -2,6 +2,7 @@
 import logging
 from typing import Union, Sequence
 
+import copy
 import cv2
 import itertools
 import numpy as np
@@ -22,24 +23,49 @@ def _appmetadata(self):
         pass
 
     def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
+        """Internal Annotate Wrapper Method
+        
+        Generates a new set of annotations for `mmif` 
+        via EAST Text Detection on Videos and Images. 
+
+        ### params 
+        + mmif => a mmif object
+        + **parameters => runtime parameters (see `metadata.py`)
+        
+        ### returns
+        + mmif object, with new app annotations.
+        """
+        
+        # Run app on contained VideoDocument(s) in MMIF
         for videodocument in mmif.get_documents_by_type(DocumentTypes.VideoDocument):
             # one view per video document
             new_view = mmif.new_view()
             self.sign_view(new_view, parameters)
-            config = self.get_configuration(**parameters)
-            new_view.new_contain(AnnotationTypes.BoundingBox, document=videodocument.id, timeUnit=config["timeUnit"])
+            new_view.new_contain(AnnotationTypes.BoundingBox, document=videodocument.id, timeUnit=parameters["timeUnit"])
             self.logger.debug(f"Running on video {videodocument.location_path()}")
-            mmif = self.run_on_video(mmif, videodocument, new_view, **config)
+            mmif = self.run_on_video(mmif, videodocument, new_view, **parameters)
+
+        # Run app on contained ImageDocument(s) in MMIF
         if mmif.get_documents_by_type(DocumentTypes.ImageDocument):
             # one view for all image documents
             new_view = mmif.new_view()
             self.sign_view(new_view, parameters)
             new_view.new_contain(AnnotationTypes.BoundingBox)
             self.logger.debug(f"Running on all images")
             mmif = self.run_on_images(mmif, new_view)
+        
         return mmif
 
     def run_on_images(self, mmif: Mmif, new_view: View) -> Mmif:
+        """Run EAST on ImageDocuments
+
+        ### params
+        + mmif => Mmif Object 
+        + new_view => a single mmif View (representing all ImageDocuments)
+
+        ### returns
+        + mmif, annotated with boundingboxes
+        """
         for imgdocument in mmif.get_documents_by_type(DocumentTypes.ImageDocument):
             image = cv2.imread(imgdocument.location)
             box_list = image_to_east_boxes(image)
@@ -54,6 +80,16 @@ def run_on_images(self, mmif: Mmif, new_view: View) -> Mmif:
             return mmif
 
     def run_on_video(self, mmif: Mmif, videodocument: Document, new_view: View, **config) -> Mmif:
+        """Run EAST on a VideoDocument
+
+        ### params
+        + mmif => Mmif Object 
+        + videodocument => VideoDocument file
+        + new_view => a single mmif View
+
+        ### returns
+        + mmif, annotated with boundingboxes
+        """
         cap = vdh.capture(videodocument)
         views_with_tframe = [v for v in mmif.get_views_for_document(videodocument.id) 
                              if v.metadata.contains[AnnotationTypes.TimeFrame]]
@@ -66,20 +102,20 @@ def run_on_video(self, mmif: Mmif, videodocument: Document, new_view: View, **co
                                    for v in views_with_tframe for a in v.get_annotations(AnnotationTypes.TimeFrame)
                                    if not frame_type or a.get_property("frameType") in frame_type])
             target_frames = list(map(int, target_frames))
-            self.logger.debug(f"Processing frames {target_frames} from TimeFrame annotations of {frame_type} types")
         else:
             target_frames = vdh.sample_frames(
-                sample_ratio=config['sampleRatio'], start_frame=0, 
-                end_frame=min(int(config['stopAt']), videodocument.get_property("frameCount"))
+                start_frame=0, 
+                end_frame=min(int(config['stopAt']), videodocument.get_property("frameCount")),
+                sample_rate=config['sampleRate']
             )
+
         target_frames.sort()
-        self.logger.debug(f"Running on frames {target_frames}")
-        for fn, fi in zip(target_frames, vdh.extract_frames_as_images(videodocument, target_frames)):
-            self.logger.debug(f"Processing frame {fn}")
+
+        for fn, fi in zip(target_frames, vdh.extract_frames_as_images(videodocument, copy.deepcopy(target_frames))):
             result_list = image_to_east_boxes(fi)
             for box in result_list:
                 bb_annotation = new_view.new_annotation(AnnotationTypes.BoundingBox)
-                tp = vdh.convert(time=fn, in_unit='frame', out_unit=config['timeUnit'], fps=videodocument.get_property("fps"))
+                tp = vdh.convert(t=fn, in_unit='frame', out_unit=config['timeUnit'], fps=videodocument.get_property("fps"))
                 self.logger.debug(f"Adding a timepoint at frame: {fn} >> {tp}")
 
                 tp_annotation = new_view.new_annotation(AnnotationTypes.TimePoint)
@@ -97,9 +133,16 @@ def run_on_video(self, mmif: Mmif, videodocument: Document, new_view: View, **co
 
         return mmif
 
-
+def get_app():
+    """
+    Create and return an instance of the app class. This function takes no arguments, so any external
+    information such as initial app configuration must be supplied by other means. The easiest
+    way to do this is to set global variables before calling this function.
+    """
+    return EastTextDetection()
 
 if __name__ == "__main__":
+    
     parser = argparse.ArgumentParser()
     parser.add_argument("--port", action="store", default="5000", help="set port to listen" )
     parser.add_argument("--production", action="store_true", help="run gunicorn server")
@@ -109,8 +152,8 @@ def run_on_video(self, mmif: Mmif, videodocument: Document, new_view: View, **co
     parsed_args = parser.parse_args()
 
     # create the app instance
-    app = EastTextDetection()
-
+    app = get_app()
+    
     http_app = Restifier(app, port=int(parsed_args.port))
     # for running the application in production mode
     if parsed_args.production:
diff --git a/cli.py b/cli.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""
+The purpose of this file is to define a thin CLI interface for your app
+
+DO NOT CHANGE the name of the file
+"""
+
+import argparse
+import sys
+from contextlib import redirect_stdout
+
+import app
+
+import clams.app
+from clams import AppMetadata
+
+
+def metadata_to_argparser(app_metadata: AppMetadata) -> argparse.ArgumentParser:
+    """
+    Automatically generate an argparse.ArgumentParser from parameters specified in the app metadata (metadata.py).
+    """
+
+    parser = argparse.ArgumentParser(
+        description=f"{app_metadata.name}: {app_metadata.description} (visit {app_metadata.url} for more info)",
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    # parse cli args from app parameters
+    for parameter in app_metadata.parameters:
+        if parameter.multivalued:
+            a = parser.add_argument(
+                f"--{parameter.name}",
+                help=parameter.description,
+                nargs='+',
+                action='extend',
+                type=str
+            )
+        else:
+            a = parser.add_argument(
+                f"--{parameter.name}",
+                help=parameter.description,
+                nargs=1,
+                action="store",
+                type=str)
+        if parameter.choices is not None:
+            a.choices = parameter.choices
+        if parameter.default is not None:
+            a.help += f" (default: {parameter.default}"
+            if parameter.type == "boolean":
+                a.help += (f", any value except for {[v for v in clams.app.falsy_values if isinstance(v, str)]} "
+                           f"will be interpreted as True")
+            a.help += ')'
+            # then we don't have to add default values to the arg_parser
+            # since that's handled by the app._refined_params() method.
+    parser.add_argument('IN_MMIF_FILE', nargs='?', type=argparse.FileType('r'),
+                        help='input MMIF file path, or STDIN if `-` or not provided. NOTE: When running this cli.py in '
+                             'a containerized environment, make sure the container is run with `-i` flag to keep stdin '
+                             'open.',
+                        # will check if stdin is a keyboard, and return None if it is
+                        default=None if sys.stdin.isatty() else sys.stdin)
+    parser.add_argument('OUT_MMIF_FILE', nargs='?', type=argparse.FileType('w'), 
+                        help='output MMIF file path, or STDOUT if `-` or not provided. NOTE: When this is set to '
+                             'STDOUT, any print statements in the app code will be redirected to stderr.',
+                        default=sys.stdout)
+    return parser
+
+
+if __name__ == "__main__":
+    clamsapp = app.get_app()
+    arg_parser = metadata_to_argparser(app_metadata=clamsapp.metadata)
+    args = arg_parser.parse_args()
+    if args.IN_MMIF_FILE:
+        in_data = args.IN_MMIF_FILE.read()
+        # since the flask webapp interface passes parameters as an "unflattened" dict to handle multivalued parameters
+        # (https://werkzeug.palletsprojects.com/en/latest/datastructures/#werkzeug.datastructures.MultiDict.to_dict)
+        # we need to convert the arg_parser's results into a similar structure, in which the dict values are wrapped in lists
+        params = {}
+        for pname, pvalue in vars(args).items():
+            if pvalue is None or pname in ['IN_MMIF_FILE', 'OUT_MMIF_FILE']:
+                continue
+            elif isinstance(pvalue, list):
+                params[pname] = pvalue
+            else:
+                params[pname] = [pvalue]
+        if args.OUT_MMIF_FILE.name == '<stdout>':
+            with redirect_stdout(sys.stderr):
+                out_mmif = clamsapp.annotate(in_data, **params)
+        else:
+            out_mmif = clamsapp.annotate(in_data, **params)
+        args.OUT_MMIF_FILE.write(out_mmif)
+    else:
+        arg_parser.print_help()
+        sys.exit(1)
diff --git a/metadata.py b/metadata.py
@@ -39,15 +39,15 @@ def appmetadata() -> AppMetadata:
         description="Segments of video to run on. Only works with VideoDocument input and TimeFrame input. Empty value means run on the every frame types.",
     )
     metadata.add_parameter(
-        name="sampleRatio",
+        name="sampleRate",
         type="integer",
-        default="30",
+        default=30,
         description="Frequency to sample frames. Only works with VideoDocument input, and without TimeFrame input. (when `TimeFrame` annotation is found, this parameter is ignored.)",
     )
     metadata.add_parameter(
         name="stopAt",
         type="integer",
-        default=108000,  # ~2 hours of video at 30fps 1 * 60 * 60 * 30
+        default=2 * 60 * 60 * 30,  # ~2 hours of video at 30fps (2 h * 60 min * 60 s * 30 fps)
         description="Frame number to stop running. Only works with VideoDocument input. The default is roughly 2 hours of video at 30fps.",
     )
     metadata.add_parameter(
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-clams-python==1.0.9
+clams-python==1.2.4
 
 opencv-python-rolling==4.7.*  # standard 4.7.x version (no rolling release) has bug with old CPUs
 imutils

Original file line number	Diff line number	Diff line change
`@@ -39,15 +39,15 @@ def appmetadata() -> AppMetadata:`
`39`	`39`	`description="Segments of video to run on. Only works with VideoDocument input and TimeFrame input. Empty value means run on the every frame types.",`
`40`	`40`	`)`
`41`	`41`	`metadata.add_parameter(`
`42`		`- name="sampleRatio",`
	`42`	`+ name="sampleRate",`
`43`	`43`	`type="integer",`
`44`		`- default="30",`
	`44`	`+ default=30,`
`45`	`45`	description="Frequency to sample frames. Only works with VideoDocument input, and without TimeFrame input. (when `TimeFrame` annotation is found, this parameter is ignored.)",
`46`	`46`	`)`
`47`	`47`	`metadata.add_parameter(`
`48`	`48`	`name="stopAt",`
`49`	`49`	`type="integer",`
`50`		`- default=108000, # ~2 hours of video at 30fps 1 * 60 * 60 * 30`
	`50`	`+ default=2 * 60 * 60 * 30, # ~2 hours of video at 30fps (2 h * 60 min * 60 s * 30 fps)`
`51`	`51`	`description="Frame number to stop running. Only works with VideoDocument input. The default is roughly 2 hours of video at 30fps.",`
`52`	`52`	`)`
`53`	`53`	`metadata.add_parameter(`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-clams-python==1.0.9`
	`1`	`+clams-python==1.2.4`
`2`	`2`
`3`	`3`	`opencv-python-rolling==4.7.* # standard 4.7.x version (no rolling release) has bug with old CPUs`
`4`	`4`	`imutils`