Version 0.1.0

angus-lherrou · angus-lherrou · commit 324022237ce9 · 2020-10-28T23:52:53.000-04:00
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,6 @@
+.git/
+.gitignore
+.dockerignore
+Dockerfile
+outerdemo.sh
+wgbh-audios/
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+.idea/
+__pycache__/
+.pytype
+htmlcov/
+.coverage
+demo/results
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,23 @@
+FROM clamsproject/clams-python:0.1.6
+
+LABEL maintainer="Angus L'Herrou <piraka@brandeis.edu>"
+
+RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1 ffmpeg && rm -rf /var/lib/apt/lists/*
+
+# WORKDIR creates /segmenter if it is missing, so no separate mkdir is needed
+WORKDIR /segmenter
+COPY . /segmenter
+
+RUN mkdir ./data
+
+RUN git clone --depth 1 --branch v1 https://github.com/brandeis-llc/acoustic-classification-segmentation.git
+
+RUN pip install -r ./acoustic-classification-segmentation/requirements.txt
+
+# hotfix for issue with the requirements
+RUN pip install --upgrade librosa
+
+RUN pip install -r ./requirements.txt
+
+ENTRYPOINT ["python"]
+CMD ["app.py"]
diff --git a/README.md b/README.md
@@ -0,0 +1,13 @@
+# app-audio-segmenter
+
+To run the demo on the provided mp3 file, first build the Docker image, then run the outer demo script:
+
+```
+$ docker build -t app-audio-segmenter:latest -t app-audio-segmenter:0.1.0 .
+$ chmod +x outerdemo.sh
+$ ./outerdemo.sh
+```
+
+Open the newly created `demo/results` directory to see the generated TSV file (for comparison) and the MMIF files.
+
+To run the app as a Flask app, build the image and run the container without overriding the command, as for other CLAMS apps; the default `python app.py` starts the service.
diff --git a/app.py b/app.py
@@ -0,0 +1,184 @@
+# app-audio-segmenter version 0.1.0
+# author: Angus L'Herrou
+# org: CLAMS team
+import argparse
+import glob
+import os
+import csv
+import shutil
+import subprocess
+from io import StringIO
+from typing import Dict, Union
+
+from clams import ClamsApp, Restifier
+from mmif import DocumentTypes, AnnotationTypes, Mmif, Document, View, Annotation
+
+APP_VERSION = '0.1.0'
+WRAPPED_IMAGE = 'clamsproject/clams-python:0.1.6'
+MEDIA_DIRECTORY = '/segmenter/data'
+SEGMENTER_DIR = '/segmenter/acoustic-classification-segmentation'
+TIME_FRAME_PREFIX = 'tf'
+SEGMENTER_ACCEPTED_EXTENSIONS = {'.mp3', '.wav'}
+
+
+class Segmenter(ClamsApp):
+    """CLAMS app wrapping the acoustic segmenter; adds speech/non-speech TimeFrames to MMIF."""
+
+    def setupmetadata(self) -> dict:
+        """Return the app metadata: name, vendor, IRI, wrapped image, required and produced types."""
+        return {
+            "name": "Audio Segmenter",
+            "description": "tbd",
+            "vendor": "Team CLAMS",
+            "iri": f"http://mmif.clams.ai/apps/audio-segmenter/{APP_VERSION}",
+            "wrappee": WRAPPED_IMAGE,
+            "requires": [DocumentTypes.AudioDocument.value],
+            "produces": [AnnotationTypes.TimeFrame.value],
+        }
+
+    def sniff(self, mmif) -> bool:
+        """Return True if any AudioDocument location has an extension the segmenter accepts."""
+        if not isinstance(mmif, Mmif):
+            mmif = Mmif(mmif)
+        return any(os.path.splitext(loc)[-1] in SEGMENTER_ACCEPTED_EXTENSIONS
+                   for loc in mmif.get_documents_locations(DocumentTypes.AudioDocument.value))
+
+    def annotate(self, mmif: Union[str, dict, Mmif], save_tsv=False, pretty=False) -> str:
+        """Run the segmenter on the accepted AudioDocuments; return serialized MMIF with one new view per TSV row.
+
+        Raises ValueError on duplicate location basenames or on a row with an odd number of timestamps.
+        """
+        mmif_obj: Mmif = mmif if isinstance(mmif, Mmif) else Mmif(mmif)
+
+        # get AudioDocuments with locations
+        docs = [document for document in mmif_obj.documents
+                if document.at_type == DocumentTypes.AudioDocument.value
+                and len(document.location) > 0
+                and os.path.splitext(document.location)[-1] in SEGMENTER_ACCEPTED_EXTENSIONS]
+
+        files = [document.location for document in docs]
+
+        # key them by location basenames
+        docs_dict: Dict[str, Document] = {os.path.splitext(os.path.basename(doc.location))[0]: doc for doc in docs}
+        if len(docs) != len(docs_dict):
+            raise ValueError('AudioDocument locations must have unique basenames')
+        # TODO (angus-lherrou @ 2020-10-03): allow duplicate basenames for files originally from different folders
+        #  by renaming files more descriptively
+
+        setup(files)
+
+        tsv_string = segment(save_tsv)
+
+        reader = csv.reader(StringIO(tsv_string), delimiter='\t')
+
+        for row in reader:
+            if not row:  # csv.reader yields [] for blank lines
+                continue
+            filename = os.path.splitext(os.path.split(row[0])[-1])[0]
+            splits = row[1:-1]  # first element is filepath, last element is speech ratio
+            if len(splits) % 2 != 0:
+                raise ValueError('every row should have an even number of timestamps')
+
+            v: View = mmif_obj.new_view()
+            self.stamp_view(v, docs_dict[filename].id)
+
+            # splits holds (start, end) pairs of speech; gaps between pairs are non-speech, no pairs -> empty view
+            tf_idx = 1
+            prev_end = None
+            for start, end in zip(map(float, splits[0::2]), map(float, splits[1::2])):
+                if prev_end is not None:
+                    v.add_annotation(self.create_segment_tf(prev_end, start, tf_idx, frame_type='non-speech'))
+                    tf_idx += 1
+                v.add_annotation(self.create_segment_tf(start, end, tf_idx, frame_type='speech'))
+                tf_idx += 1
+                prev_end = end
+
+        return mmif_obj.serialize(pretty=pretty)
+
+    @staticmethod
+    def create_segment_tf(start: float, end: float, index: int, frame_type: str) -> Annotation:
+        """Build a TimeFrame with id prefix+index; start/end (presumably seconds) are stored as integer ms."""
+        assert frame_type in {'speech', 'non-speech'}
+        tf = Annotation()
+        tf.at_type = AnnotationTypes.TimeFrame.value
+        tf.id = TIME_FRAME_PREFIX + str(index)
+        tf.properties['frameType'] = frame_type
+        # times should be in milliseconds
+        tf.properties['start'] = int(start * 1000)
+        tf.properties['end'] = int(end * 1000)
+        return tf
+
+    def stamp_view(self, view: View, tf_source_id: str):
+        """Record this app's IRI on the view and declare millisecond TimeFrames for the source document id."""
+        if view.is_frozen():
+            raise ValueError("can't modify an old view")
+        view.metadata['app'] = self.metadata['iri']
+        view.new_contain(AnnotationTypes.TimeFrame.value, {'unit': 'milliseconds', 'document': tf_source_id})
+
+
+def setup(files: list):
+    """Clear out MEDIA_DIRECTORY, then copy every given file into it under its own basename."""
+    for stale in glob.glob(os.path.join(MEDIA_DIRECTORY, '*')):
+        os.remove(stale)
+    for source in files:
+        shutil.copy(source, os.path.join(MEDIA_DIRECTORY, os.path.basename(source)))
+
+
+def segment(save_tsv=False) -> str:
+    """Run the wrapped segmenter over MEDIA_DIRECTORY and return its standard output decoded as UTF-8.
+
+    The model directory sorting last under ``pretrained`` is used. With ``save_tsv`` the raw output is also
+    written to ``segmented.tsv`` in the working directory. Raises CalledProcessError if run.py exits non-zero.
+    """
+    pretrained_model_dir = sorted(os.listdir(os.path.join(SEGMENTER_DIR, "pretrained")))[-1]
+    proc = subprocess.run(
+        [
+            'python',
+            os.path.join(SEGMENTER_DIR, 'run.py'),
+            '-s',
+            os.path.join(SEGMENTER_DIR, 'pretrained', pretrained_model_dir),
+            MEDIA_DIRECTORY
+        ],
+        stdout=subprocess.PIPE,
+        check=True
+    )
+    if save_tsv:
+        with open('segmented.tsv', 'wb') as tsv:
+            tsv.write(proc.stdout)
+    return proc.stdout.decode(encoding='utf8')
+
+
+if __name__ == '__main__':
+    # Command-line entry point: `--once PATH` annotates one MMIF file; otherwise the app is served via Restifier.
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--once',
+                     type=str,
+                     metavar='PATH',
+                     help='Use this flag if you want to run the segmenter on a path you specify, instead of running '
+                          'the Flask app.')
+    cli.add_argument('--pretty',
+                     action='store_true',
+                     help='Use this flag to return "pretty" (indented) MMIF data.')
+    cli.add_argument('--save-tsv',
+                     action='store_true',
+                     help='Use this flag to preserve the intermediary TSV file '
+                          'generated by the segmenter.')
+    cli_args = cli.parse_args()
+
+    if not cli_args.once:
+        # service mode: every annotate() call gets the command-line options; caller keyword arguments are ignored
+        segmenter_app = Segmenter()
+        wrapped_annotate = segmenter_app.annotate
+
+        def annotate_with_cli_options(*args, **kwargs):
+            return wrapped_annotate(*args, save_tsv=cli_args.save_tsv, pretty=cli_args.pretty)
+
+        segmenter_app.annotate = annotate_with_cli_options
+        Restifier(segmenter_app).run()
+    else:
+        # one-shot mode: read MMIF from the given path and write the result to mmif_out.json
+        with open(cli_args.once) as mmif_in:
+            mmif_str = mmif_in.read()
+        mmif_out = Segmenter().annotate(mmif_str, save_tsv=cli_args.save_tsv, pretty=cli_args.pretty)
+        with open('mmif_out.json', 'w') as out_file:
+            out_file.write(mmif_out)
diff --git a/demo.sh b/demo.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+find demo/ -type f ! -name '*.mp3' -delete  # start clean: delete every non-mp3 file under demo/
+clams source audio:/segmenter/demo/cpb-aacip-259-dj58gh9t.h264.mp4.mp3 > demo/dj58gh9t.json  # input MMIF for the demo audio (presumably source-only; confirm against the clams CLI)
+python app.py --once demo/dj58gh9t.json --pretty --save-tsv  # writes mmif_out.json and segmented.tsv to the working directory
+mv mmif_out.json demo/
+mv segmented.tsv demo/
+rm demo/cpb-aacip-259-dj58gh9t.h264.mp4.mp3  # presumably so the copied results do not include the audio
diff --git a/demo/cpb-aacip-259-dj58gh9t.h264.mp4.mp3 b/demo/cpb-aacip-259-dj58gh9t.h264.mp4.mp3
diff --git a/outerdemo.sh b/outerdemo.sh
@@ -0,0 +1,3 @@
+docker run --entrypoint /bin/bash app-audio-segmenter:latest demo.sh  # run demo.sh with bash in a new container
+docker cp "$(docker ps -lq)":/segmenter/demo/ ./demo/results/  # copy demo/ out of the most recently created container
+echo "Done. Check ./demo/results for the results of the demo."
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+clams-python~=0.1.6

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+docker run --entrypoint /bin/bash app-audio-segmenter:latest demo.sh`
	`2`	`+docker cp "$(docker ps -lq)":/segmenter/demo/ ./demo/results/`
	`3`	`+echo "Done. Check ./demo/results for the results of the demo."`