|
| 1 | +# app-audio-segmenter version 0.1.0 |
| 2 | +# author: Angus L'Herrou |
| 3 | +# org: CLAMS team |
| 4 | +import argparse |
| 5 | +import glob |
| 6 | +import os |
| 7 | +import csv |
| 8 | +import shutil |
| 9 | +import subprocess |
| 10 | +from io import StringIO |
| 11 | +from typing import Dict, Union |
| 12 | + |
| 13 | +from clams import ClamsApp, Restifier |
| 14 | +from mmif import DocumentTypes, AnnotationTypes, Mmif, Document, View, Annotation |
| 15 | + |
| 16 | +APP_VERSION = '0.1.0' |
| 17 | +WRAPPED_IMAGE = 'clamsproject/clams-python:0.1.6' |
| 18 | +MEDIA_DIRECTORY = '/segmenter/data' |
| 19 | +SEGMENTER_DIR = '/segmenter/acoustic-classification-segmentation' |
| 20 | +TIME_FRAME_PREFIX = 'tf' |
| 21 | +SEGMENTER_ACCEPTED_EXTENSIONS = {'.mp3', '.wav'} |
| 22 | + |
| 23 | + |
class Segmenter(ClamsApp):
    """CLAMS app that runs an external audio segmenter over the AudioDocuments
    of a MMIF file and records the result as TimeFrame annotations.

    One new view is added per row of the segmenter's TSV output; each view
    holds alternating ``speech`` / ``non-speech`` TimeFrames.
    """

    def setupmetadata(self) -> dict:
        """Return the static metadata dictionary describing this app."""
        return {
            "name": "Audio Segmenter",
            "description": "tbd",
            "vendor": "Team CLAMS",
            "iri": f"http://mmif.clams.ai/apps/audio-segmenter/{APP_VERSION}",
            "wrappee": WRAPPED_IMAGE,
            "requires": [DocumentTypes.AudioDocument.value],
            "produces": [
                AnnotationTypes.TimeFrame.value
            ]
        }

    def sniff(self, mmif) -> bool:
        """Tell whether ``mmif`` contains at least one usable audio document.

        :param mmif: a ``Mmif`` object, or anything the ``Mmif`` constructor accepts
        :return: True if any AudioDocument location ends in one of
                 ``SEGMENTER_ACCEPTED_EXTENSIONS``
        """
        if not isinstance(mmif, Mmif):
            mmif = Mmif(mmif)
        return any(
            os.path.splitext(loc)[-1] in SEGMENTER_ACCEPTED_EXTENSIONS
            for loc in mmif.get_documents_locations(DocumentTypes.AudioDocument.value)
        )

    def annotate(self, mmif: Union[str, dict, Mmif], save_tsv=False, pretty=False) -> str:
        """Segment every accepted AudioDocument in ``mmif`` and add the results as new views.

        :param mmif: a ``Mmif`` object, or anything the ``Mmif`` constructor accepts
        :param save_tsv: forwarded to ``segment``; keeps the intermediary TSV on disk
        :param pretty: forwarded to ``Mmif.serialize``
        :return: the serialized MMIF including the new views
        :raises ValueError: if two documents share a basename, or a TSV row
                            holds an odd number of timestamps
        """
        mmif_obj: Mmif = mmif if isinstance(mmif, Mmif) else Mmif(mmif)

        # get AudioDocuments with locations
        docs = [document for document in mmif_obj.documents
                if document.at_type == DocumentTypes.AudioDocument.value
                and len(document.location) > 0
                and os.path.splitext(document.location)[-1] in SEGMENTER_ACCEPTED_EXTENSIONS]

        files = [document.location for document in docs]

        # key them by location basenames
        docs_dict: Dict[str, Document] = {os.path.splitext(os.path.basename(doc.location))[0]: doc for doc in docs}
        # Files are copied into one flat directory and matched back by basename,
        # so colliding basenames cannot be told apart.
        if len(docs) != len(docs_dict):
            raise ValueError('audio documents must not share a file basename')
        # TODO (angus-lherrou @ 2020-10-03): allow duplicate basenames for files originally from different folders
        #  by renaming files more descriptively

        setup(files)

        tsv_string = segment(save_tsv)

        reader = csv.reader(StringIO(tsv_string), delimiter='\t')

        for row in reader:
            if not row:
                # csv yields [] for blank lines; there is nothing to annotate
                continue
            filename = os.path.splitext(os.path.split(row[0])[-1])[0]
            splits = row[1:-1]  # first element is filepath, last element is speech ratio
            if len(splits) % 2 != 0:
                raise ValueError('every row should have an even number of timestamps')
            timestamps = [float(ts) for ts in splits]

            v: View = mmif_obj.new_view()
            self.stamp_view(v, docs_dict[filename].id)

            tf_idx = 1
            # Timestamps come as (start, end) pairs of speech; the gap between
            # one pair's end and the next pair's start is a non-speech frame.
            # A row without timestamps simply produces a view with no frames.
            for pair_idx in range(0, len(timestamps), 2):
                s_start_ts = timestamps[pair_idx]
                s_end_ts = timestamps[pair_idx + 1]
                v.add_annotation(self.create_segment_tf(s_start_ts, s_end_ts, tf_idx, frame_type='speech'))
                tf_idx += 1
                if pair_idx + 2 < len(timestamps):
                    ns_end_ts = timestamps[pair_idx + 2]
                    v.add_annotation(self.create_segment_tf(s_end_ts, ns_end_ts, tf_idx, frame_type='non-speech'))
                    tf_idx += 1

        return mmif_obj.serialize(pretty=pretty)

    @staticmethod
    def create_segment_tf(start: float, end: float, index: int, frame_type: str) -> Annotation:
        """Build a TimeFrame annotation for one segment.

        :param start: segment start, in seconds
        :param end: segment end, in seconds
        :param index: number appended to ``TIME_FRAME_PREFIX`` to form the annotation id
        :param frame_type: either ``'speech'`` or ``'non-speech'``
        :return: the new annotation, with ``start``/``end`` stored in milliseconds
        :raises ValueError: if ``frame_type`` is not one of the two accepted values
        """
        if frame_type not in {'speech', 'non-speech'}:
            raise ValueError(f'unsupported frame type: {frame_type!r}')
        tf = Annotation()
        tf.at_type = AnnotationTypes.TimeFrame.value
        tf.id = TIME_FRAME_PREFIX + str(index)
        tf.properties['frameType'] = frame_type
        # times should be in milliseconds; round instead of truncating so that
        # float error (e.g. 1.13 * 1000 == 1129.9999999999998) is not turned
        # into an off-by-one millisecond
        tf.properties['start'] = int(round(start * 1000))
        tf.properties['end'] = int(round(end * 1000))
        return tf

    def stamp_view(self, view: View, tf_source_id: str):
        """Record on ``view`` which app produced it and what it contains.

        :param view: the view to mark
        :param tf_source_id: id of the document the view's TimeFrames refer to
        :raises ValueError: if the view is frozen
        """
        if view.is_frozen():
            raise ValueError("can't modify an old view")
        view.metadata['app'] = self.metadata['iri']
        view.new_contain(AnnotationTypes.TimeFrame.value, {'unit': 'milliseconds', 'document': tf_source_id})
| 117 | + |
| 118 | + |
def setup(files: list):
    """Reset the media directory so that it holds copies of exactly ``files``.

    Every entry matched by ``MEDIA_DIRECTORY/*`` is deleted, then each path in
    ``files`` is copied into ``MEDIA_DIRECTORY`` under its basename.

    :param files: paths of the files to copy
    """
    # The copy below needs the target directory to exist.
    os.makedirs(MEDIA_DIRECTORY, exist_ok=True)
    for stale in glob.glob(os.path.join(MEDIA_DIRECTORY, '*')):
        # os.remove cannot delete a directory, so leftover subdirectories
        # need rmtree (symlinks to directories are plain removes).
        if os.path.isdir(stale) and not os.path.islink(stale):
            shutil.rmtree(stale)
        else:
            os.remove(stale)
    for file in files:
        shutil.copy(file, os.path.join(MEDIA_DIRECTORY, os.path.basename(file)))
| 125 | + |
| 126 | + |
def segment(save_tsv=False) -> str:
    """Run the wrapped segmenter over ``MEDIA_DIRECTORY`` and return its stdout.

    The model directory passed to ``run.py`` is the last entry, in sorted
    order, of ``SEGMENTER_DIR/pretrained``.

    :param save_tsv: if true, also write the raw output to ``segmented.tsv``
                     in the current working directory
    :return: the segmenter's standard output decoded as UTF-8
    """
    pretrained_root = os.path.join(SEGMENTER_DIR, "pretrained")
    pretrained_model_dir = sorted(os.listdir(pretrained_root))[-1]
    proc = subprocess.run(
        [
            'python',
            os.path.join(SEGMENTER_DIR, 'run.py'),
            '-s',
            os.path.join(pretrained_root, pretrained_model_dir),
            MEDIA_DIRECTORY
        ],
        stdout=subprocess.PIPE
    )
    if save_tsv:
        # Write the captured bytes verbatim; the context manager guarantees
        # the handle is closed, and both modes now decode the same bytes.
        with open('segmented.tsv', 'wb') as tsv:
            tsv.write(proc.stdout)
    return proc.stdout.decode(encoding='utf8')
| 149 | + |
| 150 | + |
if __name__ == '__main__':
    # Command-line entry point: either process a single MMIF file (--once)
    # or serve the app through Restifier.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--once',
        type=str,
        metavar='PATH',
        help='Use this flag if you want to run the segmenter on a path you specify, instead of running '
             'the Flask app.',
    )
    cli.add_argument(
        '--pretty',
        action='store_true',
        help='Use this flag to return "pretty" (indented) MMIF data.',
    )
    cli.add_argument(
        '--save-tsv',
        action='store_true',
        help='Use this flag to preserve the intermediary TSV file '
             'generated by the segmenter.',
    )

    options = cli.parse_args()

    if not options.once:
        # Service mode: pin the command-line flags onto every annotate call.
        segmenter_app = Segmenter()
        original_annotate = segmenter_app.annotate

        def annotate_with_cli_flags(*args, **kwargs):
            # Keyword arguments from the caller are deliberately not forwarded;
            # only the flags given on the command line apply.
            return original_annotate(*args, save_tsv=options.save_tsv, pretty=options.pretty)

        segmenter_app.annotate = annotate_with_cli_flags
        segmenter_service = Restifier(segmenter_app)
        segmenter_service.run()
    else:
        # One-shot mode: read the MMIF from the given path, annotate it, and
        # write the result to mmif_out.json in the working directory.
        with open(options.once) as mmif_in:
            mmif_str = mmif_in.read()

        segmenter_app = Segmenter()

        mmif_out = segmenter_app.annotate(mmif_str, save_tsv=options.save_tsv, pretty=options.pretty)
        with open('mmif_out.json', 'w') as out_file:
            out_file.write(mmif_out)
0 commit comments