app.py
#Imports =====================================================================|
import argparse
import logging
from typing import Union
from clams import ClamsApp, Restifier
from mmif import Mmif, AnnotationTypes, DocumentTypes
import aubio
import numpy as np
#Primary Class ===============================================================|
class TonesDetector(ClamsApp):

    def __init__(self):
        super().__init__()

    def _appmetadata(self):
        # see metadata.py
        pass
    def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
        # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate
        if isinstance(mmif, Mmif):
            mmif_obj: Mmif = mmif
        else:
            mmif_obj: Mmif = Mmif(mmif)
        docs, files = self._get_docs(mmif_obj)
        conf = self.get_configuration(**parameters)
        newview = mmif_obj.new_view()
        # sign the view with the raw user input, not the processed configuration
        self.sign_view(newview, parameters)
        for doc_id, location in files.items():
            newview.new_contain(AnnotationTypes.TimeFrame,
                                timeUnit=conf["timeUnit"],
                                document=doc_id)
            tones = self._detect_tones(location, **conf)
            for start, end in tones:
                tf_anno = newview.new_annotation(AnnotationTypes.TimeFrame)
                tf_anno.add_property("start", start)
                tf_anno.add_property("end", end)
                tf_anno.add_property("frameType", "tone")
        return mmif_obj
#Helper Methods ==========================================================|
    @staticmethod
    def _get_docs(mmif: Mmif):
        # collect audio documents that have a location, and map each document id
        # to its local file path
        documents = [document for document in mmif.documents
                     if document.at_type == DocumentTypes.AudioDocument
                     and len(document.location) > 0]
        files = {document.id: document.location_path() for document in documents}
        return documents, files
    @staticmethod
    def _detect_tones(filepath, **kwargs):
        """
        Perform tone detection using average cross-correlation across consecutive samples.
        """
        aud = aubio.source(filepath)
        out = []
        # read the first two hops from the audio source
        vec1 = np.array(aud()[0])
        vec2, read2 = aud()
        vec2 = np.array(vec2)
        start_sample = 0
        sample_size = int(kwargs["sampleSize"])
        duration = sample_size
        # stopAt is passed as a string; "None" means scan the whole file
        if kwargs["stopAt"] != "None":
            endpoint = int(kwargs["stopAt"])
        else:
            endpoint = aud.duration
        while read2 >= duration and start_sample < endpoint:
            # a sustained tone shows high correlation between consecutive windows
            similarity = np.average(np.correlate(vec1, vec2, mode="valid"))
            sim_count = 0
            while similarity >= float(kwargs["tolerance"]):
                # extend the candidate tone as long as consecutive windows stay similar
                sim_count += 1
                duration += sample_size
                vec2, read2 = aud()
                vec2 = np.array(vec2)
                similarity = np.average(np.correlate(vec1, vec2, mode="valid"))
            if sim_count > 0:
                # record the detected span, converted from samples to seconds
                out.append((start_sample / aud.samplerate,
                            (start_sample + duration) / aud.samplerate))
                sim_count = 0
            # advance past the region just examined and start a fresh window
            start_sample += duration
            vec1 = vec2
            vec2, read2 = aud()
            duration = sample_size
        # drop spans shorter than lengthThreshold (given in milliseconds)
        if kwargs["timeUnit"] == "seconds":
            return [x for x in out if x[1] - x[0] >= int(kwargs["lengthThreshold"]) / 1000]
        elif kwargs["timeUnit"] == "milliseconds":
            return [(x[0] * 1000, x[1] * 1000) for x in out
                    if (x[1] - x[0]) * 1000 >= int(kwargs["lengthThreshold"])]
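    # Hypothetical example of the return value: with timeUnit="seconds" the method
    # yields (start, end) pairs such as [(12.5, 14.0), (73.25, 75.5)]; with
    # timeUnit="milliseconds" the same spans come back as [(12500, 14000), ...].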
#Main ========================================================================|
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", action="store", default="5000", help="set port to listen")
    parser.add_argument("--production", action="store_true", help="run gunicorn server")
    # add more arguments as needed
    # parser.add_argument(more_arg...)
    parsed_args = parser.parse_args()
    # create the app instance
    app = TonesDetector()
    http_app = Restifier(app, port=int(parsed_args.port))
    # for running the application in production mode
    if parsed_args.production:
        http_app.serve_production()
    # development mode
    else:
        app.logger.setLevel(logging.DEBUG)
        http_app.run()
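#Usage =======================================================================|
# A minimal sketch of how the running app is typically exercised; the MMIF file
# name below (input.mmif) is a hypothetical example, not part of this repo:
#
#   $ python app.py --port 5000
#   $ curl -X POST -d @input.mmif http://localhost:5000
#
# The response is the input MMIF with a new view of TimeFrame annotations whose
# frameType is "tone".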