main.py
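"""Real-time sign language (LSP) recognition GUI.

Forked from ronvidev/modelo_lstm_lsp. Captures webcam frames with OpenCV,
extracts MediaPipe Holistic keypoints, classifies fixed-length keypoint
sequences with a Keras model (an LSTM, per the upstream repo name), and
displays and speaks the recognized words in a PyQt5 window loaded from
mainwindow.ui.
"""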
import sys
import cv2
from PyQt5.QtWidgets import QApplication, QMainWindow
from PyQt5.QtGui import QImage, QPixmap
from PyQt5.QtCore import QTimer, Qt
from PyQt5.uic import loadUi
import numpy as np
from keras.models import load_model
from mediapipe.python.solutions.holistic import Holistic
from evaluate_model import normalize_keypoints
from helpers import *
from constants import *
from text_to_speech import text_to_speech


class VideoRecorder(QMainWindow):
    def __init__(self):
        super().__init__()
        loadUi('mainwindow.ui', self)

        self.capture = cv2.VideoCapture(0)
        self.init_lsp()

        # self.btn_start.clicked.connect(self.start_recording)
        # self.btn_stop.clicked.connect(self.stop_recording)

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_frame)
        self.timer.start(30)  # Update frame every 30 ms

    def init_lsp(self):
        self.holistic_model = Holistic()
        self.kp_seq, self.sentence = [], []
        self.count_frame = 0
        self.fix_frames = 0
        self.margin_frame = 1
        self.delay_frames = 3
        self.model = load_model(MODEL_PATH)
        self.recording = False
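
    # update_frame() runs on every QTimer tick. Rough flow, as implemented
    # below: while a hand is detected (or the recording flag is set), keypoints
    # are buffered in self.kp_seq, skipping the first `margin_frame` frames.
    # When the hand disappears, the method waits `delay_frames` extra frames
    # (still buffering) and, if at least MIN_LENGTH_FRAMES keypoint frames were
    # captured, trims the trailing margin/delay frames, normalizes the sequence
    # to MODEL_FRAMES with normalize_keypoints(), and classifies it; predictions
    # above 0.7 confidence are added to the sentence and spoken with
    # text_to_speech(). Counters and the keypoint buffer are then reset.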
    def update_frame(self):
        word_ids = get_word_ids(WORDS_JSON_PATH)
        ret, frame = self.capture.read()
        if not ret:
            return

        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = mediapipe_detection(frame, self.holistic_model)

        if there_hand(results) or self.recording:
            self.recording = False
            self.count_frame += 1
            if self.count_frame > self.margin_frame:
                self.kp_seq.append(extract_keypoints(results))
        else:
            if self.count_frame >= MIN_LENGTH_FRAMES + self.margin_frame:
                self.fix_frames += 1
                if self.fix_frames < self.delay_frames:
                    self.recording = True
                    return
                self.kp_seq = self.kp_seq[: -(self.margin_frame + self.delay_frames)]
                kp_normalized = normalize_keypoints(self.kp_seq, int(MODEL_FRAMES))
                res = self.model.predict(np.expand_dims(kp_normalized, axis=0))[0]

                if res[np.argmax(res)] > 0.7:
                    word_id = word_ids[np.argmax(res)].split('-')[0]
                    sent = words_text.get(word_id)
                    self.sentence.insert(0, sent)
                    text_to_speech(sent)  # ONLY LOCAL (NO SERVER)

            self.recording = False
            self.fix_frames = 0
            self.count_frame = 0
            self.kp_seq = []

        self.lbl_output.setText(" - ".join(self.sentence))

        draw_keypoints(image, results)
        height, width, channel = image.shape
        step = channel * width
        qImg = QImage(image.data, width, height, step, QImage.Format_RGB888)
        scaled_qImg = qImg.scaled(self.lbl_video.size(), Qt.KeepAspectRatio, Qt.SmoothTransformation)
        self.lbl_video.setPixmap(QPixmap.fromImage(scaled_qImg))

    # def start_recording(self):
    #     if not self.recording:
    #         self.recording = True
    #         # self.video_writer = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30, (640, 480))
    #         self.btn_start.setEnabled(False)
    #         self.btn_stop.setEnabled(True)

    # def stop_recording(self):
    #     if self.recording:
    #         self.recording = False
    #         # self.video_writer.release()
    #         self.btn_start.setEnabled(True)
    #         self.btn_stop.setEnabled(False)

    def closeEvent(self, event):
        self.capture.release()
        # if self.recording:
        #     self.video_writer.release()
        event.accept()


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = VideoRecorder()
    window.show()
    # window.showFullScreen()
    sys.exit(app.exec_())