# HandTrackingModule.py
# Hand detection and tracking helpers built on MediaPipe and OpenCV.
import mediapipe as mp
import cv2 as cv
import time
class HandDetector:
"""
HandDetector class uses MediaPipe to detect and track hands in video frames.
Attributes:
- mode (bool): Static mode or dynamic mode for the hand detection.
- maxHands (int): Maximum number of hands to detect.
- complexity (int): Complexity level of the hand landmarks model.
- detectionCon (float): Minimum detection confidence threshold.
- trackCon (float): Minimum tracking confidence threshold.
"""
def __init__(self, mode=False, maxHands=2, complexity=1, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.complexity = complexity
self.detectionCon = detectionCon
self.trackCon = trackCon
# Initialize MediaPipe hands and drawing utilities
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(
self.mode, self.maxHands, self.complexity, self.detectionCon, self.trackCon
)
self.mpDraw = mp.solutions.drawing_utils
self.tip_IDS = [4, 8, 12, 16, 20] # Indices of fingertips
def findHands(self, frame, draw=True):
"""
Detects hands in the provided frame and draws landmarks if specified.
Args:
- frame (ndarray): The input image in BGR format.
- draw (bool): Whether to draw the landmarks on the frame.
Returns:
- frame (ndarray): The processed frame with landmarks drawn if specified.
"""
frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB) # Convert BGR to RGB for MediaPipe
self.results = self.hands.process(frame_rgb) # Process the RGB frame to detect hands
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(frame, handLms, self.mpHands.HAND_CONNECTIONS)
return frame
def findPositions(self, frame, handNo=0, draw=False):
"""
Finds and returns the dictionary of landmark positions for the specified hand.
Args:
- frame (ndarray): The input image.
- handNo (int): The index of the hand (default is 0 for the first detected hand).
- draw (bool): Whether to draw circles on landmarks.
Returns:
- landmarks (dict): Dictionary of landmark positions with landmark ID as keys and coordinates as values.
"""
self.landmarks = {}
if self.results.multi_hand_landmarks:
myhand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(myhand.landmark):
h, w, _ = frame.shape
cx, cy = int(lm.x * w), int(lm.y * h)
self.landmarks[id] = (cx, cy)
if draw:
cv.circle(frame, (cx, cy), 10, (255, 0, 255), cv.FILLED)
return self.landmarks
def fingersUp(self):
"""
Determines which fingers are up and returns their status.
Returns:
- fingers (list): List of integers (0 or 1) representing if the fingers are up (1) or down (0).
"""
if len(self.landmarks) == 0:
return []
fingers = []
# Determine if it is a left or right hand
wrist_x = self.landmarks[0][0] # Wrist x-coordinate
thumb_x = self.landmarks[self.tip_IDS[0]][0] # Thumb tip x-coordinate
# Check thumb status
if thumb_x > wrist_x:
# Right hand (Thumb is to the right of the wrist)
fingers.append(1 if self.landmarks[self.tip_IDS[0]][0] > self.landmarks[self.tip_IDS[0] - 2][0] else 0)
else:
# Left hand (Thumb is to the left of the wrist)
fingers.append(1 if self.landmarks[self.tip_IDS[0]][0] < self.landmarks[self.tip_IDS[0] - 2][0] else 0)
# Check the status of the other four fingers
for ID in range(1, 5):
fingers.append(1 if self.landmarks[self.tip_IDS[ID]][1] < self.landmarks[self.tip_IDS[ID] - 2][1] else 0)
return fingers
def release(self):
"""
Releases the MediaPipe hand detection resources.
"""
self.hands.close()
def main():
    """
    Runs a webcam demo of HandDetector.

    Reads frames from the default camera, draws the detected hand landmarks,
    mirrors the image, overlays the frame rate and shows the result in a
    window named 'Hands' until the 'p' key is pressed or the camera stops
    delivering frames.
    """
    detector = HandDetector()
    cap = cv.VideoCapture(0)  # Capture video from the default camera
    # perf_counter() is monotonic, so the frame interval can never be negative.
    previous = time.perf_counter()
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = detector.findHands(frame)
            # Refreshes detector.landmarks; the returned dict is not needed here.
            detector.findPositions(frame)

            current = time.perf_counter()
            elapsed = current - previous
            previous = current
            # Two reads can land on the same clock tick; avoid dividing by zero.
            fps = 1 / elapsed if elapsed > 0 else 0

            frame = cv.flip(frame, 1)  # Flip frame horizontally
            cv.putText(frame, f'FPS: {int(fps)}', (10, 50), cv.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)
            cv.imshow('Hands', frame)

            # Exit when 'p' key is pressed
            if cv.waitKey(1) & 0xFF == ord('p'):
                break
    finally:
        # Release resources even when the loop exits through an exception.
        cap.release()
        detector.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()