
Commit 64433a6

Send frames to the Vision API to detect hand gestures

Signed-off-by: Mohtasim Bellah <[email protected]>
1 parent: 6cd92ba

File tree

1 file changed (+242, -3 lines)

samples/Calling/src/app/views/CallScreen.tsx

@@ -20,7 +20,7 @@ import {
   useTeamsCallAdapter
 } from '@azure/communication-react';
 import type { Profile, StartCallIdentifier, TeamsAdapterOptions } from '@azure/communication-react';
-import React, { useCallback, useMemo, useRef } from 'react';
+import React, { useCallback, useEffect, useMemo, useRef } from 'react';
 import { createAutoRefreshingCredential } from '../utils/credential';
 import { WEB_APP_TITLE } from '../utils/AppUtils';
 import { CallCompositeContainer } from './CallCompositeContainer';
@@ -147,11 +147,148 @@ type AzureCommunicationCallScreenProps = CallScreenProps & {
 
 const AzureCommunicationCallScreen = (props: AzureCommunicationCallScreenProps): JSX.Element => {
   const { afterCreate, callLocator: locator, userId, ...adapterArgs } = props;
+  const videoRef = useRef<HTMLVideoElement>(null);
+  const canvasRef = useRef<HTMLCanvasElement>(null);
 
   if (!('communicationUserId' in userId)) {
     throw new Error('A MicrosoftTeamsUserIdentifier must be provided for Teams Identity Call.');
   }
 
+  useEffect(() => {
+    //console.log('AzureCommunicationCallScreen:: useEffect in action...');
+    async function startVideoStream() {
+      //console.log('Starting video stream...');
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
+        if (videoRef.current) {
+          videoRef.current.srcObject = stream;
+        }
+      } catch (error) {
+        console.error('Error accessing webcam:', error);
+      }
+    }
+
+    startVideoStream();
+  }, []);
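This effect acquires the webcam but never stops the stream, so the camera stays live after the component unmounts. A minimal sketch of the same effect with cleanup, assuming no other consumer shares the stream:

useEffect(() => {
  let stream: MediaStream | undefined;
  async function startVideoStream() {
    try {
      stream = await navigator.mediaDevices.getUserMedia({ video: true });
      if (videoRef.current) {
        videoRef.current.srcObject = stream;
      }
    } catch (error) {
      console.error('Error accessing webcam:', error);
    }
  }
  startVideoStream();
  return () => {
    // Stop every track so the camera is released on unmount.
    stream?.getTracks().forEach((track) => track.stop());
  };
}, []);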
+  useEffect(() => {
+    let animationFrameId: number;
+
+    async function analyzeFrame() {
+      const imageUrl = await captureFrame();
+
+      if (imageUrl && imageUrl.length > 10) {
+        console.log('image url - ', imageBase64ToBlob(imageUrl));
+        await detectHandGestures(imageUrl);
+      }
+      animationFrameId = requestAnimationFrame(analyzeFrame);
+    }
+
+    function startAnalyzing() {
+      animationFrameId = requestAnimationFrame(analyzeFrame);
+    }
+
+    startAnalyzing();
+
+    return () => {
+      cancelAnimationFrame(animationFrameId);
+    };
+  }, []);
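requestAnimationFrame schedules analyzeFrame once per rendered frame (typically around 60 times per second), so this loop posts to the prediction endpoint as fast as the network allows. A sketch of an interval-based throttle instead, using the component's captureFrame and detectHandGestures; the 500 ms cadence is an assumption to tune against the endpoint's rate limits:

useEffect(() => {
  const ANALYZE_INTERVAL_MS = 500; // assumed cadence, not part of this commit
  let busy = false;
  const intervalId = window.setInterval(async () => {
    if (busy) return; // skip a tick while the previous prediction is in flight
    busy = true;
    try {
      const imageUrl = await captureFrame();
      if (imageUrl) {
        await detectHandGestures(imageUrl);
      }
    } finally {
      busy = false;
    }
  }, ANALYZE_INTERVAL_MS);
  return () => window.clearInterval(intervalId);
}, []);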
+  async function captureFrame() {
+    console.log('Capturing frame...');
+    if (videoRef.current && canvasRef.current) {
+      const video = videoRef.current;
+      const canvas = canvasRef.current;
+      const ctx = canvas.getContext('2d');
+      canvas.width = video.videoWidth;
+      canvas.height = video.videoHeight;
+      if (ctx) {
+        ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
+      }
+      return canvas.toDataURL('image/jpeg');
+    }
+    return null;
+  }
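captureFrame returns a base64 data URL, which detectHandGestures later decodes back into a Blob. canvas.toBlob produces the Blob directly and skips that round-trip; a sketch wrapping its callback API in a Promise, assuming the same videoRef and canvasRef:

function captureFrameAsBlob(): Promise<Blob | null> {
  return new Promise((resolve) => {
    const video = videoRef.current;
    const canvas = canvasRef.current;
    if (!video || !canvas) {
      resolve(null);
      return;
    }
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    canvas.getContext('2d')?.drawImage(video, 0, 0, canvas.width, canvas.height);
    // toBlob encodes asynchronously and yields a Blob, or null on failure.
    canvas.toBlob((blob) => resolve(blob), 'image/jpeg');
  });
}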
+  async function detectHandGestures1(imageBase64: string) {
+    const CUSTOM_VISION_ENDPOINT = 'https://azureaiinsravan.cognitiveservices.azure.com';
+    const CUSTOM_VISION_KEY = 'Ffnb7EK1Z65PWAn9o31l5dxMV8kP1C6rIMAn2vbPRzZ3EidaEKjvJQQJ99BBACYeBjFXJ3w3AAAFACOGXhO6';
+    const PREDICTION_KEY = 'ebc77a8a52e04e9394125c19f2dc8a16';
+    const PROJECT_ID = 'daaea539-0d1a-456b-a0fc-31e121039d56';
+    const MODEL_NAME = 'FaceExpressionAndHandGestures';
+
+    const response = await fetch(
+      //`${CUSTOM_VISION_ENDPOINT}/customvision/v3.0/Prediction/${PROJECT_ID}/classify/iterations/${MODEL_NAME}/url`,
+      `https://azureaiinsravan.cognitiveservices.azure.com/customvision/v3.0/Prediction/daaea539-0d1a-456b-a0fc-31e121039d56/detect/iterations/FaceExpressionAndHandGestures/url`,
+      {
+        method: 'POST',
+        headers: {
+          'Ocp-Apim-Subscription-Key': CUSTOM_VISION_KEY,
+          'Prediction-Key': PREDICTION_KEY,
+          'Content-Type': 'application/json'
+        },
+        //body: imageBase64ToBlob(imageBase64)
+        body: JSON.stringify({
+          Url: 'https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/printed_text.jpg'
+        })
+      }
+    );
+
+    const data = await response.json();
+    console.log('Gesture detected data - \n');
+    console.log(data);
+  }
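detectHandGestures1 posts a fixed sample-image URL to the /url variant of the Custom Vision v3.0 prediction route, while detectHandGestures below streams the captured frame bytes to the /image variant; /detect/ routes belong to object-detection projects and /classify/ to classification projects, so the route must match the project type. The endpoint, keys, project ID, and iteration name are hardcoded here; a sketch of reading them from build-time environment variables instead (the REACT_APP_ names are assumptions, not part of this commit):

const CUSTOM_VISION_ENDPOINT = process.env.REACT_APP_CUSTOM_VISION_ENDPOINT ?? ''; // assumed env var name
const PREDICTION_KEY = process.env.REACT_APP_CUSTOM_VISION_PREDICTION_KEY ?? ''; // assumed env var name
const PROJECT_ID = process.env.REACT_APP_CUSTOM_VISION_PROJECT_ID ?? ''; // assumed env var name
const MODEL_NAME = process.env.REACT_APP_CUSTOM_VISION_ITERATION ?? ''; // assumed env var name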
+  async function detectHandGestures(imageBase64: string) {
+    const CUSTOM_VISION_ENDPOINT = 'https://azureaiinsravan.cognitiveservices.azure.com';
+    const CUSTOM_VISION_KEY = 'Ffnb7EK1Z65PWAn9o31l5dxMV8kP1C6rIMAn2vbPRzZ3EidaEKjvJQQJ99BBACYeBjFXJ3w3AAAFACOGXhO6';
+    const PREDICTION_KEY = 'ebc77a8a52e04e9394125c19f2dc8a16';
+    const PROJECT_ID = 'daaea539-0d1a-456b-a0fc-31e121039d56';
+    const MODEL_NAME = 'FaceExpressionAndHandGestures';
+
+    const response = await fetch(
+      //`${CUSTOM_VISION_ENDPOINT}/customvision/v3.0/Prediction/${PROJECT_ID}/classify/iterations/${MODEL_NAME}/url`,
+      `https://azureaiinsravan.cognitiveservices.azure.com/customvision/v3.0/Prediction/daaea539-0d1a-456b-a0fc-31e121039d56/detect/iterations/FaceExpressionAndHandGestures/image`,
+      {
+        method: 'POST',
+        headers: {
+          'Ocp-Apim-Subscription-Key': CUSTOM_VISION_KEY,
+          'Prediction-Key': PREDICTION_KEY,
+          'Content-Type': 'application/octet-stream'
+        },
+        body: imageBase64ToBlob(imageBase64)
+      }
+    );
+
+    const data = await response.json();
+    console.log('Gesture detected data - \n');
+    console.log(data);
+  }
+  function imageBase64ToBlob(base64: string) {
+    const base64Data = base64.split(',')[1];
+    if (!base64Data) {
+      throw new Error('Invalid base64 string');
+    }
+    const byteCharacters = atob(base64Data);
+    const byteNumbers = new Array(byteCharacters.length);
+    for (let i = 0; i < byteCharacters.length; i++) {
+      byteNumbers[i] = byteCharacters.charCodeAt(i);
+    }
+    return new Blob([new Uint8Array(byteNumbers)], { type: 'image/jpeg' });
+  }
+  // function imageBase64ToBlob(base64: string) {
+  //   const base64Data = base64.split(',')[1];
+  //   console.log('base64Data image - ', base64Data);
+  //   if (!base64Data) {
+  //     throw new Error('Invalid base64 string');
+  //   }
+  //   const byteCharacters = atob(base64Data);
+  //   const byteNumbers = new Array(byteCharacters.length);
+  //   for (let i = 0; i < byteCharacters.length; i++) {
+  //     byteNumbers[i] = byteCharacters.charCodeAt(i);
+  //   }
+  //   return new Blob([new Uint8Array(byteNumbers)], { type: 'image/jpeg' });
+  // }
+
   const callAdapterOptions: AzureCommunicationCallAdapterOptions = useMemo(() => {
     return {
       videoBackgroundOptions: {
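The Blob helper decodes manually with atob; in browsers, fetch also accepts data: URLs, so the same conversion can be a one-liner (a sketch, equivalent to imageBase64ToBlob above):

async function dataUrlToBlob(dataUrl: string): Promise<Blob> {
  // fetch() understands data: URLs and returns the decoded bytes as a Blob.
  const response = await fetch(dataUrl);
  return response.blob();
}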
@@ -188,16 +325,112 @@ const AzureCommunicationCallScreen = (props: AzureCommunicationCallScreenProps):
     afterCreate
   );
 
-  return <CallCompositeContainer {...props} adapter={adapter} />;
+  detectHandGestures1('asdfa');
+  return (
+    <div>
+      <video ref={videoRef} autoPlay style={{ display: 'none' }} />
+      <canvas ref={canvasRef} style={{ display: 'none' }} />
+      <CallCompositeContainer {...props} adapter={adapter} />
+    </div>
+  );
 };
 
 const AzureCommunicationOutboundCallScreen = (props: AzureCommunicationCallScreenProps): JSX.Element => {
   const { afterCreate, targetCallees: targetCallees, userId, ...adapterArgs } = props;
+  const videoRef = useRef<HTMLVideoElement>(null);
+  const canvasRef = useRef<HTMLCanvasElement>(null);
 
   if (!('communicationUserId' in userId)) {
     throw new Error('A MicrosoftTeamsUserIdentifier must be provided for Teams Identity Call.');
   }
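Note that detectHandGestures1('asdfa') above runs in the render body, so it fires a network request on every render; the argument is ignored, since that variant always posts the fixed sample URL. If a one-off smoke test is the intent, a sketch of the conventional placement:

useEffect(() => {
  // Hypothetical: run the smoke-test prediction once on mount, not on every render.
  detectHandGestures1('asdfa');
}, []);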
+  useEffect(() => {
+    async function startVideoStream() {
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
+        if (videoRef.current) {
+          videoRef.current.srcObject = stream;
+        }
+      } catch (error) {
+        console.error('Error accessing webcam:', error);
+      }
+    }
+
+    startVideoStream();
+  }, []);
+
+  useEffect(() => {
+    let animationFrameId: number;
+
+    async function analyzeFrame() {
+      const imageUrl = await captureFrame();
+      if (imageUrl) {
+        await detectHandGestures(imageUrl);
+      }
+      animationFrameId = requestAnimationFrame(analyzeFrame);
+    }
+
+    function startAnalyzing() {
+      animationFrameId = requestAnimationFrame(analyzeFrame);
+    }
+
+    startAnalyzing();
+
+    return () => {
+      cancelAnimationFrame(animationFrameId);
+    };
+  }, []);
+
+  async function captureFrame() {
+    console.log('Capturing frame...');
+    if (videoRef.current && canvasRef.current) {
+      const video = videoRef.current;
+      const canvas = canvasRef.current;
+      const ctx = canvas.getContext('2d');
+      canvas.width = video.videoWidth;
+      canvas.height = video.videoHeight;
+      if (ctx) {
+        ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
+      }
+      return canvas.toDataURL('image/jpeg');
+    }
+    return null;
+  }
+  async function detectHandGestures(imageBase64: string) {
+    const CUSTOM_VISION_ENDPOINT = 'https://YOUR_CUSTOM_VISION_ENDPOINT/';
+    const CUSTOM_VISION_KEY = 'YOUR_CUSTOM_VISION_KEY';
+    const PROJECT_ID = 'YOUR_PROJECT_ID';
+    const MODEL_NAME = 'YOUR_MODEL_NAME';
+
+    const response = await fetch(
+      `${CUSTOM_VISION_ENDPOINT}/customvision/v3.0/Prediction/${PROJECT_ID}/classify/iterations/${MODEL_NAME}/image`,
+      {
+        method: 'POST',
+        headers: {
+          'Prediction-Key': CUSTOM_VISION_KEY,
+          'Content-Type': 'application/octet-stream'
+        },
+        body: imageBase64ToBlob(imageBase64)
+      }
+    );
+
+    const data = await response.json();
+    console.log('Gesture detected data - \n');
+    console.log(data);
+  }
+  function imageBase64ToBlob(base64: string) {
+    const base64Data = base64.split(',')[1];
+    if (!base64Data) {
+      throw new Error('Invalid base64 string');
+    }
+    const byteCharacters = atob(base64Data);
+    const byteNumbers = new Array(byteCharacters.length);
+    for (let i = 0; i < byteCharacters.length; i++) {
+      byteNumbers[i] = byteCharacters.charCodeAt(i);
+    }
+    return new Blob([new Uint8Array(byteNumbers)], { type: 'image/jpeg' });
+  }
+
   const callAdapterOptions: AzureCommunicationCallAdapterOptions = useMemo(() => {
     return {
       videoBackgroundOptions: {
@@ -237,7 +470,13 @@ const AzureCommunicationOutboundCallScreen = (props: AzureCommunicationCallScree
     afterCreate
   );
 
-  return <CallCompositeContainer {...props} adapter={adapter} />;
+  return (
+    <div>
+      <video ref={videoRef} autoPlay style={{ display: 'none' }} />
+      <canvas ref={canvasRef} style={{ display: 'none' }} />
+      <CallCompositeContainer {...props} adapter={adapter} />
+    </div>
+  );
 };
 
 const convertPageStateToString = (state: CallAdapterState): string => {
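Both call screens now carry identical copies of the webcam effect, the analysis loop, captureFrame, detectHandGestures, and imageBase64ToBlob. A sketch of how the shared pieces could be folded into one custom hook; the hook's name and shape are assumptions, not part of this commit:

import { useEffect, useRef } from 'react';

// Hypothetical shared hook: owns the hidden video/canvas refs,
// the webcam stream, and the per-frame analysis loop.
function useGestureDetection(detect: (imageDataUrl: string) => Promise<void>) {
  const videoRef = useRef<HTMLVideoElement>(null);
  const canvasRef = useRef<HTMLCanvasElement>(null);

  useEffect(() => {
    let stream: MediaStream | undefined;
    let animationFrameId: number;

    async function analyze() {
      const video = videoRef.current;
      const canvas = canvasRef.current;
      if (video && canvas && video.videoWidth > 0) {
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        canvas.getContext('2d')?.drawImage(video, 0, 0, canvas.width, canvas.height);
        await detect(canvas.toDataURL('image/jpeg'));
      }
      animationFrameId = requestAnimationFrame(analyze);
    }

    async function start() {
      stream = await navigator.mediaDevices.getUserMedia({ video: true });
      if (videoRef.current) {
        videoRef.current.srcObject = stream;
      }
      animationFrameId = requestAnimationFrame(analyze);
    }

    start().catch((error) => console.error('Error accessing webcam:', error));

    return () => {
      cancelAnimationFrame(animationFrameId);
      stream?.getTracks().forEach((track) => track.stop());
    };
  }, [detect]);

  return { videoRef, canvasRef };
}

Each component would then render the hidden video and canvas elements from the returned refs, exactly as in the JSX above, and pass its own detectHandGestures callback.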
