Skip to content

Commit 9cf84f2

Browse files
authored
Update (#11)
* update * add face
1 parent 04e470b commit 9cf84f2

File tree

6 files changed

+264
-103
lines changed

6 files changed

+264
-103
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,10 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
2424
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=899687170&ref=main&geo=UsEast&devcontainer_path=.devcontainer%2Fdevcontainer.json)
2525

2626
### Local environment
27-
2827
1. Make sure the following tools are installed:
2928

3029
* [Azure Developer CLI (azd)](https://aka.ms/install-azd)
31-
* [Python 3.9+](https://www.python.org/downloads/)
30+
* [Python 3.11+](https://www.python.org/downloads/)
3231

3332
2. Make a new directory called `azure-ai-content-understanding-python` and clone this template into it using the `azd` CLI:
3433

@@ -40,6 +39,7 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
4039

4140
## Configure Azure AI service resource
4241
### (Option 1) Use `azd` commands to automatically create temporary resources to run the sample
42+
1. Make sure you have permission to grant roles under the subscription
4343
1. Log in to Azure
4444
```shell
4545
azd auth login
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"description": "Generate face-aware content understanding from video.",
3+
"config": {
4+
"enableFace": true,
5+
"returnDetails": true,
6+
"locales": [
7+
"en-US",
8+
"es-ES",
9+
"es-MX",
10+
"fr-FR",
11+
"hi-IN",
12+
"it-IT",
13+
"ja-JP",
14+
"ko-KR",
15+
"pt-BR",
16+
"zh-CN"
17+
]
18+
},
19+
"fieldSchema": {
20+
"description": "Analyze videos to extract faces",
21+
"fields": {
22+
"description": {
23+
"type": "string",
24+
"description": "Describe what happened in the video segment. Include all significant details and be specific where possible. For example, use 'man,' 'woman,' 'child,' etc., rather than 'person,' and specify animals like 'dog' or 'cat.' For recognizable movie or animation characters, use their names if known.",
25+
"examples": [
26+
"A close-up of a brown, leaf-like insect camouflaged against a green background."
27+
]
28+
},
29+
"audio_description": {
30+
"type": "string",
31+
"description": "Generate a first-person audio description that narrates changes across scenes. Use 'we' and omit the movie title. Mention popular actors if known, and only include information that differs from the previous scene.",
32+
"examples": [
33+
"We see a man adjusting his hat in a mirror, looking straight at us."
34+
]
35+
}
36+
}
37+
},
38+
"scenario": "videoShot"
39+
}

notebooks/analyzer_training.ipynb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,25 @@
6868
"import json\n",
6969
"import os\n",
7070
"import sys\n",
71+
"from pathlib import Path\n",
7172
"from dotenv import find_dotenv, load_dotenv\n",
73+
"from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
7274
"\n",
7375
"# import utility package from python samples root directory\n",
74-
"py_samples_root_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
75-
"sys.path.append(py_samples_root_dir)\n",
76+
"parent_dir = Path(Path.cwd()).parent\n",
77+
"sys.path.append(str(parent_dir))\n",
7678
"from python.content_understanding_client import AzureContentUnderstandingClient\n",
7779
"\n",
7880
"load_dotenv(find_dotenv())\n",
7981
"logging.basicConfig(level=logging.INFO)\n",
8082
"\n",
83+
"credential = DefaultAzureCredential()\n",
84+
"token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n",
85+
"\n",
8186
"client = AzureContentUnderstandingClient(\n",
8287
" endpoint=os.getenv(\"AZURE_AI_ENDPOINT\"),\n",
8388
" api_version=os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\"),\n",
89+
" token_provider=token_provider,\n",
8490
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_training\",\n",
8591
")"
8692
]

notebooks/content_extraction.ipynb

Lines changed: 116 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,29 @@
5151
"import json\n",
5252
"import os\n",
5353
"import sys\n",
54+
"import uuid\n",
55+
"from pathlib import Path\n",
5456
"from dotenv import find_dotenv, load_dotenv\n",
57+
"from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
5558
"\n",
5659
"load_dotenv(find_dotenv())\n",
5760
"logging.basicConfig(level=logging.INFO)\n",
5861
"\n",
5962
"AZURE_AI_ENDPOINT = os.getenv(\"AZURE_AI_ENDPOINT\")\n",
6063
"AZURE_AI_API_VERSION = os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\")\n",
6164
"\n",
62-
"# Import utility package from python samples root directory\n",
63-
"py_samples_root_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
64-
"sys.path.append(py_samples_root_dir)\n",
65+
"# Add the parent directory to the path to use shared modules\n",
66+
"parent_dir = Path(Path.cwd()).parent\n",
67+
"sys.path.append(str(parent_dir))\n",
6568
"from python.content_understanding_client import AzureContentUnderstandingClient\n",
6669
"\n",
70+
"credential = DefaultAzureCredential()\n",
71+
"token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n",
72+
"\n",
6773
"client = AzureContentUnderstandingClient(\n",
6874
" endpoint=AZURE_AI_ENDPOINT,\n",
6975
" api_version=AZURE_AI_API_VERSION,\n",
76+
" token_provider=token_provider,\n",
7077
" x_ms_useragent=\"azure-ai-content-understanding-python/content_extraction\",\n",
7178
")"
7279
]
@@ -90,25 +97,25 @@
9097
"name": "stderr",
9198
"output_type": "stream",
9299
"text": [
93-
"INFO:python.content_understanding_client:Analyzer field-extraction-sample-97d1d17d-29b6-4af1-9078-00650666fda1 create request accepted.\n",
100+
"INFO:python.content_understanding_client:Analyzer content-doc-sample-f79b4605-4990-4788-ba6a-47d55d244212 create request accepted.\n",
94101
"INFO:python.content_understanding_client:Request result is ready after 0.00 seconds.\n",
95-
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: field-extraction-sample-97d1d17d-29b6-4af1-9078-00650666fda1\n",
96-
"INFO:python.content_understanding_client:Request cc64dcc4-0797-45d5-b18e-49c77b5b1122 in progress ...\n",
97-
"INFO:python.content_understanding_client:Request cc64dcc4-0797-45d5-b18e-49c77b5b1122 in progress ...\n",
98-
"INFO:python.content_understanding_client:Request result is ready after 4.37 seconds.\n"
102+
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: content-doc-sample-f79b4605-4990-4788-ba6a-47d55d244212\n",
103+
"INFO:python.content_understanding_client:Request 6c1f5fea-8f2e-423b-afbe-cd35872379f6 in progress ...\n",
104+
"INFO:python.content_understanding_client:Request 6c1f5fea-8f2e-423b-afbe-cd35872379f6 in progress ...\n",
105+
"INFO:python.content_understanding_client:Request result is ready after 4.56 seconds.\n"
99106
]
100107
},
101108
{
102109
"name": "stdout",
103110
"output_type": "stream",
104111
"text": [
105112
"{\n",
106-
" \"id\": \"cc64dcc4-0797-45d5-b18e-49c77b5b1122\",\n",
113+
" \"id\": \"6c1f5fea-8f2e-423b-afbe-cd35872379f6\",\n",
107114
" \"status\": \"Succeeded\",\n",
108115
" \"result\": {\n",
109-
" \"analyzerId\": \"field-extraction-sample-97d1d17d-29b6-4af1-9078-00650666fda1\",\n",
116+
" \"analyzerId\": \"content-doc-sample-f79b4605-4990-4788-ba6a-47d55d244212\",\n",
110117
" \"apiVersion\": \"2024-12-01-preview\",\n",
111-
" \"createdAt\": \"2024-12-10T06:50:03Z\",\n",
118+
" \"createdAt\": \"2024-12-10T23:29:52Z\",\n",
112119
" \"warnings\": [],\n",
113120
" \"contents\": [\n",
114121
" {\n",
@@ -133,8 +140,6 @@
133140
}
134141
],
135142
"source": [
136-
"import uuid\n",
137-
"\n",
138143
"ANALYZER_ID = \"content-doc-sample-\" + str(uuid.uuid4())\n",
139144
"ANALYZER_TEMPLATE_FILE = '../analyzer_templates/content_document.json'\n",
140145
"ANALYZER_SAMPLE_FILE = '../data/purchase_order.jpg'\n",
@@ -147,7 +152,8 @@
147152
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
148153
"result = client.poll_result(response)\n",
149154
"\n",
150-
"print(json.dumps(result, indent=2))"
155+
"print(json.dumps(result, indent=2))\n",
156+
"client.delete_analyzer(ANALYZER_ID)"
151157
]
152158
},
153159
{
@@ -186,7 +192,8 @@
186192
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
187193
"result = client.poll_result(response)\n",
188194
"\n",
189-
"print(json.dumps(result, indent=2))"
195+
"print(json.dumps(result, indent=2))\n",
196+
"client.delete_analyzer(ANALYZER_ID)"
190197
]
191198
},
192199
{
@@ -222,8 +229,101 @@
222229
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
223230
"result = client.poll_result(response)\n",
224231
"\n",
232+
"print(json.dumps(result, indent=2))\n",
233+
"client.delete_analyzer(ANALYZER_ID)"
234+
]
235+
},
236+
{
237+
"cell_type": "markdown",
238+
"metadata": {},
239+
"source": [
240+
"## Video Content with Face"
241+
]
242+
},
243+
{
244+
"cell_type": "code",
245+
"execution_count": null,
246+
"metadata": {},
247+
"outputs": [],
248+
"source": [
249+
"ANALYZER_ID = \"content-video-face-sample-\" + str(uuid.uuid4())\n",
250+
"ANALYZER_TEMPLATE_FILE = '../analyzer_templates/face_aware_in_video.json'\n",
251+
"ANALYZER_SAMPLE_FILE = '../data/video.mp4'\n",
252+
"\n",
253+
"# Create analyzer\n",
254+
"response = client.begin_create_analyzer(ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_FILE)\n",
255+
"result = client.poll_result(response)\n",
256+
"\n",
257+
"# Analyzer file\n",
258+
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
259+
"result = client.poll_result(response)\n",
260+
"\n",
225261
"print(json.dumps(result, indent=2))"
226262
]
263+
},
264+
{
265+
"cell_type": "markdown",
266+
"metadata": {},
267+
"source": [
268+
"### Get and Save Key Frames and Face Thumbnails"
269+
]
270+
},
271+
{
272+
"cell_type": "code",
273+
"execution_count": null,
274+
"metadata": {},
275+
"outputs": [],
276+
"source": [
277+
"from PIL import Image\n",
278+
"from io import BytesIO\n",
279+
"import re\n",
280+
"\n",
281+
"\n",
282+
"def save_image(image_id: str):\n",
283+
" raw_image = client.get_image_from_analyze_operation(analyze_response=response,\n",
284+
" image_id=image_id\n",
285+
" )\n",
286+
" image = Image.open(BytesIO(raw_image))\n",
287+
" # image.show()\n",
288+
" Path(\".cache\").mkdir(exist_ok=True)\n",
289+
" image.save(f\".cache/{image_id}.jpg\", \"JPEG\")\n",
290+
"\n",
291+
"\n",
292+
"# Initialize sets for unique face IDs and keyframe IDs\n",
293+
"face_ids = set()\n",
294+
"keyframe_ids = set()\n",
295+
"\n",
296+
"# Extract unique face IDs safely\n",
297+
"result_data = result.get(\"result\", {})\n",
298+
"contents = result_data.get(\"contents\", [])\n",
299+
"\n",
300+
"# Iterate over contents to find faces and keyframes if available\n",
301+
"for content in contents:\n",
302+
" # Safely retrieve face IDs if \"faces\" exists and is a list\n",
303+
" faces = content.get(\"faces\", [])\n",
304+
" if isinstance(faces, list):\n",
305+
" for face in faces:\n",
306+
" face_id = face.get(\"faceId\")\n",
307+
" if face_id:\n",
308+
" face_ids.add(f\"face.{face_id}\")\n",
309+
"\n",
310+
" # Extract keyframe IDs from \"markdown\" if it exists and is a string\n",
311+
" markdown_content = content.get(\"markdown\", \"\")\n",
312+
" if isinstance(markdown_content, str):\n",
313+
" keyframe_ids.update(re.findall(r\"(keyFrame\\.\\d+)\\.jpg\", markdown_content))\n",
314+
"\n",
315+
"# Output the results\n",
316+
"print(\"Unique Face IDs:\", face_ids)\n",
317+
"print(\"Unique Keyframe IDs:\", keyframe_ids)\n",
318+
"\n",
319+
"# Save all face images\n",
320+
"for face_id in face_ids:\n",
321+
" save_image(face_id)\n",
322+
"\n",
323+
"# Save all keyframe images\n",
324+
"for keyframe_id in keyframe_ids:\n",
325+
" save_image(keyframe_id)"
326+
]
227327
}
228328
],
229329
"metadata": {
@@ -242,7 +342,7 @@
242342
"name": "python",
243343
"nbconvert_exporter": "python",
244344
"pygments_lexer": "ipython3",
245-
"version": "3.8.10"
345+
"version": "3.11.11"
246346
}
247347
},
248348
"nbformat": 4,

0 commit comments

Comments
 (0)