Skip to content

Commit 9cf84f2

Browse files
authored
Update (#11)
* update * add face
1 parent 04e470b commit 9cf84f2

File tree

6 files changed

+264
-103
lines changed

6 files changed

+264
-103
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,10 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
2424
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=899687170&ref=main&geo=UsEast&devcontainer_path=.devcontainer%2Fdevcontainer.json)
2525

2626
### Local environment
27-
2827
1. Make sure the following tools are installed:
2928

3029
* [Azure Developer CLI (azd)](https://aka.ms/install-azd)
31-
* [Python 3.9+](https://www.python.org/downloads/)
30+
* [Python 3.11+](https://www.python.org/downloads/)
3231

3332
2. Make a new directory called `azure-ai-content-understanding-python` and clone this template into it using the `azd` CLI:
3433

@@ -40,6 +39,7 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
4039

4140
## Configure Azure AI service resource
4241
### (Option 1) Use `azd` commands to automatically create temporary resources to run the sample
42+
1. Make sure you have permission to grant roles under the subscription
4343
1. Log in to Azure
4444
```shell
4545
azd auth login
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"description": "Generate face-aware content understanding from video.",
3+
"config": {
4+
"enableFace": true,
5+
"returnDetails": true,
6+
"locales": [
7+
"en-US",
8+
"es-ES",
9+
"es-MX",
10+
"fr-FR",
11+
"hi-IN",
12+
"it-IT",
13+
"ja-JP",
14+
"ko-KR",
15+
"pt-BR",
16+
"zh-CN"
17+
]
18+
},
19+
"fieldSchema": {
20+
"description": "Analyze videos to extract faces",
21+
"fields": {
22+
"description": {
23+
"type": "string",
24+
"description": "Describe what happened in the video segment. Include all significant details and be specific where possible. For example, use 'man,' 'woman,' 'child,' etc., rather than 'person,' and specify animals like 'dog' or 'cat.' For recognizable movie or animation characters, use their names if known.",
25+
"examples": [
26+
"A close-up of a brown, leaf-like insect camouflaged against a green background."
27+
]
28+
},
29+
"audio_description": {
30+
"type": "string",
31+
"description": "Generate a first-person audio description that narrates changes across scenes. Use 'we' and omit the movie title. Mention popular actors if known, and only include information that differs from the previous scene.",
32+
"examples": [
33+
"We see a man adjusting his hat in a mirror, looking straight at us."
34+
]
35+
}
36+
}
37+
},
38+
"scenario": "videoShot"
39+
}

notebooks/analyzer_training.ipynb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,25 @@
6868
"import json\n",
6969
"import os\n",
7070
"import sys\n",
71+
"from pathlib import Path\n",
7172
"from dotenv import find_dotenv, load_dotenv\n",
73+
"from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
7274
"\n",
7375
"# import utility package from python samples root directory\n",
74-
"py_samples_root_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
75-
"sys.path.append(py_samples_root_dir)\n",
76+
"parent_dir = Path(Path.cwd()).parent\n",
77+
"sys.path.append(str(parent_dir))\n",
7678
"from python.content_understanding_client import AzureContentUnderstandingClient\n",
7779
"\n",
7880
"load_dotenv(find_dotenv())\n",
7981
"logging.basicConfig(level=logging.INFO)\n",
8082
"\n",
83+
"credential = DefaultAzureCredential()\n",
84+
"token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n",
85+
"\n",
8186
"client = AzureContentUnderstandingClient(\n",
8287
" endpoint=os.getenv(\"AZURE_AI_ENDPOINT\"),\n",
8388
" api_version=os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\"),\n",
89+
" token_provider=token_provider,\n",
8490
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_training\",\n",
8591
")"
8692
]

notebooks/content_extraction.ipynb

Lines changed: 116 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,29 @@
5151
"import json\n",
5252
"import os\n",
5353
"import sys\n",
54+
"import uuid\n",
55+
"from pathlib import Path\n",
5456
"from dotenv import find_dotenv, load_dotenv\n",
57+
"from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
5558
"\n",
5659
"load_dotenv(find_dotenv())\n",
5760
"logging.basicConfig(level=logging.INFO)\n",
5861
"\n",
5962
"AZURE_AI_ENDPOINT = os.getenv(\"AZURE_AI_ENDPOINT\")\n",
6063
"AZURE_AI_API_VERSION = os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\")\n",
6164
"\n",
62-
"# Import utility package from python samples root directory\n",
63-
"py_samples_root_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
64-
"sys.path.append(py_samples_root_dir)\n",
65+
"# Add the parent directory to the path to use shared modules\n",
66+
"parent_dir = Path(Path.cwd()).parent\n",
67+
"sys.path.append(str(parent_dir))\n",
6568
"from python.content_understanding_client import AzureContentUnderstandingClient\n",
6669
"\n",
70+
"credential = DefaultAzureCredential()\n",
71+
"token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n",
72+
"\n",
6773
"client = AzureContentUnderstandingClient(\n",
6874
" endpoint=AZURE_AI_ENDPOINT,\n",
6975
" api_version=AZURE_AI_API_VERSION,\n",
76+
" token_provider=token_provider,\n",
7077
" x_ms_useragent=\"azure-ai-content-understanding-python/content_extraction\",\n",
7178
")"
7279
]
@@ -90,25 +97,25 @@
9097
"name": "stderr",
9198
"output_type": "stream",
9299
"text": [
93-
"INFO:python.content_understanding_client:Analyzer field-extraction-sample-97d1d17d-29b6-4af1-9078-00650666fda1 create request accepted.\n",
100+
"INFO:python.content_understanding_client:Analyzer content-doc-sample-f79b4605-4990-4788-ba6a-47d55d244212 create request accepted.\n",
94101
"INFO:python.content_understanding_client:Request result is ready after 0.00 seconds.\n",
95-
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: field-extraction-sample-97d1d17d-29b6-4af1-9078-00650666fda1\n",
96-
"INFO:python.content_understanding_client:Request cc64dcc4-0797-45d5-b18e-49c77b5b1122 in progress ...\n",
97-
"INFO:python.content_understanding_client:Request cc64dcc4-0797-45d5-b18e-49c77b5b1122 in progress ...\n",
98-
"INFO:python.content_understanding_client:Request result is ready after 4.37 seconds.\n"
102+
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: content-doc-sample-f79b4605-4990-4788-ba6a-47d55d244212\n",
103+
"INFO:python.content_understanding_client:Request 6c1f5fea-8f2e-423b-afbe-cd35872379f6 in progress ...\n",
104+
"INFO:python.content_understanding_client:Request 6c1f5fea-8f2e-423b-afbe-cd35872379f6 in progress ...\n",
105+
"INFO:python.content_understanding_client:Request result is ready after 4.56 seconds.\n"
99106
]
100107
},
101108
{
102109
"name": "stdout",
103110
"output_type": "stream",
104111
"text": [
105112
"{\n",
106-
" \"id\": \"cc64dcc4-0797-45d5-b18e-49c77b5b1122\",\n",
113+
" \"id\": \"6c1f5fea-8f2e-423b-afbe-cd35872379f6\",\n",
107114
" \"status\": \"Succeeded\",\n",
108115
" \"result\": {\n",
109-
" \"analyzerId\": \"field-extraction-sample-97d1d17d-29b6-4af1-9078-00650666fda1\",\n",
116+
" \"analyzerId\": \"content-doc-sample-f79b4605-4990-4788-ba6a-47d55d244212\",\n",
110117
" \"apiVersion\": \"2024-12-01-preview\",\n",
111-
" \"createdAt\": \"2024-12-10T06:50:03Z\",\n",
118+
" \"createdAt\": \"2024-12-10T23:29:52Z\",\n",
112119
" \"warnings\": [],\n",
113120
" \"contents\": [\n",
114121
" {\n",
@@ -133,8 +140,6 @@
133140
}
134141
],
135142
"source": [
136-
"import uuid\n",
137-
"\n",
138143
"ANALYZER_ID = \"content-doc-sample-\" + str(uuid.uuid4())\n",
139144
"ANALYZER_TEMPLATE_FILE = '../analyzer_templates/content_document.json'\n",
140145
"ANALYZER_SAMPLE_FILE = '../data/purchase_order.jpg'\n",
@@ -147,7 +152,8 @@
147152
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
148153
"result = client.poll_result(response)\n",
149154
"\n",
150-
"print(json.dumps(result, indent=2))"
155+
"print(json.dumps(result, indent=2))\n",
156+
"client.delete_analyzer(ANALYZER_ID)"
151157
]
152158
},
153159
{
@@ -186,7 +192,8 @@
186192
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
187193
"result = client.poll_result(response)\n",
188194
"\n",
189-
"print(json.dumps(result, indent=2))"
195+
"print(json.dumps(result, indent=2))\n",
196+
"client.delete_analyzer(ANALYZER_ID)"
190197
]
191198
},
192199
{
@@ -222,8 +229,101 @@
222229
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
223230
"result = client.poll_result(response)\n",
224231
"\n",
232+
"print(json.dumps(result, indent=2))\n",
233+
"client.delete_analyzer(ANALYZER_ID)"
234+
]
235+
},
236+
{
237+
"cell_type": "markdown",
238+
"metadata": {},
239+
"source": [
240+
"## Video Content with Face"
241+
]
242+
},
243+
{
244+
"cell_type": "code",
245+
"execution_count": null,
246+
"metadata": {},
247+
"outputs": [],
248+
"source": [
249+
"ANALYZER_ID = \"content-video-face-sample-\" + str(uuid.uuid4())\n",
250+
"ANALYZER_TEMPLATE_FILE = '../analyzer_templates/face_aware_in_video.json'\n",
251+
"ANALYZER_SAMPLE_FILE = '../data/video.mp4'\n",
252+
"\n",
253+
"# Create analyzer\n",
254+
"response = client.begin_create_analyzer(ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_FILE)\n",
255+
"result = client.poll_result(response)\n",
256+
"\n",
257+
"# Analyzer file\n",
258+
"response = client.begin_analyze(ANALYZER_ID, file_location=ANALYZER_SAMPLE_FILE)\n",
259+
"result = client.poll_result(response)\n",
260+
"\n",
225261
"print(json.dumps(result, indent=2))"
226262
]
263+
},
264+
{
265+
"cell_type": "markdown",
266+
"metadata": {},
267+
"source": [
268+
"### Get and Save Key Frames and Face Thumbnails"
269+
]
270+
},
271+
{
272+
"cell_type": "code",
273+
"execution_count": null,
274+
"metadata": {},
275+
"outputs": [],
276+
"source": [
277+
"from PIL import Image\n",
278+
"from io import BytesIO\n",
279+
"import re\n",
280+
"\n",
281+
"\n",
282+
"def save_image(image_id: str):\n",
283+
" raw_image = client.get_image_from_analyze_operation(analyze_response=response,\n",
284+
" image_id=image_id\n",
285+
" )\n",
286+
" image = Image.open(BytesIO(raw_image))\n",
287+
" # image.show()\n",
288+
" Path(\".cache\").mkdir(exist_ok=True)\n",
289+
" image.save(f\".cache/{image_id}.jpg\", \"JPEG\")\n",
290+
"\n",
291+
"\n",
292+
"# Initialize sets for unique face IDs and keyframe IDs\n",
293+
"face_ids = set()\n",
294+
"keyframe_ids = set()\n",
295+
"\n",
296+
"# Extract unique face IDs safely\n",
297+
"result_data = result.get(\"result\", {})\n",
298+
"contents = result_data.get(\"contents\", [])\n",
299+
"\n",
300+
"# Iterate over contents to find faces and keyframes if available\n",
301+
"for content in contents:\n",
302+
" # Safely retrieve face IDs if \"faces\" exists and is a list\n",
303+
" faces = content.get(\"faces\", [])\n",
304+
" if isinstance(faces, list):\n",
305+
" for face in faces:\n",
306+
" face_id = face.get(\"faceId\")\n",
307+
" if face_id:\n",
308+
" face_ids.add(f\"face.{face_id}\")\n",
309+
"\n",
310+
" # Extract keyframe IDs from \"markdown\" if it exists and is a string\n",
311+
" markdown_content = content.get(\"markdown\", \"\")\n",
312+
" if isinstance(markdown_content, str):\n",
313+
" keyframe_ids.update(re.findall(r\"(keyFrame\\.\\d+)\\.jpg\", markdown_content))\n",
314+
"\n",
315+
"# Output the results\n",
316+
"print(\"Unique Face IDs:\", face_ids)\n",
317+
"print(\"Unique Keyframe IDs:\", keyframe_ids)\n",
318+
"\n",
319+
"# Save all face images\n",
320+
"for face_id in face_ids:\n",
321+
" save_image(face_id)\n",
322+
"\n",
323+
"# Save all keyframe images\n",
324+
"for keyframe_id in keyframe_ids:\n",
325+
" save_image(keyframe_id)"
326+
]
227327
}
228328
],
229329
"metadata": {
@@ -242,7 +342,7 @@
242342
"name": "python",
243343
"nbconvert_exporter": "python",
244344
"pygments_lexer": "ipython3",
245-
"version": "3.8.10"
345+
"version": "3.11.11"
246346
}
247347
},
248348
"nbformat": 4,

0 commit comments

Comments
 (0)