Skip to content

Commit 402ec1b

Browse files
authored
Add extraction samples (#2)
* extraction * env * env * env * env * clean
1 parent decd278 commit 402ec1b

20 files changed

+929
-47
lines changed

.devcontainer/devcontainer.json

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"name": "Azure Content Understanding Demo",
3+
"image": "mcr.microsoft.com/devcontainers/python:3.11",
4+
"customizations": {
5+
"vscode": {
6+
"extensions": [
7+
"ms-azuretools.azure-dev",
8+
"ms-azuretools.vscode-bicep",
9+
"ms-python.python",
10+
"ms-toolsai.jupyter",
11+
"esbenp.prettier-vscode"
12+
]
13+
}
14+
}
15+
}

.gitattributes

+254
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
* text=auto
2+
*.sh text eol=lf
3+
run text eol=lf
4+
5+
# lfs tracked extensions
6+
*.dll filter=lfs diff=lfs merge=lfs -text
7+
*.lzma filter=lfs diff=lfs merge=lfs -text
8+
*.ogv filter=lfs diff=lfs merge=lfs -text
9+
*.thmx filter=lfs diff=lfs merge=lfs -text
10+
*.mov filter=lfs diff=lfs merge=lfs -text
11+
*.raw filter=lfs diff=lfs merge=lfs -text
12+
*.scpt filter=lfs diff=lfs merge=lfs -text
13+
*.dvb filter=lfs diff=lfs merge=lfs -text
14+
*.wmv filter=lfs diff=lfs merge=lfs -text
15+
*.xlsm filter=lfs diff=lfs merge=lfs -text
16+
*.tif filter=lfs diff=lfs merge=lfs -text
17+
*.cr2 filter=lfs diff=lfs merge=lfs -text
18+
*.gzip filter=lfs diff=lfs merge=lfs -text
19+
*.pnm filter=lfs diff=lfs merge=lfs -text
20+
*.ppm filter=lfs diff=lfs merge=lfs -text
21+
*.tbz2 filter=lfs diff=lfs merge=lfs -text
22+
*.rgb filter=lfs diff=lfs merge=lfs -text
23+
*.ttc filter=lfs diff=lfs merge=lfs -text
24+
*.aiff filter=lfs diff=lfs merge=lfs -text
25+
*.avi filter=lfs diff=lfs merge=lfs -text
26+
*.fh filter=lfs diff=lfs merge=lfs -text
27+
*.m3u filter=lfs diff=lfs merge=lfs -text
28+
*.pot filter=lfs diff=lfs merge=lfs -text
29+
*.sketch filter=lfs diff=lfs merge=lfs -text
30+
*.zipx filter=lfs diff=lfs merge=lfs -text
31+
*.dmg filter=lfs diff=lfs merge=lfs -text
32+
*.fla filter=lfs diff=lfs merge=lfs -text
33+
*.h263 filter=lfs diff=lfs merge=lfs -text
34+
*.ico filter=lfs diff=lfs merge=lfs -text
35+
*.mmr filter=lfs diff=lfs merge=lfs -text
36+
*.pyv filter=lfs diff=lfs merge=lfs -text
37+
*.cab filter=lfs diff=lfs merge=lfs -text
38+
*.pbm filter=lfs diff=lfs merge=lfs -text
39+
*.rmvb filter=lfs diff=lfs merge=lfs -text
40+
*.xmind filter=lfs diff=lfs merge=lfs -text
41+
*.xpm filter=lfs diff=lfs merge=lfs -text
42+
*.aif filter=lfs diff=lfs merge=lfs -text
43+
*.f4v filter=lfs diff=lfs merge=lfs -text
44+
*.npx filter=lfs diff=lfs merge=lfs -text
45+
*.webm filter=lfs diff=lfs merge=lfs -text
46+
*.xlam filter=lfs diff=lfs merge=lfs -text
47+
*.docx filter=lfs diff=lfs merge=lfs -text
48+
*.lib filter=lfs diff=lfs merge=lfs -text
49+
*.udf filter=lfs diff=lfs merge=lfs -text
50+
*.wim filter=lfs diff=lfs merge=lfs -text
51+
*.woff2 filter=lfs diff=lfs merge=lfs -text
52+
*.h264 filter=lfs diff=lfs merge=lfs -text
53+
*.mj2 filter=lfs diff=lfs merge=lfs -text
54+
*.oga filter=lfs diff=lfs merge=lfs -text
55+
*.alz filter=lfs diff=lfs merge=lfs -text
56+
*.btif filter=lfs diff=lfs merge=lfs -text
57+
*.s3m filter=lfs diff=lfs merge=lfs -text
58+
*.pcx filter=lfs diff=lfs merge=lfs -text
59+
*.wbmp filter=lfs diff=lfs merge=lfs -text
60+
*.adp filter=lfs diff=lfs merge=lfs -text
61+
*.cmx filter=lfs diff=lfs merge=lfs -text
62+
*.xls filter=lfs diff=lfs merge=lfs -text
63+
*.xpi filter=lfs diff=lfs merge=lfs -text
64+
*.rlc filter=lfs diff=lfs merge=lfs -text
65+
*.JPEG filter=lfs diff=lfs merge=lfs -text
66+
*.ktx filter=lfs diff=lfs merge=lfs -text
67+
*.rar filter=lfs diff=lfs merge=lfs -text
68+
*.rip filter=lfs diff=lfs merge=lfs -text
69+
*.au filter=lfs diff=lfs merge=lfs -text
70+
*.h261 filter=lfs diff=lfs merge=lfs -text
71+
*.ogg filter=lfs diff=lfs merge=lfs -text
72+
*.class filter=lfs diff=lfs merge=lfs -text
73+
*.djvu filter=lfs diff=lfs merge=lfs -text
74+
*.lzo filter=lfs diff=lfs merge=lfs -text
75+
*.viv filter=lfs diff=lfs merge=lfs -text
76+
*.weba filter=lfs diff=lfs merge=lfs -text
77+
*.DS_Store filter=lfs diff=lfs merge=lfs -text
78+
*.ras filter=lfs diff=lfs merge=lfs -text
79+
*.jpgv filter=lfs diff=lfs merge=lfs -text
80+
*.pages filter=lfs diff=lfs merge=lfs -text
81+
*.wax filter=lfs diff=lfs merge=lfs -text
82+
*.caf filter=lfs diff=lfs merge=lfs -text
83+
*.dts filter=lfs diff=lfs merge=lfs -text
84+
*.pps filter=lfs diff=lfs merge=lfs -text
85+
*.ppsx filter=lfs diff=lfs merge=lfs -text
86+
*.tgz filter=lfs diff=lfs merge=lfs -text
87+
*.xz filter=lfs diff=lfs merge=lfs -text
88+
*.dwg filter=lfs diff=lfs merge=lfs -text
89+
*.fli filter=lfs diff=lfs merge=lfs -text
90+
*.lvp filter=lfs diff=lfs merge=lfs -text
91+
*.otf filter=lfs diff=lfs merge=lfs -text
92+
*.wav filter=lfs diff=lfs merge=lfs -text
93+
*.PNG filter=lfs diff=lfs merge=lfs -text
94+
*.rtf filter=lfs diff=lfs merge=lfs -text
95+
*.whl filter=lfs diff=lfs merge=lfs -text
96+
*.bin filter=lfs diff=lfs merge=lfs -text
97+
*.dra filter=lfs diff=lfs merge=lfs -text
98+
*.eot filter=lfs diff=lfs merge=lfs -text
99+
*.img filter=lfs diff=lfs merge=lfs -text
100+
*.lzh filter=lfs diff=lfs merge=lfs -text
101+
*.dotm filter=lfs diff=lfs merge=lfs -text
102+
*.flac filter=lfs diff=lfs merge=lfs -text
103+
*.mng filter=lfs diff=lfs merge=lfs -text
104+
*.qt filter=lfs diff=lfs merge=lfs -text
105+
*.ttf filter=lfs diff=lfs merge=lfs -text
106+
*.fvt filter=lfs diff=lfs merge=lfs -text
107+
*.JPG filter=lfs diff=lfs merge=lfs -text
108+
*.movie filter=lfs diff=lfs merge=lfs -text
109+
*.pic filter=lfs diff=lfs merge=lfs -text
110+
*.ppsm filter=lfs diff=lfs merge=lfs -text
111+
*.tiff filter=lfs diff=lfs merge=lfs -text
112+
*.epub filter=lfs diff=lfs merge=lfs -text
113+
*.g3 filter=lfs diff=lfs merge=lfs -text
114+
*.jxr filter=lfs diff=lfs merge=lfs -text
115+
*.midi filter=lfs diff=lfs merge=lfs -text
116+
*.mobi filter=lfs diff=lfs merge=lfs -text
117+
*.apk filter=lfs diff=lfs merge=lfs -text
118+
*.pdb filter=lfs diff=lfs merge=lfs -text
119+
*.pptm filter=lfs diff=lfs merge=lfs -text
120+
*.shar filter=lfs diff=lfs merge=lfs -text
121+
*.mp3 filter=lfs diff=lfs merge=lfs -text
122+
*.nef filter=lfs diff=lfs merge=lfs -text
123+
*.so filter=lfs diff=lfs merge=lfs -text
124+
*.3gp filter=lfs diff=lfs merge=lfs -text
125+
*.bz2 filter=lfs diff=lfs merge=lfs -text
126+
*.bzip2 filter=lfs diff=lfs merge=lfs -text
127+
*.egg filter=lfs diff=lfs merge=lfs -text
128+
*.graffle filter=lfs diff=lfs merge=lfs -text
129+
*.tlz filter=lfs diff=lfs merge=lfs -text
130+
*.xm filter=lfs diff=lfs merge=lfs -text
131+
*.xwd filter=lfs diff=lfs merge=lfs -text
132+
*.bh filter=lfs diff=lfs merge=lfs -text
133+
*.mka filter=lfs diff=lfs merge=lfs -text
134+
*.pdf filter=lfs diff=lfs merge=lfs -text
135+
*.dcm filter=lfs diff=lfs merge=lfs -text
136+
*.mpga filter=lfs diff=lfs merge=lfs -text
137+
*.wma filter=lfs diff=lfs merge=lfs -text
138+
*.ipa filter=lfs diff=lfs merge=lfs -text
139+
*.mpeg filter=lfs diff=lfs merge=lfs -text
140+
*.dxf filter=lfs diff=lfs merge=lfs -text
141+
*.smv filter=lfs diff=lfs merge=lfs -text
142+
*.tar filter=lfs diff=lfs merge=lfs -text
143+
*.xif filter=lfs diff=lfs merge=lfs -text
144+
*.ai filter=lfs diff=lfs merge=lfs -text
145+
*.asf filter=lfs diff=lfs merge=lfs -text
146+
*.mkv filter=lfs diff=lfs merge=lfs -text
147+
*.tbz filter=lfs diff=lfs merge=lfs -text
148+
*.wdp filter=lfs diff=lfs merge=lfs -text
149+
*.dng filter=lfs diff=lfs merge=lfs -text
150+
*.xltx filter=lfs diff=lfs merge=lfs -text
151+
*.jpm filter=lfs diff=lfs merge=lfs -text
152+
*.3g2 filter=lfs diff=lfs merge=lfs -text
153+
*.csv filter=lfs diff=lfs merge=lfs -text
154+
*.doc filter=lfs diff=lfs merge=lfs -text
155+
*.lz filter=lfs diff=lfs merge=lfs -text
156+
*.xlsx filter=lfs diff=lfs merge=lfs -text
157+
*.fpx filter=lfs diff=lfs merge=lfs -text
158+
*.fst filter=lfs diff=lfs merge=lfs -text
159+
*.mxu filter=lfs diff=lfs merge=lfs -text
160+
*.slk filter=lfs diff=lfs merge=lfs -text
161+
*.xlt filter=lfs diff=lfs merge=lfs -text
162+
*.dot filter=lfs diff=lfs merge=lfs -text
163+
*.jpeg filter=lfs diff=lfs merge=lfs -text
164+
*.mp4 filter=lfs diff=lfs merge=lfs -text
165+
*.ppa filter=lfs diff=lfs merge=lfs -text
166+
*.uvp filter=lfs diff=lfs merge=lfs -text
167+
*.cpio filter=lfs diff=lfs merge=lfs -text
168+
*.pptx filter=lfs diff=lfs merge=lfs -text
169+
*.mid filter=lfs diff=lfs merge=lfs -text
170+
*.pya filter=lfs diff=lfs merge=lfs -text
171+
*.arj filter=lfs diff=lfs merge=lfs -text
172+
*.bak filter=lfs diff=lfs merge=lfs -text
173+
*.cgm filter=lfs diff=lfs merge=lfs -text
174+
*.xbm filter=lfs diff=lfs merge=lfs -text
175+
*.ape filter=lfs diff=lfs merge=lfs -text
176+
*.jar filter=lfs diff=lfs merge=lfs -text
177+
*.key filter=lfs diff=lfs merge=lfs -text
178+
*.uvi filter=lfs diff=lfs merge=lfs -text
179+
*.wmx filter=lfs diff=lfs merge=lfs -text
180+
*.baml filter=lfs diff=lfs merge=lfs -text
181+
*.gif filter=lfs diff=lfs merge=lfs -text
182+
*.potx filter=lfs diff=lfs merge=lfs -text
183+
*.rz filter=lfs diff=lfs merge=lfs -text
184+
*.s7z filter=lfs diff=lfs merge=lfs -text
185+
*.z filter=lfs diff=lfs merge=lfs -text
186+
*.aac filter=lfs diff=lfs merge=lfs -text
187+
*.png filter=lfs diff=lfs merge=lfs -text
188+
*.uvh filter=lfs diff=lfs merge=lfs -text
189+
*.wm filter=lfs diff=lfs merge=lfs -text
190+
*.wvx filter=lfs diff=lfs merge=lfs -text
191+
*.bk filter=lfs diff=lfs merge=lfs -text
192+
*.flv filter=lfs diff=lfs merge=lfs -text
193+
*.m4a filter=lfs diff=lfs merge=lfs -text
194+
*.ppt filter=lfs diff=lfs merge=lfs -text
195+
*.3ds filter=lfs diff=lfs merge=lfs -text
196+
*.pyc filter=lfs diff=lfs merge=lfs -text
197+
*.vob filter=lfs diff=lfs merge=lfs -text
198+
*.swf filter=lfs diff=lfs merge=lfs -text
199+
*.exe filter=lfs diff=lfs merge=lfs -text
200+
*.woff filter=lfs diff=lfs merge=lfs -text
201+
*.pea filter=lfs diff=lfs merge=lfs -text
202+
*.psd filter=lfs diff=lfs merge=lfs -text
203+
*.pyo filter=lfs diff=lfs merge=lfs -text
204+
*.resources filter=lfs diff=lfs merge=lfs -text
205+
*.tga filter=lfs diff=lfs merge=lfs -text
206+
*.BMP filter=lfs diff=lfs merge=lfs -text
207+
*.fbs filter=lfs diff=lfs merge=lfs -text
208+
*.numbers filter=lfs diff=lfs merge=lfs -text
209+
*.war filter=lfs diff=lfs merge=lfs -text
210+
*.xla filter=lfs diff=lfs merge=lfs -text
211+
*.o filter=lfs diff=lfs merge=lfs -text
212+
*.pgm filter=lfs diff=lfs merge=lfs -text
213+
*.potm filter=lfs diff=lfs merge=lfs -text
214+
*.dtshd filter=lfs diff=lfs merge=lfs -text
215+
*.gz filter=lfs diff=lfs merge=lfs -text
216+
*.icns filter=lfs diff=lfs merge=lfs -text
217+
*.m4v filter=lfs diff=lfs merge=lfs -text
218+
*.mht filter=lfs diff=lfs merge=lfs -text
219+
*.zip filter=lfs diff=lfs merge=lfs -text
220+
*.sgi filter=lfs diff=lfs merge=lfs -text
221+
*.sub filter=lfs diff=lfs merge=lfs -text
222+
*.uvu filter=lfs diff=lfs merge=lfs -text
223+
*.cur filter=lfs diff=lfs merge=lfs -text
224+
*.eol filter=lfs diff=lfs merge=lfs -text
225+
*.jpg filter=lfs diff=lfs merge=lfs -text
226+
*.mpg filter=lfs diff=lfs merge=lfs -text
227+
*.rmf filter=lfs diff=lfs merge=lfs -text
228+
*.xltm filter=lfs diff=lfs merge=lfs -text
229+
*.uvs filter=lfs diff=lfs merge=lfs -text
230+
*.xlsb filter=lfs diff=lfs merge=lfs -text
231+
*.a filter=lfs diff=lfs merge=lfs -text
232+
*.bmp filter=lfs diff=lfs merge=lfs -text
233+
*.docm filter=lfs diff=lfs merge=lfs -text
234+
*.iso filter=lfs diff=lfs merge=lfs -text
235+
*.mar filter=lfs diff=lfs merge=lfs -text
236+
*.ecelp7470 filter=lfs diff=lfs merge=lfs -text
237+
*.ecelp9600 filter=lfs diff=lfs merge=lfs -text
238+
*.ief filter=lfs diff=lfs merge=lfs -text
239+
*.lha filter=lfs diff=lfs merge=lfs -text
240+
*.ar filter=lfs diff=lfs merge=lfs -text
241+
*.dat filter=lfs diff=lfs merge=lfs -text
242+
*.ppam filter=lfs diff=lfs merge=lfs -text
243+
*.deb filter=lfs diff=lfs merge=lfs -text
244+
*.mp4a filter=lfs diff=lfs merge=lfs -text
245+
*.txz filter=lfs diff=lfs merge=lfs -text
246+
*.ecelp4800 filter=lfs diff=lfs merge=lfs -text
247+
*.GIF filter=lfs diff=lfs merge=lfs -text
248+
*.mdi filter=lfs diff=lfs merge=lfs -text
249+
*.uvm filter=lfs diff=lfs merge=lfs -text
250+
*.webp filter=lfs diff=lfs merge=lfs -text
251+
*.7z filter=lfs diff=lfs merge=lfs -text
252+
*.dsk filter=lfs diff=lfs merge=lfs -text
253+
*.sil filter=lfs diff=lfs merge=lfs -text
254+
*.dex filter=lfs diff=lfs merge=lfs -text

CHANGELOG.md

-13
This file was deleted.

CODE_OF_CONDUCT.md

-10
This file was deleted.

LICENSE.md

-21
This file was deleted.

README.md

+23-3
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,34 @@ Welcome! Content Understanding is a solution that analyzes and comprehends vario
55
- The contents of this repository default to the latest preview version: **(2024-12-01-preview)**.
66

77

8-
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=879881662&ref=main&geo=UsEast)
9-
10-
118
## Features
129

1310
Azure AI Content Understanding is a new Generative AI based [Azure AI service](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/overview), designed to process/ingest content of any type (document, image, audio, and video) into a user-defined output format. Content Understanding offers a streamlined process to reason over large amounts of unstructured data, accelerating time-to-value by generating an output that can be integrated into automation and analytical workflows.
1411

1512

13+
## Sample List
14+
| File | Description |
15+
| --- | --- |
16+
| [field_extraction.ipynb](notebooks/field_extraction.ipynb) | Extract customized fields defined in analyzer templates |
17+
| [content_extraction.ipynb](notebooks/content_extraction.ipynb) | Extract structured content understanding result from your input files |
18+
| [analyzer_training.ipynb](notebooks/analyzer_training.ipynb) | Provide training data to improve quality of your analyzer |
19+
20+
21+
22+
## Prerequisites
23+
24+
1. To get started, you need an active [Azure account](https://azure.microsoft.com/free/cognitive-services/). If you don't have one, you can [create a free subscription](https://azure.microsoft.com/free/).
25+
1. Once you have an Azure subscription, create a [Content Understanding Service and get endpoint and keys](Create_Content_Understanding_Service.md).
26+
27+
28+
29+
## Getting Started
30+
31+
### GitHub Codespaces
32+
You can run this repo virtually by using GitHub Codespaces, which will open a web-based VS Code in your browser:
33+
34+
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=899687170&ref=main&geo=UsEast&devcontainer_path=.devcontainer%2Fdevcontainer.json)
35+
1636
### Note
1737

1838
>Trademarks This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"analyzerId": "sample_call_transcript_analyzer",
3+
"description": "Sample call transcript analyzer",
4+
"scenario": "callCenter",
5+
"config": {
6+
"returnDetails": true,
7+
"locales": ["en-US"]
8+
},
9+
"fieldSchema": {
10+
"fields": {
11+
"Summary": {
12+
"type": "string",
13+
"method": "generate"
14+
},
15+
"Sentiment": {
16+
"type": "string",
17+
"method": "classify",
18+
"enum": [ "Positive", "Neutral", "Negative" ]
19+
},
20+
"People": {
21+
"type": "array",
22+
"description": "List of people mentioned",
23+
"items": {
24+
"type": "object",
25+
"properties": {
26+
"Name": { "type": "string" },
27+
"Role": { "type": "string" }
28+
}
29+
}
30+
}
31+
}
32+
}
33+
}

0 commit comments

Comments
 (0)