Skip to content

Commit 99e019f

Browse files
committed
Update examples
1 parent fc87aae commit 99e019f

14 files changed

+52
-52
lines changed

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
![Textractor](https://raw.githubusercontent.com/aws-samples/amazon-textract-textractor/5716c52e8a39c063f43e058e1637e4984a4b2da4/docs/source/textractor_cropped.png)
22

3-
[![Tests](https://github.com/aws-samples/amazon-textract-textractor/actions/workflows/tests.yml/badge.svg)](https://github.com/aws-samples/amazon-textract-textractor/actions/workflows/tests.yml) [![Documentation](https://github.com/aws-samples/amazon-textract-textractor/actions/workflows/documentation.yml/badge.svg)](https://aws-samples.github.io/amazon-textract-textractor/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
3+
[![Tests](https://github.com/aws-samples/amazon-textract-textractor/actions/workflows/tests.yml/badge.svg)](https://github.com/aws-samples/amazon-textract-textractor/actions/workflows/tests.yml) [![Documentation](https://github.com/aws-samples/amazon-textract-textractor/actions/workflows/documentation.yml/badge.svg)](https://aws-samples.github.io/amazon-textract-textractor/) [![PyPI version](https://badge.fury.io/py/amazon-textract-textractor.svg)](https://pypi.org/project/amazon-textract-textractor/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
44

55
**Textractor** is a Python package created to work seamlessly with [Amazon Textract](https://docs.aws.amazon.com/textract/latest/dg/what-is.html), a document intelligence service offering text recognition, table extraction, form processing, and much more. Whether you are making a one-off script or a complex distributed document processing pipeline, Textractor makes it easy to use Textract.
66

@@ -65,9 +65,9 @@ document.tables[0].to_excel("output.xlsx")
6565
### Analyze ID
6666

6767
```py
68-
document = extractor.analyze_id(file_source="tests/fixtures/fake_id.jpg")
68+
document = extractor.analyze_id(file_source="tests/fixtures/fake_id.png")
6969
print(document.identity_documents[0].get("FIRST_NAME"))
70-
# 'FAKEID'
70+
# 'MARIA'
7171
```
7272

7373
### Receipt processing (Analyze Expense)

docs/source/notebooks/exporting_form_data.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"document = extractor.start_document_analysis(\n",
4545
" # Here we pass a Pillow image instead of path. This changes nothing as\n",
4646
" # Textractor supports most input types.\n",
47-
" file_source=Image.open(\"../../../tests/fixtures/invoice1.png\"),\n",
47+
" file_source=Image.open(\"../../../tests/fixtures/form.png\"),\n",
4848
" # We specify the features that we want, here, we only want keys and values\n",
4949
" # therefore we use TextractFeatures.FORMS.\n",
5050
" features=[TextractFeatures.FORMS],\n",

docs/source/notebooks/going_further.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"\n",
3737
"extractor = Textractor(profile_name=\"default\")\n",
3838
"# This path assumes that you are running the notebook from docs/source/notebooks\n",
39-
"document = extractor.analyze_expense(file_source=\"../../../tests/fixtures/invoice1.png\")"
39+
"document = extractor.analyze_expense(file_source=\"../../../tests/fixtures/form.png\")"
4040
]
4141
},
4242
{

docs/source/notebooks/interfacing_with_trp2.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
"\n",
3232
"extractor = Textractor(profile_name=\"default\")\n",
3333
"# This path assumes that you are running the notebook from docs/source/notebooks\n",
34-
"document = extractor.detect_document_text(\"../../../tests/fixtures/invoice1.png\")"
34+
"document = extractor.detect_document_text(\"../../../tests/fixtures/form.png\")"
3535
]
3636
},
3737
{

docs/source/notebooks/simple_ocr.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
"\n",
4040
"extractor = Textractor(profile_name=\"default\")\n",
4141
"# This path assumes that you are running the notebook from docs/source/notebooks\n",
42-
"document = extractor.detect_document_text(\"../../../tests/fixtures/invoice1.png\")"
42+
"document = extractor.detect_document_text(\"../../../tests/fixtures/form.png\")"
4343
]
4444
},
4545
{
@@ -121,7 +121,7 @@
121121
"extractor = Textractor(profile_name=\"default\")\n",
122122
"# This path assumes that you are running the notebook from docs/source/notebooks\n",
123123
"document = extractor.start_document_text_detection(\n",
124-
" \"../../../tests/fixtures/invoice1.png\",\n",
124+
" \"../../../tests/fixtures/form.png\",\n",
125125
" s3_upload_path=\"s3://textract-ocr/temp/\",\n",
126126
")"
127127
]

docs/source/notebooks/using_analyze_id.ipynb

+21-21
Large diffs are not rendered by default.

docs/source/notebooks/visualizing_results.ipynb

+15-15
Large diffs are not rendered by default.

tests/fixtures/fake_id.jpg

-157 KB
Binary file not shown.

tests/fixtures/fake_id.png

205 KB
Loading

tests/fixtures/invoice1.png

-2.76 MB
Binary file not shown.

tests/fixtures/invoice2.png

-475 KB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"IdentityDocuments": [{"DocumentIndex": 1, "IdentityDocumentFields": [{"Type": {"Text": "FIRST_NAME"}, "ValueDetection": {"Text": "FAKEID", "Confidence": 94.69052124023438}}, {"Type": {"Text": "LAST_NAME"}, "ValueDetection": {"Text": "GREAT", "Confidence": 98.74333953857422}}, {"Type": {"Text": "MIDDLE_NAME"}, "ValueDetection": {"Text": "", "Confidence": 99.57127380371094}}, {"Type": {"Text": "SUFFIX"}, "ValueDetection": {"Text": "", "Confidence": 99.6200180053711}}, {"Type": {"Text": "CITY_IN_ADDRESS"}, "ValueDetection": {"Text": "PHOENIX", "Confidence": 99.16668701171875}}, {"Type": {"Text": "ZIP_CODE_IN_ADDRESS"}, "ValueDetection": {"Text": "85007", "Confidence": 98.64995574951172}}, {"Type": {"Text": "STATE_IN_ADDRESS"}, "ValueDetection": {"Text": "AZ", "Confidence": 98.56987762451172}}, {"Type": {"Text": "STATE_NAME"}, "ValueDetection": {"Text": "ARIZONA", "Confidence": 98.91461944580078}}, {"Type": {"Text": "DOCUMENT_NUMBER"}, "ValueDetection": {"Text": "D12345678", "Confidence": 98.87849426269531}}, {"Type": {"Text": "EXPIRATION_DATE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 90.80914306640625}}, {"Type": {"Text": "DATE_OF_BIRTH"}, "ValueDetection": {"Text": "02/01/1957", "NormalizedValue": {"Value": "1957-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.23692321777344}}, {"Type": {"Text": "DATE_OF_ISSUE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.06478118896484}}, {"Type": {"Text": "ID_TYPE"}, "ValueDetection": {"Text": "DRIVER LICENSE FRONT", "Confidence": 99.15076446533203}}, {"Type": {"Text": "ENDORSEMENTS"}, "ValueDetection": {"Text": "NONE", "Confidence": 99.27373504638672}}, {"Type": {"Text": "VETERAN"}, "ValueDetection": {"Text": "", "Confidence": 99.668212890625}}, {"Type": {"Text": "RESTRICTIONS"}, "ValueDetection": {"Text": "NONE", "Confidence": 
99.08118438720703}}, {"Type": {"Text": "CLASS"}, "ValueDetection": {"Text": "D", "Confidence": 99.11907196044922}}, {"Type": {"Text": "ADDRESS"}, "ValueDetection": {"Text": "123 MAIN STREET", "Confidence": 98.97272491455078}}, {"Type": {"Text": "COUNTY"}, "ValueDetection": {"Text": "", "Confidence": 99.62360382080078}}, {"Type": {"Text": "PLACE_OF_BIRTH"}, "ValueDetection": {"Text": "", "Confidence": 99.63812255859375}}]}], "DocumentMetadata": {"Pages": 1}, "AnalyzeIDModelVersion": "1.0", "ResponseMetadata": {"RequestId": "21ea950f-4ecf-4aad-af7e-4e5adff2b767", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "21ea950f-4ecf-4aad-af7e-4e5adff2b767", "content-type": "application/x-amz-json-1.1", "content-length": "2292", "date": "Tue, 27 Sep 2022 18:12:15 GMT"}, "RetryAttempts": 0}}
1+
{"IdentityDocuments": [{"DocumentIndex": 1, "IdentityDocumentFields": [{"Type": {"Text": "FIRST_NAME"}, "ValueDetection": {"Text": "MARIA", "Confidence": 94.69052124023438}}, {"Type": {"Text": "LAST_NAME"}, "ValueDetection": {"Text": "GARCIA", "Confidence": 98.74333953857422}}, {"Type": {"Text": "MIDDLE_NAME"}, "ValueDetection": {"Text": "", "Confidence": 99.57127380371094}}, {"Type": {"Text": "SUFFIX"}, "ValueDetection": {"Text": "", "Confidence": 99.6200180053711}}, {"Type": {"Text": "CITY_IN_ADDRESS"}, "ValueDetection": {"Text": "PHOENIX", "Confidence": 99.16668701171875}}, {"Type": {"Text": "ZIP_CODE_IN_ADDRESS"}, "ValueDetection": {"Text": "85007", "Confidence": 98.64995574951172}}, {"Type": {"Text": "STATE_IN_ADDRESS"}, "ValueDetection": {"Text": "AZ", "Confidence": 98.56987762451172}}, {"Type": {"Text": "STATE_NAME"}, "ValueDetection": {"Text": "ARIZONA", "Confidence": 98.91461944580078}}, {"Type": {"Text": "DOCUMENT_NUMBER"}, "ValueDetection": {"Text": "D12345678", "Confidence": 98.87849426269531}}, {"Type": {"Text": "EXPIRATION_DATE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 90.80914306640625}}, {"Type": {"Text": "DATE_OF_BIRTH"}, "ValueDetection": {"Text": "02/01/1957", "NormalizedValue": {"Value": "1957-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.23692321777344}}, {"Type": {"Text": "DATE_OF_ISSUE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.06478118896484}}, {"Type": {"Text": "ID_TYPE"}, "ValueDetection": {"Text": "DRIVER LICENSE FRONT", "Confidence": 99.15076446533203}}, {"Type": {"Text": "ENDORSEMENTS"}, "ValueDetection": {"Text": "NONE", "Confidence": 99.27373504638672}}, {"Type": {"Text": "VETERAN"}, "ValueDetection": {"Text": "", "Confidence": 99.668212890625}}, {"Type": {"Text": "RESTRICTIONS"}, "ValueDetection": {"Text": "NONE", "Confidence": 
99.08118438720703}}, {"Type": {"Text": "CLASS"}, "ValueDetection": {"Text": "D", "Confidence": 99.11907196044922}}, {"Type": {"Text": "ADDRESS"}, "ValueDetection": {"Text": "123 MAIN STREET", "Confidence": 98.97272491455078}}, {"Type": {"Text": "COUNTY"}, "ValueDetection": {"Text": "", "Confidence": 99.62360382080078}}, {"Type": {"Text": "PLACE_OF_BIRTH"}, "ValueDetection": {"Text": "", "Confidence": 99.63812255859375}}]}], "DocumentMetadata": {"Pages": 1}, "AnalyzeIDModelVersion": "1.0", "ResponseMetadata": {"RequestId": "21ea950f-4ecf-4aad-af7e-4e5adff2b767", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "21ea950f-4ecf-4aad-af7e-4e5adff2b767", "content-type": "application/x-amz-json-1.1", "content-length": "2292", "date": "Tue, 27 Sep 2022 18:12:15 GMT"}, "RetryAttempts": 0}}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"IdentityDocuments": [{"DocumentIndex": 1, "IdentityDocumentFields": [{"Type": {"Text": "FIRST_NAME"}, "ValueDetection": {"Text": "FAKEID", "Confidence": 94.69052124023438}}, {"Type": {"Text": "LAST_NAME"}, "ValueDetection": {"Text": "GREAT", "Confidence": 98.74333953857422}}, {"Type": {"Text": "MIDDLE_NAME"}, "ValueDetection": {"Text": "", "Confidence": 99.57127380371094}}, {"Type": {"Text": "SUFFIX"}, "ValueDetection": {"Text": "", "Confidence": 99.6200180053711}}, {"Type": {"Text": "CITY_IN_ADDRESS"}, "ValueDetection": {"Text": "PHOENIX", "Confidence": 99.16668701171875}}, {"Type": {"Text": "ZIP_CODE_IN_ADDRESS"}, "ValueDetection": {"Text": "85007", "Confidence": 98.64995574951172}}, {"Type": {"Text": "STATE_IN_ADDRESS"}, "ValueDetection": {"Text": "AZ", "Confidence": 98.56987762451172}}, {"Type": {"Text": "STATE_NAME"}, "ValueDetection": {"Text": "ARIZONA", "Confidence": 98.91461944580078}}, {"Type": {"Text": "DOCUMENT_NUMBER"}, "ValueDetection": {"Text": "D12345678", "Confidence": 98.87849426269531}}, {"Type": {"Text": "EXPIRATION_DATE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 90.80914306640625}}, {"Type": {"Text": "DATE_OF_BIRTH"}, "ValueDetection": {"Text": "02/01/1957", "NormalizedValue": {"Value": "1957-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.23692321777344}}, {"Type": {"Text": "DATE_OF_ISSUE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.06478118896484}}, {"Type": {"Text": "ID_TYPE"}, "ValueDetection": {"Text": "DRIVER LICENSE FRONT", "Confidence": 99.15076446533203}}, {"Type": {"Text": "ENDORSEMENTS"}, "ValueDetection": {"Text": "NONE", "Confidence": 99.27373504638672}}, {"Type": {"Text": "VETERAN"}, "ValueDetection": {"Text": "", "Confidence": 99.668212890625}}, {"Type": {"Text": "RESTRICTIONS"}, "ValueDetection": {"Text": "NONE", "Confidence": 
99.08118438720703}}, {"Type": {"Text": "CLASS"}, "ValueDetection": {"Text": "D", "Confidence": 99.11907196044922}}, {"Type": {"Text": "ADDRESS"}, "ValueDetection": {"Text": "123 MAIN STREET", "Confidence": 98.97272491455078}}, {"Type": {"Text": "COUNTY"}, "ValueDetection": {"Text": "", "Confidence": 99.62360382080078}}, {"Type": {"Text": "PLACE_OF_BIRTH"}, "ValueDetection": {"Text": "", "Confidence": 99.63812255859375}}]}], "DocumentMetadata": {"Pages": 1}, "AnalyzeIDModelVersion": "1.0", "ResponseMetadata": {"RequestId": "568d4209-a4fe-4bee-9e29-37bfdacaa71f", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "568d4209-a4fe-4bee-9e29-37bfdacaa71f", "content-type": "application/x-amz-json-1.1", "content-length": "2292", "date": "Tue, 27 Sep 2022 18:12:19 GMT"}, "RetryAttempts": 0}}
1+
{"IdentityDocuments": [{"DocumentIndex": 1, "IdentityDocumentFields": [{"Type": {"Text": "FIRST_NAME"}, "ValueDetection": {"Text": "MARIA", "Confidence": 94.69052124023438}}, {"Type": {"Text": "LAST_NAME"}, "ValueDetection": {"Text": "GARCIA", "Confidence": 98.74333953857422}}, {"Type": {"Text": "MIDDLE_NAME"}, "ValueDetection": {"Text": "", "Confidence": 99.57127380371094}}, {"Type": {"Text": "SUFFIX"}, "ValueDetection": {"Text": "", "Confidence": 99.6200180053711}}, {"Type": {"Text": "CITY_IN_ADDRESS"}, "ValueDetection": {"Text": "PHOENIX", "Confidence": 99.16668701171875}}, {"Type": {"Text": "ZIP_CODE_IN_ADDRESS"}, "ValueDetection": {"Text": "85007", "Confidence": 98.64995574951172}}, {"Type": {"Text": "STATE_IN_ADDRESS"}, "ValueDetection": {"Text": "AZ", "Confidence": 98.56987762451172}}, {"Type": {"Text": "STATE_NAME"}, "ValueDetection": {"Text": "ARIZONA", "Confidence": 98.91461944580078}}, {"Type": {"Text": "DOCUMENT_NUMBER"}, "ValueDetection": {"Text": "D12345678", "Confidence": 98.87849426269531}}, {"Type": {"Text": "EXPIRATION_DATE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 90.80914306640625}}, {"Type": {"Text": "DATE_OF_BIRTH"}, "ValueDetection": {"Text": "02/01/1957", "NormalizedValue": {"Value": "1957-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.23692321777344}}, {"Type": {"Text": "DATE_OF_ISSUE"}, "ValueDetection": {"Text": "02/01/2018", "NormalizedValue": {"Value": "2018-02-01T00:00:00", "ValueType": "Date"}, "Confidence": 98.06478118896484}}, {"Type": {"Text": "ID_TYPE"}, "ValueDetection": {"Text": "DRIVER LICENSE FRONT", "Confidence": 99.15076446533203}}, {"Type": {"Text": "ENDORSEMENTS"}, "ValueDetection": {"Text": "NONE", "Confidence": 99.27373504638672}}, {"Type": {"Text": "VETERAN"}, "ValueDetection": {"Text": "", "Confidence": 99.668212890625}}, {"Type": {"Text": "RESTRICTIONS"}, "ValueDetection": {"Text": "NONE", "Confidence": 
99.08118438720703}}, {"Type": {"Text": "CLASS"}, "ValueDetection": {"Text": "D", "Confidence": 99.11907196044922}}, {"Type": {"Text": "ADDRESS"}, "ValueDetection": {"Text": "123 MAIN STREET", "Confidence": 98.97272491455078}}, {"Type": {"Text": "COUNTY"}, "ValueDetection": {"Text": "", "Confidence": 99.62360382080078}}, {"Type": {"Text": "PLACE_OF_BIRTH"}, "ValueDetection": {"Text": "", "Confidence": 99.63812255859375}}]}], "DocumentMetadata": {"Pages": 1}, "AnalyzeIDModelVersion": "1.0", "ResponseMetadata": {"RequestId": "568d4209-a4fe-4bee-9e29-37bfdacaa71f", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "568d4209-a4fe-4bee-9e29-37bfdacaa71f", "content-type": "application/x-amz-json-1.1", "content-length": "2292", "date": "Tue, 27 Sep 2022 18:12:19 GMT"}, "RetryAttempts": 0}}

tests/test_analyze_id.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ def setUp(self):
1414
# insert credentials and filepaths here to run test
1515
self.profile_name = "default"
1616
self.current_directory = os.path.abspath(os.path.dirname(__file__))
17-
self.image_path = os.path.join(self.current_directory, "fixtures/fake_id.jpg")
18-
self.image = PIL.Image.open(os.path.join(self.current_directory, "fixtures/fake_id.jpg"))
17+
self.image_path = os.path.join(self.current_directory, "fixtures/fake_id.png")
18+
self.image = PIL.Image.open(os.path.join(self.current_directory, "fixtures/fake_id.png"))
1919

2020
if self.profile_name is None:
2121
raise InvalidProfileNameError(
@@ -39,8 +39,8 @@ def test_analyze_id_from_path(self):
3939
self.assertIsInstance(document, Document)
4040
self.assertEqual(len(document.identity_documents), 1)
4141
self.assertEqual(len(document.identity_documents[0].fields), 20)
42-
self.assertEqual(document.identity_documents[0].get(AnalyzeIDFields.FIRST_NAME), "FAKEID")
43-
self.assertEqual(document.identity_documents[0][AnalyzeIDFields.FIRST_NAME], "FAKEID")
42+
self.assertEqual(document.identity_documents[0].get(AnalyzeIDFields.FIRST_NAME), "MARIA")
43+
self.assertEqual(document.identity_documents[0][AnalyzeIDFields.FIRST_NAME], "MARIA")
4444

4545
def test_analyze_id_from_image(self):
4646
# Testing local single image input
@@ -55,8 +55,8 @@ def test_analyze_id_from_image(self):
5555
self.assertIsInstance(document, Document)
5656
self.assertEqual(len(document.identity_documents), 1)
5757
self.assertEqual(len(document.identity_documents[0].fields), 20)
58-
self.assertEqual(document.identity_documents[0].get("FIRST_NAME"), "FAKEID")
59-
self.assertEqual(document.identity_documents[0]["FIRST_NAME"], "FAKEID")
58+
self.assertEqual(document.identity_documents[0].get("FIRST_NAME"), "MARIA")
59+
self.assertEqual(document.identity_documents[0]["FIRST_NAME"], "MARIA")
6060

6161
if __name__ == "__main__":
6262
test = TestTextractorAnalyzeID()

0 commit comments

Comments
 (0)