Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit c314139

Browse files
committed
Finalize changes for end-to-end workflow for AI Document Processing
1 parent a040d03 commit c314139

File tree

5 files changed: 15 additions and 29 deletions.

src/AIDocumentPipeline/invoices/extract_invoice_data_workflow.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,9 @@
11
from __future__ import annotations
2+
from invoices.invoice_data import InvoiceData
23
from shared.storage import write_bytes_to_blob
34
from invoices.activities import extract_invoice_data
45
from shared.workflow_result import WorkflowResult
56
import azure.durable_functions as df
6-
import azure.functions as func
7-
import logging
87
from shared import config as app_config
98

109
name = "ExtractInvoiceDataWorkflow"
@@ -35,7 +34,7 @@ def run(context: df.DurableOrchestrationContext) -> WorkflowResult:
3534
f"Failed to extract data for {invoice}.")
3635
continue
3736

38-
invoice_data_stored = yield context.call_activity(write_bytes_to_blob.name, write_bytes_to_blob.Request(app_config.invoices_storage_account_name, input.container_name, f"{invoice}.Data.json", invoice_data))
37+
invoice_data_stored = yield context.call_activity(write_bytes_to_blob.name, write_bytes_to_blob.Request(app_config.invoices_storage_account_name, input.container_name, f"{invoice}.Data.json", InvoiceData.to_json(invoice_data).encode("utf-8"), True))
3938

4039
if not invoice_data_stored:
4140
result.add_error(write_bytes_to_blob.name,

src/AIDocumentPipeline/invoices/invoice_data.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -75,12 +75,15 @@ def from_dict(obj: dict) -> InvoiceData:
7575
result.customer_address = obj["customer_address"]
7676
result.delivery_date = obj["delivery_date"]
7777
result.payable_by = obj["payable_by"]
78-
result.products = [InvoiceProduct.from_dict(p) for p in obj["products"]]
78+
result.products = [InvoiceProduct.from_dict(
79+
p) for p in obj["products"]]
7980
result.returns = [InvoiceProduct.from_dict(p) for p in obj["returns"]]
8081
result.total_quantity = obj["total_quantity"]
8182
result.total_price = obj["total_price"]
82-
result.products_signatures = [InvoiceSignature.from_dict(s) for s in obj["products_signatures"]]
83-
result.returns_signatures = [InvoiceSignature.from_dict(s) for s in obj["returns_signatures"]]
83+
result.products_signatures = [InvoiceSignature.from_dict(
84+
s) for s in obj["products_signatures"]]
85+
result.returns_signatures = [InvoiceSignature.from_dict(
86+
s) for s in obj["returns_signatures"]]
8487
return result
8588

8689

@@ -141,7 +144,7 @@ class InvoiceSignature:
141144
@staticmethod
142145
def empty() -> InvoiceSignature:
143146
result = InvoiceSignature()
144-
result.type = "Customer"
147+
result.type = ""
145148
result.name = ""
146149
result.is_signed = False
147150
return result

src/AIDocumentPipeline/shared/base_workflow.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/AIDocumentPipeline/shared/documents/document_data_extractor.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import base64
44
from openai import AzureOpenAI
55
import json
6+
import io
67

78

89
class DocumentDataExtractorOptions:
@@ -75,7 +76,9 @@ def __get_document_image_uris__(self, document_bytes: bytes) -> list:
7576

7677
image_uris = []
7778
for page in pages:
78-
base64_data = base64.b64encode(page.tobytes()).decode('utf-8')
79-
image_uris.append(f"data:image/jpeg;base64,{base64_data}")
79+
byteIO = io.BytesIO()
80+
page.save(byteIO, format='PNG')
81+
base64_data = base64.b64encode(byteIO.getvalue()).decode('utf-8')
82+
image_uris.append(f"data:image/png;base64,{base64_data}")
8083

8184
return image_uris

src/AIDocumentPipeline/shared/storage/write_bytes_to_blob.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def run(input: Request) -> bool:
3434

3535

3636
class Request(BlobStorageRequest):
37-
def __init__(self, storage_account_name: str, container_name: str, blob_name: str, content: bytes, overwrite: bool):
37+
def __init__(self, storage_account_name: str, container_name: str, blob_name: str, content: bytes, overwrite: bool = True):
3838
super().__init__(storage_account_name, container_name, blob_name)
3939
self.content = content
4040
self.overwrite = overwrite

0 commit comments

Comments
 (0)