Skip to content

Document api #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 43 commits into
base: feature/api
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
a030144
WIP
satwik18 Jul 5, 2022
8fe99a8
add .vscode to gitignore
satwik18 Jul 5, 2022
a563ba9
modifying db schema
dantemazza Jul 5, 2022
a167998
push
dantemazza Jul 5, 2022
8c77793
fix schema
dantemazza Jul 6, 2022
9bffad8
get endpoints
dantemazza Jul 6, 2022
70c8bc4
ALL tickets API done
dantemazza Jul 7, 2022
7803197
Fixing default date bug
dantemazza Jul 7, 2022
03dd099
Cors header
dantemazza Jul 7, 2022
ec0560d
Cors header
dantemazza Jul 7, 2022
dbc2ec5
Cors header
dantemazza Jul 7, 2022
2b80291
message
dantemazza Jul 7, 2022
90019d7
Stefan codeazzzzzzzzzzzzzzzzzzzzzzzzzzzz
dantemazza Jul 7, 2022
419e917
Fix commit bugs for mergmerge
dantemazza Jul 7, 2022
32dee55
Fixed celery pipeline
dantemazza Jul 8, 2022
0f596ce
WIP
satwik18 Jul 5, 2022
057c206
modifying db schema
dantemazza Jul 5, 2022
a6ac6cc
push
dantemazza Jul 5, 2022
62ebb43
fix schema
dantemazza Jul 6, 2022
2bbbd1f
get endpoints
dantemazza Jul 6, 2022
f476281
ALL tickets API done
dantemazza Jul 7, 2022
57d16c4
Fixing default date bug
dantemazza Jul 7, 2022
c1e40db
Cors header
dantemazza Jul 7, 2022
3bc32a3
Cors header
dantemazza Jul 7, 2022
f312dcc
Cors header
dantemazza Jul 7, 2022
dc4d771
message
dantemazza Jul 7, 2022
fde205e
Stefan codeazzzzzzzzzzzzzzzzzzzzzzzzzzzz
dantemazza Jul 7, 2022
410b663
Fix commit bugs for mergmerge
dantemazza Jul 7, 2022
6ed5543
Fixed celery pipeline
dantemazza Jul 8, 2022
dcdb007
document api finished
dantemazza Jul 21, 2022
abcc182
conflicts fixed
dantemazza Jul 21, 2022
a9e7fb5
s3 presigned links
dantemazza Jul 21, 2022
9aa601b
enable cognito
satwik18 Jul 22, 2022
2ecc006
docker changes for flask-cognito-lib
satwik18 Jul 22, 2022
e0e035f
docker changes for flask-cognito-lib
satwik18 Jul 22, 2022
400662b
docker file changes
satwik18 Jul 22, 2022
a17a30b
Dockerization complete
dantemazza Jul 22, 2022
39d08f8
Extraction
dantemazza Jul 22, 2022
f925ade
Merge branch 'document_api' of github.com:ShipSolver/ShipSolver-backe…
satwik18 Jul 22, 2022
efa0367
satwik
dantemazza Jul 22, 2022
9108479
Merge branch 'document_api' of github.com:WLP-ai/metadata-extraction …
dantemazza Jul 22, 2022
ef635c0
document_api
dantemazza Jul 22, 2022
b32a29b
idek lol
dantemazza Jan 30, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ __pycache__/
*.pyc
.vscode
**/.env
tmp
tmp
2 changes: 0 additions & 2 deletions postgres/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ services:
image: postgres:latest
ports:
- 5432:5432
env_file:
- env/postgres.env
environment:
POSTGRES_DB: tenant_db
POSTGRES_HOST_AUTH_METHOD: trust
Expand Down
18 changes: 18 additions & 0 deletions servers/app.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image for the application server: Python 3.9 plus the native PDF/PostgreSQL
# build dependencies, the project's Python requirements, and an editable
# checkout of flask-cognito-lib.
FROM python:3.9

# Install every system package in one layer, together with `apt-get update`,
# so a cached layer can never pair stale package lists with a later install.
# NOTE(review): `python-dev` may not exist on newer Debian bases - confirm it
# still resolves for the python:3.9 tag in use.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        libpoppler-cpp-dev \
        libpq-dev \
        pkg-config \
        poppler-utils \
        python-dev \
        qpdf \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies. psycopg2 is compiled from source here, which is why
# libpq-dev and build-essential are installed above.
COPY tenant/requirements.txt .
RUN pip3 install --upgrade pip
RUN pip3 install -r requirements.txt
RUN pip3 install psycopg2

# Editable install of the ShipSolver fork of flask-cognito-lib.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may
# pick up different code - consider pinning.
RUN git -C /root clone https://github.com/ShipSolver/flask-cognito-lib.git
RUN pip3 install -e /root/flask-cognito-lib

WORKDIR /opt/metadata-extraction
ENV PYTHONPATH=.
EXPOSE 6767

# SECURITY: AWS credentials must never be baked into an image; ENV values are
# stored in the image layers and are visible through `docker history`.
# Provide `aws_access_key_id` and `aws_secret_access_key` when the container
# starts instead (for example `docker run --env-file <file>` or an `env_file:`
# entry in docker-compose), and keep that file out of version control.
# The key pair that was previously committed here must be treated as
# compromised and rotated.

# Non-secret AWS / Cognito configuration.
ENV AWS_REGION="us-east-1"
ENV AWS_COGNITO_USER_POOL_ID="us-east-1_6AUY6LKPZ"
ENV AWS_COGNITO_USER_POOL_CLIENT_ID="2vukbtukva3u0oh29lf32ghmkp"
ENV AWS_COGNITO_DOMAIN="https://shipsolver-dev.auth.us-east-1.amazoncognito.com"
17 changes: 17 additions & 0 deletions servers/celery.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Image for the Celery worker: Python 3.9 plus the native PDF and Tesseract
# OCR dependencies, the project's Python requirements, and an editable
# checkout of flask-cognito-lib.
FROM python:3.9

# Install every system package in one layer, together with `apt-get update`,
# so a cached layer can never pair stale package lists with a later install.
# NOTE(review): `python-dev` may not exist on newer Debian bases - confirm it
# still resolves for the python:3.9 tag in use.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        libpoppler-cpp-dev \
        libtesseract-dev \
        pkg-config \
        poppler-utils \
        python-dev \
        qpdf \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies. The worker uses the prebuilt psycopg2-binary wheel, so
# no libpq development headers are installed in this image.
COPY tenant/requirements.txt .
RUN pip3 install --upgrade pip
RUN pip3 install -r requirements.txt
RUN pip3 install psycopg2-binary

# Editable install of the ShipSolver fork of flask-cognito-lib.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may
# pick up different code - consider pinning.
RUN git -C /root clone https://github.com/ShipSolver/flask-cognito-lib.git
RUN pip3 install -e /root/flask-cognito-lib

# The worker runs from inside tenant/; PYTHONPATH=.. puts the parent
# directory (/opt/metadata-extraction) on the import path.
WORKDIR /opt/metadata-extraction/tenant
ENV PYTHONPATH=..

# SECURITY: AWS credentials must never be baked into an image; ENV values are
# stored in the image layers and are visible through `docker history`.
# Provide `aws_access_key_id` and `aws_secret_access_key` when the container
# starts instead (for example `docker run --env-file <file>` or an `env_file:`
# entry in docker-compose), and keep that file out of version control.
# The key pair that was previously committed here must be treated as
# compromised and rotated.

# Non-secret AWS / Cognito configuration.
ENV AWS_REGION="us-east-1"
ENV AWS_COGNITO_USER_POOL_ID="us-east-1_6AUY6LKPZ"
ENV AWS_COGNITO_USER_POOL_CLIENT_ID="2vukbtukva3u0oh29lf32ghmkp"
ENV AWS_COGNITO_DOMAIN="https://shipsolver-dev.auth.us-east-1.amazoncognito.com"
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ services:
dockerfile: celery.Dockerfile
volumes:
- .:/opt/metadata-extraction
command: celery -A __init__.client worker --loglevel=info -f celery.logs -Ofair -c 2
command: celery -A config.client worker --loglevel=info -f celery.logs -Ofair -c 2
tty: true
app:
hostname: app.wlp.com
Expand All @@ -36,8 +36,8 @@ services:
- .:/opt/metadata-extraction
container_name: app01
ports:
- "5000:5000"
command: python3 server/__init__.py
- "6767:6767"
command: python3 tenant/server.py
tty: true
flower:
hostname: flower.wlp.com
Expand Down
15 changes: 9 additions & 6 deletions extraction/app.py → servers/extraction/app.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
# from multilingual_pdf2text.pdf2text import PDF2Text
# from multilingual_pdf2text.models.document_model.document import Document
# import pdfplumber
# import extraction.extract as e
from multilingual_pdf2text.pdf2text import PDF2Text
from multilingual_pdf2text.models.document_model.document import Document
import pdfplumber
import extraction.extract as e
import json
from celery.utils.log import get_logger

logger = get_logger(__name__)

def read_pdfplumber(file_name):
with pdfplumber.open(file_name) as pdf:
Expand All @@ -26,8 +28,9 @@ def work(folder_path):

ml_page_text = list(content)[0]["text"]
pp_text = read_pdfplumber(pdf_file)

extract_json = e.extract(ml_page_text, plumber_page=pp_text)
for i in range(14):
logger.info("WE HERE----------------")
extract_json = e.generate_doclist(e.extract(ml_page_text, plumber_page=pp_text))

with open(f"{folder_path}/{pdf_uuid}.json", "w") as f:
json.dump(extract_json, f, indent=2)
Expand Down
39 changes: 21 additions & 18 deletions extraction/const.py → servers/extraction/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,33 @@

#doclist_keys

HOUSE_REF = "house_ref"
BARCODE = "barcode"
FIRST_PARTY = "first_party"
NUM_PCS = "num_pcs"
PCS = "pcs"
BARCODE = "barcodeNumber"
HOUSE_REF = "houseReferenceNumber"
WEIGHT = "weight"
NUM_PCS = "claimedNumberOfPieces"
BOL_NUM = "BOLNumber"
SPECIAL_SERVICES = "specialServices"
SPECIAL_INSTRUCTIONS = "specialInstructions"
CONSIGNEE = "consignee"
SHIPPER = "shipper"
COMPANY = "Company"
NAME = "Name"
ADDRESS = "Address"
POSTAL_CODE = "PostalCode"
PHONE_NUMBER = "PhoneNumber"

NO_SIGNATURE_REQUIRED = "noSignatureRequired"
TAILGATE_AUTHORIZED = "tailgateAuthorized"

FIRST_PARTY = "customerName"

PCS = "pieces"

PKG = "pkg"
WT_LBS = "wt(lbs)"
WT_LBS = "weight"
COMMODITY_DESCRIPTION = "commodity_description"
DIMS_IN = "dims(in)"

BOL_NUM = "bol_num"
SPECIAL_SERVICES = "special_services"
SPECIAL_INSTRUCTIONS = "special_instructions"

COMPANY = "company"
NAME = "name"
ADDRESS = "address"
POSTAL_CODE = "postal_code"
PHONE_NUMBER = "phone_number"

CONSIGNEE = "consignee"
SHIPPER = "shipper"

CEVA_SHIPPER_FIELDS = [COMPANY, ADDRESS]
CEVA_CONSIGNEE_FIELDS = [NAME, ADDRESS]
Expand Down
28 changes: 13 additions & 15 deletions extraction/extract.py → servers/extraction/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,25 +271,23 @@ def generate_doclist(_list):
HOUSE_REF: _list[HOUSE_REF] if HOUSE_REF in _list else "",
BARCODE: _list[BARCODE] if BARCODE in _list else "",
PCS: _list[PCS] if PCS in _list else [],
NUM_PCS: _list[NUM_PCS] if NUM_PCS in _list else "",
NUM_PCS: _list[NUM_PCS] if NUM_PCS in _list else 0,
WEIGHT: _list[WEIGHT] if WEIGHT in _list else "",
BOL_NUM: _list[BOL_NUM] if BOL_NUM in _list else "",
SPECIAL_SERVICES: _list[SPECIAL_SERVICES] if SPECIAL_SERVICES in _list else "",
SPECIAL_INSTRUCTIONS: _list[SPECIAL_INSTRUCTIONS] if SPECIAL_INSTRUCTIONS in _list else "",
CONSIGNEE: {
COMPANY: _list[CONSIGNEE][COMPANY] if CONSIGNEE in _list and COMPANY in _list[CONSIGNEE] else "",
NAME: _list[CONSIGNEE][NAME] if CONSIGNEE in _list and NAME in _list[CONSIGNEE] else "",
ADDRESS: _list[CONSIGNEE][ADDRESS] if CONSIGNEE in _list and ADDRESS in _list[CONSIGNEE] else "",
POSTAL_CODE: _list[CONSIGNEE][POSTAL_CODE] if CONSIGNEE in _list and POSTAL_CODE in _list[CONSIGNEE] else "",
PHONE_NUMBER: _list[CONSIGNEE][PHONE_NUMBER] if CONSIGNEE in _list and PHONE_NUMBER in _list[CONSIGNEE] else ""
},
SHIPPER: {
COMPANY: _list[SHIPPER][COMPANY] if SHIPPER in _list and COMPANY in _list[SHIPPER] else "",
NAME: _list[SHIPPER][NAME] if SHIPPER in _list and NAME in _list[SHIPPER] else "",
ADDRESS: _list[SHIPPER][ADDRESS] if SHIPPER in _list and ADDRESS in _list[SHIPPER] else "",
POSTAL_CODE: _list[SHIPPER][POSTAL_CODE] if SHIPPER in _list and POSTAL_CODE in _list[SHIPPER] else "",
PHONE_NUMBER: _list[SHIPPER][PHONE_NUMBER] if SHIPPER in _list and PHONE_NUMBER in _list[SHIPPER] else ""
}
CONSIGNEE+COMPANY: _list[CONSIGNEE][COMPANY] if CONSIGNEE in _list and COMPANY in _list[CONSIGNEE] else "",
CONSIGNEE+NAME: _list[CONSIGNEE][NAME] if CONSIGNEE in _list and NAME in _list[CONSIGNEE] else "",
CONSIGNEE+ADDRESS: _list[CONSIGNEE][ADDRESS] if CONSIGNEE in _list and ADDRESS in _list[CONSIGNEE] else "",
CONSIGNEE+POSTAL_CODE: _list[CONSIGNEE][POSTAL_CODE] if CONSIGNEE in _list and POSTAL_CODE in _list[CONSIGNEE] else "",
CONSIGNEE+PHONE_NUMBER: _list[CONSIGNEE][PHONE_NUMBER] if CONSIGNEE in _list and PHONE_NUMBER in _list[CONSIGNEE] else "",
SHIPPER+COMPANY: _list[SHIPPER][COMPANY] if SHIPPER in _list and COMPANY in _list[SHIPPER] else "",
SHIPPER+NAME: _list[SHIPPER][NAME] if SHIPPER in _list and NAME in _list[SHIPPER] else "",
SHIPPER+ADDRESS: _list[SHIPPER][ADDRESS] if SHIPPER in _list and ADDRESS in _list[SHIPPER] else "",
SHIPPER+POSTAL_CODE: _list[SHIPPER][POSTAL_CODE] if SHIPPER in _list and POSTAL_CODE in _list[SHIPPER] else "",
SHIPPER+PHONE_NUMBER: _list[SHIPPER][PHONE_NUMBER] if SHIPPER in _list and PHONE_NUMBER in _list[SHIPPER] else "",
NO_SIGNATURE_REQUIRED: _list[NO_SIGNATURE_REQUIRED] if NO_SIGNATURE_REQUIRED in _list else False,
TAILGATE_AUTHORIZED: _list[TAILGATE_AUTHORIZED] if TAILGATE_AUTHORIZED in _list else False
}


Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
22 changes: 0 additions & 22 deletions servers/tenant/Pipfile

This file was deleted.

Loading