
Commit 2000ac6

Create python_main.py
1 parent a7d2428 commit 2000ac6

File tree

1 file changed: +58 -0 lines changed

python_main.py (+58 lines)
@@ -0,0 +1,58 @@
from urllib.parse import unquote_plus
import boto3
from io import BytesIO
import gzip

# Fixed-width column boundaries: column name -> [start, end) offsets within a raw row.
fixed_widths = {
    "date": [0, 15],
    "serial_number": [15, 36],
    "model": [36, 79],
    "capacity_bytes": [79, 98],
    "failure": [98, 109]
}


def download(s3_client: object, bucket: str, key: str) -> BytesIO:
    # Download the S3 object into an in-memory buffer and rewind it for reading.
    file_object = BytesIO()
    s3_client.download_fileobj(bucket, key, file_object)
    file_object.seek(0)
    return file_object


def read_mem_file(fo: BytesIO) -> list:
    # Decompress the gzipped buffer and return its contents as a list of text lines.
    with gzip.open(fo, mode="rt") as f:
        rows = f.readlines()
    return rows


def convert_row_to_tab(raw_row: str, meta: dict) -> str:
    # Slice each fixed-width column out of the row and join the stripped values with tabs.
    row = ''
    for k, v in meta.items():
        column_value = raw_row[v[0]:v[1]]
        row += column_value.strip() + '\t'
    return row + '\n'


def rows_to_file_object_gz(rws: list):
    # Write the converted rows to a gzipped temp file (Lambda's writable /tmp directory).
    with gzip.open('/tmp/file.gz', 'wt') as f:
        f.writelines(rws)


def file_object_to_s3(s3_client: object, bucket: str, key: str) -> None:
    # Upload the temp file, rewriting the key into the tab_converted prefix.
    s3_client.upload_file('/tmp/file.gz', bucket,
                          key.replace('.gz', '_tab.gz').replace('fixed_width_raw', 'tab_converted'))


def lambda_handler(event, _):
    # Triggered by S3 events: convert each uploaded fixed-width .gz file to tab-separated.
    s3_client = boto3.client('s3')
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = unquote_plus(record['s3']['object']['key'])
        fo = download(s3_client, bucket, key)
        data = read_mem_file(fo)
        tab_rows = []
        for row in data:
            tab_row = convert_row_to_tab(raw_row=row, meta=fixed_widths)
            tab_rows.append(tab_row)
        rows_to_file_object_gz(tab_rows)
        file_object_to_s3(s3_client, bucket, key)
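
The handler consumes the standard S3 put-event shape (Records -> s3 -> bucket.name / object.key) and rewrites each key from the fixed_width_raw prefix to tab_converted. A minimal local sketch of the row conversion and of that event shape follows; the serial number, model, bucket, and key values are hypothetical, and actually invoking the handler requires real S3 access (or a mock such as moto):

if __name__ == '__main__':
    # Build one fixed-width row using the widths declared in fixed_widths (15, 21, 43, 19, 11 chars).
    sample_row = (
        "2020-01-01".ljust(15)
        + "Z305B2QN".ljust(21)          # serial_number (made-up value)
        + "ST4000DM000".ljust(43)       # model (made-up value)
        + "4000787030016".ljust(19)     # capacity_bytes
        + "0".ljust(11)                 # failure flag
        + "\n"
    )
    print(convert_row_to_tab(raw_row=sample_row, meta=fixed_widths))
    # -> the five values separated by tabs, ending with a trailing tab and newline

    # Shape of the S3 event the handler iterates over; bucket and key are hypothetical,
    # so the call below is commented out (it needs the object to exist, or an S3 mock).
    sample_event = {
        'Records': [
            {'s3': {'bucket': {'name': 'example-bucket'},
                    'object': {'key': 'fixed_width_raw/2020-01-01.gz'}}}
        ]
    }
    # lambda_handler(sample_event, None)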
