Fixes parse error #54

Merged (3 commits) on Jul 9, 2020
5 changes: 3 additions & 2 deletions .github/workflows/integrate.yaml

@@ -28,11 +28,12 @@ jobs:
       uses: actions/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}
+
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        make install
+
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
14 changes: 14 additions & 0 deletions Makefile

@@ -15,3 +15,17 @@ deploy: build
 
 stop:
	$(compose_cmd) stop
+
+install:
+	pip install -e .['develop']
+
+test:
+	pytest crawlclima/ -v
+
+clean:
+	@find ./ -name '*.pyc' -exec rm -f {} \;
+	@find ./ -name '*~' -exec rm -f {} \;
+	rm -rf .cache
+	rm -rf build
+	rm -rf dist
+	rm -rf *.egg-info
28 changes: 21 additions & 7 deletions crawlclima/redemet/rmet.py

@@ -1,6 +1,7 @@
 import datetime
 import math
 import os
+import re
 import time
 
 import pandas as pd
@@ -148,16 +149,18 @@ def describe(dataframe):
     return data
 
 
-def capture_date_range(station, date):
+def capture_date_range(station, date_start, date_end=None):
     """
     Downloads data for the specific station from the given date until today
     :param station: station code
-    :param date: capture start date
+    :param date_start: capture start date
+    :param date_end: capture end date
     :return:
     """
-    today = datetime.datetime.today()
+    if date_end is None:
+        date_end = datetime.datetime.today()
     check_day_station = lambda d: check_day(d, station)
-    dates = filter(check_day_station, date_generator(today, date))
+    dates = filter(check_day_station, date_generator(date_end, date_start))
     return list(filter(len, map(lambda d: capture(station, d), dates)))
@@ -179,17 +182,26 @@ def capture(station, date):
     """
     url = redemet_url(station, date)
     status = 0
-    wait = 1
+    wait = 3
     while status != 200 and wait <= 16:
         resp = requests.get(url)
         status = resp.status_code
         time.sleep(wait)
-        wait *= 2
+        wait *= 3
     resp_data = resp.json()
 
+    with open('logs/capture-rmet.log', 'a') as f:
+        f.write("{}".format(resp_data['data']["data"]))
+
     page = ''
     for dados in resp_data["data"]["data"]:
         mensagem = dados['mens']
+        # check whether there are more cases the pattern should treat (#53)
+        pattern = re.compile(r' [WSNE][0-9]{1,2}/[WSNE][0-9]{1,2}')
+        result = pattern.findall(mensagem)
+        for r in result:
+            mensagem = mensagem.replace(r, '')
+
         date_receive = dados['recebimento']
         # format date
         date_time_str = datetime.datetime.strptime(
@@ -200,9 +212,11 @@ def capture(station, date):
 
     dataframe = parse_page(page)
     data = describe(dataframe)
+
     if len(data) == 0:
-        logger.warning("Empty data for %s", date)
+        logger.warning("Empty data for %s in %s", station, date)
         return {}
+
     data["date"] = date
     data["station"] = station
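The heart of the parse-error fix is the sanitizing step added to capture(): any token matching ` [WSNE][0-9]{1,2}/[WSNE][0-9]{1,2}` is stripped from the raw METAR message before the page is parsed. Here is a minimal sketch of that behavior; the sample message is hypothetical (station and values made up), and only the pattern and the replace loop come from the diff:

    import re

    # Pattern added in this PR; issue #53 tracks cases it may still miss.
    pattern = re.compile(r' [WSNE][0-9]{1,2}/[WSNE][0-9]{1,2}')

    # Hypothetical METAR-like message; ' N32/W12' is the kind of fragment
    # that triggered the parse error this PR fixes.
    mensagem = 'METAR SBGL 091500Z 12008KT 9999 SCT020 N32/W12 Q1012'

    # Remove every matching token, leading space included.
    for token in pattern.findall(mensagem):
        mensagem = mensagem.replace(token, '')

    print(mensagem)
    # -> METAR SBGL 091500Z 12008KT 9999 SCT020 Q1012

Two smaller behavior changes ride along in capture(): the retry loop now starts at wait = 3 and triples instead of doubling, so a failing URL is requested at most twice (sleeping 3 s, then 9 s) rather than up to five times (1, 2, 4, 8 and 16 s), and each raw response payload is appended to logs/capture-rmet.log. capture_date_range() stays backward compatible: omitting date_end still captures up to today.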
15 changes: 13 additions & 2 deletions crawlclima/tasks.py

@@ -1,4 +1,6 @@
 import csv
+import os
+import sys
 import time
 from datetime import datetime, timedelta
 from io import StringIO
@@ -23,6 +25,10 @@
 
 logger = get_task_logger("Captura")
 
+work_dir = os.getcwd()
+route_abs = os.path.dirname(os.path.abspath(work_dir))
+sys.path.insert(0, route_abs)
+
 
 def get_connection():
     try:
@@ -199,12 +205,17 @@ def fetch_redemet(self, station, date):
         data = capture_date_range(station, date)
     except Exception as e:
         logger.error(
-            "Error fetching from {} at {}: {}".format(station, date, e)
+            "Error fetching from {} at {} data is {}: error: {}".format(
+                station, date, data, e
+            )
         )
         return
     try:
-        logger.info("Saving {}".format(station))
-        save(data, schema="Municipio", table="Clima_wu")
+        if len(data) > 0:
+            save(data, schema="Municipio", table="Clima_wu")
+            logger.info("Saving {}".format(station))
+        else:
+            logger.info("No data found {}".format(station))
     except Exception as e:
         logger.error(
             "Error saving to db with {} at {}: {}".format(station, date, e)
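The module-level lines added to tasks.py prepend the parent of the current working directory to sys.path. A short annotated sketch of what they resolve to (the paths are hypothetical):

    import os
    import sys

    # If the worker starts from /srv/AlertaDengueCaptura/crawlclima, this puts
    # /srv/AlertaDengueCaptura (the repository root) first on sys.path, so
    # top-level packages resolve regardless of where the worker was launched.
    work_dir = os.getcwd()
    route_abs = os.path.dirname(os.path.abspath(work_dir))
    sys.path.insert(0, route_abs)

One caveat on the richer error message in fetch_redemet, assuming data is not assigned earlier in the task (those lines fall outside this diff): if capture_date_range raises before data is bound, formatting data into the log line would itself raise NameError.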
8 changes: 5 additions & 3 deletions requirements-dev.txt

@@ -1,9 +1,11 @@
--r requirements.txt
+# -r requirements.txt
 nose==1.3.7
 responses
 pgcli
 black
-flake8==3.5.0
+flake8==3.7.9
 isort==4.3.19
 pre-commit==2.4.0
-
+pandas
+pytest
+pyflakes==2.1.0
13 changes: 10 additions & 3 deletions setup.py

@@ -1,9 +1,14 @@
-import setuptools
+from setuptools import find_packages, setup
 
 with open("README.md", "r") as fh:
     long_description = fh.read()
 
-setuptools.setup(
+
+def read(filename):
+    return [req.strip() for req in open(filename).readlines()]
+
+
+setup(
     name="crawlclima",
     version="0.1.0",
     author="fccoelho",
@@ -12,11 +17,13 @@
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/AlertaDengue/AlertaDengueCaptura.git",
-    packages=setuptools.find_packages(),
+    packages=find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: GPL V3 License",
         "Operating System :: Linux",
     ],
+    python_requires='>=3.7',
+    install_requires=read("requirements.txt"),
+    extras_require={'develop': read("requirements-dev.txt")},
 )
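The new read() helper feeds requirements files directly into install_requires and extras_require. As written it returns every line stripped, blanks and comments included (for instance the newly commented-out '# -r requirements.txt' in requirements-dev.txt). A slightly more defensive variant, shown here as a sketch rather than what this PR ships, filters those out:

    def read(filename):
        """Return the non-empty, non-comment lines of a requirements file."""
        with open(filename) as f:
            return [
                line.strip()
                for line in f
                if line.strip() and not line.strip().startswith('#')
            ]

With the 'develop' extra wired to requirements-dev.txt, the new Makefile target make install (which runs pip install -e .['develop']) pulls the package and its dev dependencies in one step.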