Skip to content

Commit 896a04e

Browse files
authored
1073 Remove credentials from Population Data Download (#1074)
1 parent 717c026 commit 896a04e

File tree

6 files changed

+7
-146
lines changed

6 files changed

+7
-146
lines changed

.github/workflows/epidata_main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ jobs:
135135
run: |
136136
mkdir -p data_dl
137137
getcasedata -o data_dl --no-progress-indicators
138-
getpopuldata -o data_dl --no-progress-indicators --username=${{ secrets.REGIODBUSER }} --password=${{ secrets.REGIODBPW }}
138+
getpopuldata -o data_dl --no-progress-indicators
139139
getjhdata -o data_dl --no-progress-indicators
140140
getdividata -o data_dl --no-progress-indicators
141141
getcommutermobility -o data_dl --no-progress-indicators

pycode/memilio-epidata/memilio/epidata/README.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,6 @@ optional arguments working for some are:
130130
| --sanitize-data | Different ways to distribute vaccinations to home |
131131
| | locations of vaccinated persons[vaccination] |
132132
+---------------------------------------------+-----------------------------------------------------------+
133-
| --username | Username for regionalstatistik.de [population] |
134-
+---------------------------------------------+-----------------------------------------------------------+
135-
| --password | Password for regionalstatistik.de [population] |
136-
+---------------------------------------------+-----------------------------------------------------------+
137133
| --files | Files to write [case] |
138134
+---------------------------------------------+-----------------------------------------------------------+
139135

pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -364,8 +364,6 @@ def cli(what):
364364
- verbose
365365
- skip_checks
366366
- no_raw
367-
- username
368-
- password
369367
- to_dataset
370368
371369
@param what Defines what packages calls and thus what kind of command line arguments should be defined.
@@ -379,7 +377,7 @@ def cli(what):
379377

380378
cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
381379
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'],
382-
"population": ['Download population data from official sources', 'username'],
380+
"population": ['Download population data from official sources'],
383381
"commuter_official": ['Download commuter data from official sources'],
384382
"vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'sanitize_data'],
385383
"testing": ['Download testing data', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
@@ -498,14 +496,6 @@ def cli(what):
498496
'--skip-checks', dest='run_checks', action='store_false',
499497
help='Skips sanity checks etc.')
500498

501-
if 'username' in what_list:
502-
parser.add_argument(
503-
'--username', type=str
504-
)
505-
506-
parser.add_argument(
507-
'--password', type=str
508-
)
509499
if '--to-dataset' in sys.argv:
510500
parser.add_argument(
511501
'--to-dataset', dest='to_dataset',

pycode/memilio-epidata/memilio/epidata/getPopulationData.py

Lines changed: 3 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323
@brief Downloads data about population statistic
2424
2525
"""
26-
import configparser
2726
import warnings
28-
import getpass
2927
import requests
3028
import os
3129
import io
@@ -41,82 +39,19 @@
4139
pd.options.mode.copy_on_write = True
4240

4341

44-
def read_population_data(username, password):
42+
def read_population_data():
4543
"""! Reads Population data from regionalstatistik.de
4644
47-
Username and Password are required to sign in on regionalstatistik.de.
4845
A request is made to regionalstatistik.de and the StringIO is read in as a csv into the dataframe format.
49-
50-
@param username Username to sign in at regionalstatistik.de.
51-
@param password Password to sign in at regionalstatistik.de.
5246
@return DataFrame
5347
"""
5448

5549
download_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv'
56-
req = requests.get(download_url, auth=(username, password))
50+
req = requests.get(download_url)
5751
df_pop_raw = pd.read_csv(io.StringIO(req.text), sep=';', header=6)
5852

5953
return df_pop_raw
6054

61-
# This function is needed for unittests
62-
# Fakefilesystem has problems with os.path
63-
64-
65-
def path_to_credential_file():
66-
"""! Returns path to .ini file where credentials are stored.
67-
The Path can be changed if necessary.
68-
"""
69-
return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'CredentialsRegio.ini')
70-
71-
72-
def manage_credentials(interactive):
73-
"""! Manages credentials for regionalstatistik.de (needed for dowload).
74-
75-
A config file inside the epidata folder is either written (if not existent yet)
76-
with input from user or read with following format:
77-
[CREDENTIALS]
78-
Username = XXXXX
79-
Password = XXXXX
80-
81-
@return Username and password to sign in at regionalstatistik.de.
82-
"""
83-
# path where ini file is found
84-
path = path_to_credential_file()
85-
86-
gd.default_print(
87-
'Info', 'No passwaord and/or username for regionalstatistik.de provided. Try to read from .ini file.')
88-
89-
# check if .ini file exists
90-
if not os.path.exists(path):
91-
if interactive:
92-
gd.default_print(
93-
'Info', '.ini file not found. Writing CredentialsRegio.ini...')
94-
username = input(
95-
"Please enter username for https://www.regionalstatistik.de/genesis/online\n")
96-
password = getpass.getpass(
97-
"Please enter password for https://www.regionalstatistik.de/genesis/online\n")
98-
# create file
99-
write_ini = gd.user_choice(
100-
message='Do you want the credentials to be stored in an unencrypted .ini file?\n' +
101-
'The next time this function is called, the credentials can be read from that file.')
102-
if write_ini:
103-
string = '[CREDENTIALS]\nUsername = ' + \
104-
username+'\nPassword = '+password
105-
with open(path, 'w+') as file:
106-
file.write(string)
107-
else:
108-
raise gd.DataError(
109-
'No .ini file found. Cannot access regionalstatistik.de for downloading population data.')
110-
111-
else:
112-
parser = configparser.ConfigParser()
113-
parser.read(path)
114-
115-
username = parser['CREDENTIALS']['Username']
116-
password = parser['CREDENTIALS']['Password']
117-
118-
return username, password
119-
12055

12156
def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_format: str, merge_eisenach: bool):
12257
"""! Writes population dataframe into directory with new column names and age groups
@@ -285,8 +220,6 @@ def test_total_population(df_pop, age_cols):
285220

286221
def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
287222
out_folder: str = dd.defaultDict['out_folder'],
288-
username='',
289-
password='',
290223
**kwargs
291224
) -> pd.DataFrame:
292225
"""! Downloads or reads the population data.
@@ -299,9 +232,6 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
299232
downloaded. Default defined in defaultDict.
300233
@param out_folder Path to folder where data is written in folder
301234
out_folder/Germany. Default defined in defaultDict.
302-
@param username Username to sign in at regionalstatistik.de.
303-
@param password Password to sign in at regionalstatistik.de.
304-
305235
@return DataFrame with adjusted population data for all ages to current level.
306236
"""
307237
conf = gd.Conf(out_folder, **kwargs)
@@ -312,14 +242,10 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
312242
'Warning', 'Read_data is not supportet for getPopulationData.py. Setting read_data = False')
313243
read_data = False
314244

315-
# If no username or password is provided, the credentials are either read from an .ini file or,
316-
# if the file does not exist they have to be given as user input.
317-
if (username is None) or (password is None):
318-
username, password = manage_credentials(conf.interactive)
319245
directory = os.path.join(out_folder, 'Germany')
320246
gd.check_dir(directory)
321247

322-
df_pop_raw = read_population_data(username, password)
248+
df_pop_raw = read_population_data()
323249

324250
return df_pop_raw
325251

@@ -411,8 +337,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
411337
file_format: str = dd.defaultDict['file_format'],
412338
out_folder: str = dd.defaultDict['out_folder'],
413339
merge_eisenach: bool = True,
414-
username='',
415-
password='',
416340
**kwargs
417341
):
418342
"""! Download age-stratified population data for the German counties.
@@ -453,8 +377,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
453377
read_data=read_data,
454378
out_folder=out_folder,
455379
file_format=file_format,
456-
username=username,
457-
password=password,
458380
**kwargs
459381
)
460382
preprocess_df = preprocess_population_data(

pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def test_call_functions(
454454
# change start-date of jh to 2020-01-22
455455
arg_dict_jh["start_date"] = date(2020, 1, 22)
456456

457-
arg_dict_popul = {**arg_dict_all, "username": None, "password": None}
457+
arg_dict_popul = {**arg_dict_all}
458458

459459
getVaccinationData.main()
460460
mock_vaccination.assert_called()

pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@ class Test_getPopulationData(fake_filesystem_unittest.TestCase):
3333

3434
path = '/home/Population_Data'
3535

36-
config_file_name = 'CredentialsRegio.ini'
37-
test_username = 'username_test'
38-
test_password = 'password_test'
39-
4036
here = os.path.dirname(os.path.abspath(__file__))
4137
filename = os.path.join(
4238
here, 'test_data', 'TestSetPopulationExport.json')
@@ -71,53 +67,10 @@ def test_export_population_data(self):
7167
return_value=df_pop_raw)
7268
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
7369
@patch('memilio.epidata.getPopulationData.test_total_population')
74-
def test_get_population_data_full(self, mock_test, mock_export, mock_download):
70+
def test_get_population_data_full(self, mock_test, mock_assign, mock_download):
7571
# should not raise any errors
7672
gpd.get_population_data(out_folder=self.path)
7773

78-
@patch('builtins.input', return_value=test_username)
79-
@patch('getpass.getpass', return_value=test_password)
80-
@patch('memilio.epidata.getDataIntoPandasDataFrame.user_choice', return_value=True)
81-
@patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini')
82-
@patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw)
83-
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
84-
@patch('memilio.epidata.getPopulationData.test_total_population')
85-
def test_config_write(self, mock_test, mock_export, mock_raw, mock_path, mock_choice, mock_pw, mock_un):
86-
# username and password should be written into the config file.
87-
# The download and assigning to counties of the population data is mocked.
88-
gpd.get_population_data(username=None, password=None, interactive=True)
89-
# Check if the file is written.
90-
self.assertTrue(self.config_file_name in os.listdir(os.getcwd()))
91-
# Check content of the file.
92-
# Read file.
93-
parser = configparser.ConfigParser()
94-
parser.read(os.path.join(os.getcwd(), self.config_file_name))
95-
# Test content.
96-
self.assertEqual(parser['CREDENTIALS']['Username'], self.test_username)
97-
self.assertEqual(parser['CREDENTIALS']['Password'], self.test_password)
98-
99-
@patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini')
100-
@patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw)
101-
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
102-
@patch('memilio.epidata.getPopulationData.test_total_population')
103-
def test_config_read(self, mock_test, mock_export, mock_read, mock_path):
104-
# File should not exist yet.
105-
self.assertFalse(self.config_file_name in os.listdir(os.getcwd()))
106-
# Create config file.
107-
string = '[CREDENTIALS]\nUsername = ' + \
108-
self.test_username+'\nPassword = '+self.test_password
109-
path = os.path.join(os.getcwd(), self.config_file_name)
110-
with open(path, 'w+') as file:
111-
file.write(string)
112-
# Check if the file is written.
113-
self.assertTrue(self.config_file_name in os.listdir(os.getcwd()))
114-
# The download and assigning to counties of the population data is mocked.
115-
gpd.get_population_data(
116-
username=None, password=None, read_data=False, out_folder=self.path, interactive=False)
117-
# The file exist in the directory (mocked) and the credentials should be read.
118-
mock_read.assert_called_with(
119-
self.test_username, self.test_password)
120-
12174

12275
if __name__ == '__main__':
12376
unittest.main()

0 commit comments

Comments
 (0)