forked from respec/HSPsquared
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHDF5.py
113 lines (95 loc) · 4.54 KB
/
HDF5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from weakref import finalize
from pandas.core.frame import DataFrame
import HSP2tools
import pandas as pd
import os
from typing import Union, Dict, Tuple #, final
# HDF5 is not thread-safe, so access has to be guarded with a manual lock.
from threading import Lock
class HDF5:
    """Reader for result time series and specification tables in an HDF5 file.

    Result tables are cached in ``self.data`` keyed by
    ``(operation, id, activity)``. Filling that cache is serialized with
    ``self.lock``.
    """

    # Activities whose constituent names embed a qual identifier that has to
    # be translated into a numbered column prefix such as ``GQUAL1_``.
    REQUIRES_MAPPING = ['GQUAL', 'CONS', 'IQUAL', 'PQUAL']

    def __init__(self, file_name: str) -> None:
        """Load the alias table and the qual-number mappings for *file_name*.

        Args:
            file_name: Path of the HDF5 file to read from.
        """
        self.file_name = file_name
        self.aliases = self._read_aliases_csv()
        self.data = {}
        self.lock = Lock()

        self.gqual_prefixes = self._read_gqual_mapping()
        self.cons_prefixes = self._read_cons_mapping()
        self.iqual_prefixes = self._read_iqual_mapping()
        self.pqual_prefixes = self._read_pqual_mapping()

    def _read_nqual_mapping(self, key: str, target_col: str, nquals: int = 10) -> Dict[str, str]:
        """Map qual identifiers to the number of the table they were read from.

        Some modules, like GQUAL, allow for a number which corresponds to the
        constituent being modeled. However, which number is associated with
        which parameter changes based on the UCI file, so the mapping has to
        be read from the specification tables.

        Args:
            key: Table path without the trailing number, e.g.
                ``RCHRES/GQUAL/GQUAL``.
            target_col: Column of the table that holds the identifier.
            nquals: Exclusive upper bound of the numbers that are probed.

        Returns:
            Dict of ``{identifier: number-as-string}`` taken from the first
            row of every numbered table that exists.
        """
        # IQUAL and PQUAL keep the identifier in a FLAGS sub-table.
        suffix = '/FLAGS' if key.endswith(('IQUAL', 'PQUAL')) else ''

        dict_mappings = {}
        # Open the file once instead of once per probed number.
        with pd.HDFStore(self.file_name, 'r') as store:
            # NOTE(review): range() excludes nquals itself, so only the
            # numbers 1..nquals-1 are probed -- confirm this is intended.
            for i in range(1, nquals):
                try:
                    df = pd.read_hdf(store, f'{key}{i}' + suffix)
                    row = df.iloc[0]
                    gqid = row[target_col]
                    dict_mappings[gqid] = str(i)
                except KeyError:
                    # Means no nqual number (e.g. GQUAL3) for this run.
                    pass
        return dict_mappings

    def _read_gqual_mapping(self) -> Dict[str, str]:
        """Return the GQID -> number mapping of the RCHRES GQUAL tables."""
        return self._read_nqual_mapping(R'RCHRES/GQUAL/GQUAL', 'GQID', 7)

    def _read_cons_mapping(self) -> Dict[str, str]:
        """Return the CONID -> number mapping of the RCHRES CONS tables."""
        return self._read_nqual_mapping(R'RCHRES/CONS/CONS', 'CONID', 7)

    def _read_iqual_mapping(self) -> Dict[str, str]:
        """Return the QUALID -> number mapping of the IMPLND IQUAL tables."""
        return self._read_nqual_mapping(R'IMPLND/IQUAL/IQUAL', 'QUALID', 10)

    def _read_pqual_mapping(self) -> Dict[str, str]:
        """Return the QUALID -> number mapping of the PERLND PQUAL tables."""
        return self._read_nqual_mapping(R'PERLND/PQUAL/PQUAL', 'QUALID', 10)

    def _read_aliases_csv(self) -> Dict[Tuple[str, str, str], str]:
        """Load ``HBNAliases.csv`` from the HSP2tools ``data`` directory.

        Returns:
            Dict keyed by ``(operation, activity, hspf_name)`` whose values
            are the matching ``hsp2_name`` entries.
        """
        datapath = os.path.join(HSP2tools.__path__[0], 'data', 'HBNAliases.csv')
        df = pd.read_csv(datapath)
        df = df.set_index(['operation', 'activity', 'hspf_name'])
        df_dict = df['hsp2_name'].to_dict()
        return df_dict

    def get_time_series(self, operation: str, id: str, constituent: str, activity: str) -> Union[pd.Series, None]:
        """Read a results table from the HDF5 file and return one column.

        Args:
            operation: Operation name, e.g. ``PERLND`` (case-insensitive).
            id: Operation id as it appears in the results key.
            constituent: Name of the requested column (case-insensitive).
            activity: Activity name, e.g. ``PWATER`` (case-insensitive).

        Returns:
            The requested column, or ``None`` when neither the constituent
            nor its alias is available.
        """
        operation = operation.upper()
        constituent = constituent.upper()
        activity = activity.upper()

        constituent_prefix = ''
        if activity in self.REQUIRES_MAPPING:
            prefix_dict = getattr(self, f'{activity.lower()}_prefixes')
            # PQUAL/IQUAL names end with the identifier; the others start
            # with it.
            match_at_end = activity in ('PQUAL', 'IQUAL')
            for qual_id, number in prefix_dict.items():
                if match_at_end:
                    matched = constituent.endswith(qual_id)
                else:
                    matched = constituent.startswith(qual_id)
                if matched:
                    constituent_prefix = f'{activity}{number}_'
                    # NOTE(review): str.replace strips every occurrence of
                    # the identifier, not only the matched prefix/suffix.
                    constituent = constituent.replace(qual_id, '')

        key = (operation, id, activity)
        try:
            # The context manager releases the lock even if reading the
            # table raises; a bare acquire()/release() pair would leave it
            # held and block every later call.
            with self.lock:
                if key not in self.data:
                    self.data[key] = self._read_table(operation, id, activity)
            df = self.data[key]

            column = constituent_prefix + constituent
            if column in df.columns:
                return df[column]
            # Fall back to the HSP2 alias of the HSPF constituent name.
            constituent_alias = self.aliases[(operation, activity, constituent)]
            return df[constituent_prefix + constituent_alias]
        except KeyError:
            return None

    def _read_table(self, operation: str, id: str, activity: str) -> pd.DataFrame:
        """Read one results table and index it by timestamp.

        Args:
            operation: Upper-case operation name.
            id: Operation id used in the results key.
            activity: Upper-case activity name.

        Returns:
            The table with its ``index`` column converted to datetimes and
            set as the index, or an empty DataFrame when a ``KeyError`` is
            raised while reading.
        """
        key = f'RESULTS/{operation}_{operation[0]}{id}/{activity}/table'
        try:
            with pd.HDFStore(self.file_name, 'r') as store:
                df = pd.read_hdf(store, key=key)
                df['index'] = pd.to_datetime(df['index'], unit='ns')
                df = df.set_index('index')
                return df
        except KeyError:
            return pd.DataFrame()