Skip to content

Commit 88f53e3

Browse files
author
Álvaro Bartolomé
committed
fix id & alpha setup
1 parent 5942cd4 commit 88f53e3

File tree

9 files changed

+594
-499
lines changed

9 files changed

+594
-499
lines changed

Diff for: .idea/workspace.xml

+144-140
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: investing_scrapper/Data.py

+10-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ def __init__(self, date_, close_, open_, max_, min_, volume_):
1010
self.min = min_
1111
self.volume = volume_
1212

13-
def to_dict(self):
13+
def equity_to_dict(self):
1414
return {
1515
'Date': self.date,
1616
'Close': self.close,
@@ -19,3 +19,12 @@ def to_dict(self):
1919
'Min': self.min,
2020
'Volume': self.volume,
2121
}
22+
23+
def fund_to_dict(self):
24+
return {
25+
'Date': self.date,
26+
'Close': self.close,
27+
'Open': self.open,
28+
'Max': self.max,
29+
'Min': self.min,
30+
}

Diff for: investing_scrapper/__init__.py

+66-14
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def get_recent_data(equity):
5757

5858
result = result[::-1]
5959

60-
df = pd.DataFrame.from_records([value.to_dict() for value in result])
60+
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
6161
df.set_index('Date', inplace=True)
6262

6363
return df
@@ -84,12 +84,12 @@ def get_historical_data(equity, start, end):
8484
status = req.status_code
8585
html = BeautifulSoup(req.text, 'html.parser')
8686

87-
selection = html.select('div.instrumentHeader > h2.float_lang_base_1')
87+
selection = html.select('div.instrumentHeader > h2')
8888
for element in selection:
8989
header = element.text
9090

9191
params = {
92-
"curr_id": "558",
92+
"curr_id": row.id,
9393
"smlID": "1159685",
9494
"header": header,
9595
"st_date": start,
@@ -117,7 +117,6 @@ def get_historical_data(equity, start, end):
117117

118118
for element in selection:
119119
info = element.getText().strip().split()
120-
print(info)
121120

122121
stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
123122
stock_close = float(info[1].replace(',', '.'))
@@ -137,13 +136,13 @@ def get_historical_data(equity, start, end):
137136

138137
result = result[::-1]
139138

140-
df = pd.DataFrame.from_records([value.to_dict() for value in result])
139+
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
141140
df.set_index('Date', inplace=True)
142141

143142
return df
144143

145144

146-
"""def get_fund_recent_data(fund):
145+
def get_fund_recent_data(fund):
147146
resource_package = __name__
148147
resource_path = '/'.join(('resources', 'funds.csv'))
149148
if pkg_resources.resource_exists(resource_package, resource_path):
@@ -176,18 +175,71 @@ def get_historical_data(equity, start, end):
176175
stock_open = float(info[2].replace(',', '.'))
177176
stock_max = float(info[3].replace(',', '.'))
178177
stock_min = float(info[4].replace(',', '.'))
179-
stock_volume = 0
180178

181-
if info[5].__contains__('M'):
182-
stock_volume = int(float(info[5].replace('M', '').replace(',', '.')) * 1000000)
183-
elif info[5].__contains__('B'):
184-
stock_volume = int(float(info[5].replace('B', '').replace(',', '.')) * 1000000000)
179+
result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, None,))
185180

186-
result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, stock_volume,))
181+
result = result[::-1]
182+
183+
df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
184+
df.set_index('Date', inplace=True)
185+
186+
return df
187+
188+
189+
def get_fund_historical_data(fund, start, end):
190+
resource_package = __name__
191+
resource_path = '/'.join(('resources', 'funds.csv'))
192+
if pkg_resources.resource_exists(resource_package, resource_path):
193+
funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
194+
else:
195+
names = fs.get_fund_names()
196+
funds = pd.DataFrame(names)
197+
198+
for row in funds.itertuples():
199+
if row.name.lower() == fund.lower():
200+
header = "Datos históricos " + row.symbol
201+
202+
params = {
203+
"curr_id": row.id,
204+
"smlID": "15361696",
205+
"header": header,
206+
"st_date": start,
207+
"end_date": end,
208+
"interval_sec": "Daily",
209+
"sort_col": "date",
210+
"sort_ord": "DESC",
211+
"action": "historical_data"
212+
}
213+
214+
head = {
215+
"User-Agent": ua.get_random(),
216+
"X-Requested-With": "XMLHttpRequest"
217+
}
218+
219+
url = "https://es.investing.com/instruments/HistoricalDataAjax"
220+
221+
req = requests.post(url, data=params, headers=head)
222+
223+
html = BeautifulSoup(req.content, 'html.parser')
224+
225+
selection = html.select('div#results_box > table#curr_table > tbody > tr')
226+
227+
result = list()
228+
229+
for element in selection:
230+
info = element.getText().strip().split()
231+
232+
stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
233+
stock_close = float(info[1].replace(',', '.'))
234+
stock_open = float(info[2].replace(',', '.'))
235+
stock_max = float(info[3].replace(',', '.'))
236+
stock_min = float(info[4].replace(',', '.'))
237+
238+
result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, None,))
187239

188240
result = result[::-1]
189241

190-
df = pd.DataFrame.from_records([value.to_dict() for value in result])
242+
df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
191243
df.set_index('Date', inplace=True)
192244

193-
return df"""
245+
return df

Diff for: investing_scrapper/equities.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -33,13 +33,16 @@ def get_equity_names():
3333
results = list()
3434

3535
for element in selection:
36+
id_ = element.get("id")
37+
id_ = id_.replace('pair_', '')
3638
for nested in element.select("a"):
3739
info = nested.get("href")
3840
info = info.replace("/equities/", "")
3941

4042
data = {
4143
"name": nested.text,
42-
"tag": info
44+
"tag": info,
45+
"id": id_
4346
}
4447

4548
results.append(data)

Diff for: investing_scrapper/funds.py

+27-3
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,9 @@ def get_fund_names():
2525
results = list()
2626

2727
for element in selection:
28+
id_ = element.get('id')
29+
id_ = id_.replace('pair_', '')
30+
2831
symbol = None
2932
for symbol in element.select("td.symbol"):
3033
symbol = symbol.get("title")
@@ -37,13 +40,15 @@ def get_fund_names():
3740
data = {
3841
"name": nested.text,
3942
"symbol": symbol,
40-
"tag": info
43+
"tag": info,
44+
"id": id_
4145
}
4246
else:
4347
data = {
4448
"name": nested.text,
4549
"symbol": "undefined",
46-
"tag": info
50+
"tag": info,
51+
"id": id_
4752
}
4853

4954
results.append(data)
@@ -55,4 +60,23 @@ def get_fund_names():
5560
df = pd.DataFrame(results)
5661
df.to_csv(file, index=False)
5762

58-
return results
63+
return results
64+
65+
66+
# def get_id_value(fund):
67+
# url = "https://es.investing.com/funds/" + fund + "-historical-data"
68+
# headers = {
69+
# 'User-Agent': ua.get_random()
70+
# }
71+
#
72+
# req = requests.get(url, headers=headers)
73+
#
74+
# html = BeautifulSoup(req.text, 'html.parser')
75+
#
76+
# selection = html.select('div.js-inject-add-alert-widget > div')
77+
#
78+
# for element in selection:
79+
# id_ = element['data-pair-id']
80+
# return id_
81+
#
82+
# return 0

0 commit comments

Comments
 (0)