Skip to content

Commit 5263f0e

Browse files
author
alvarob96
committed
updated resources and requirements & fixed errors
- fixed scraping errors (xpath) - added todo tasks - marked completed tasks as done - 0.8.5 version preparation - efficiency/error checks still needed
1 parent e7cd86f commit 5263f0e

16 files changed

+654
-584
lines changed

Diff for: .idea/.DS_Store

6 KB
Binary file not shown.

Diff for: .idea/dictionaries/.DS_Store

6 KB
Binary file not shown.

Diff for: .idea/dictionaries/alvarobartt.xml

-9
This file was deleted.

Diff for: .idea/workspace.xml

+463-427
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: .travis.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ dist: xenial
99

1010
install:
1111
- pip install pandas==0.24.2
12-
- pip install requests==2.21.0
12+
- pip install requests==2.22.0
1313
- pip install lxml==4.3.3
1414
- pip install unidecode==1.0.23
15-
- pip install investpy==0.8.4.3
16-
- pip install pytest==4.1.1
15+
- pip install investpy==0.8.4.5
16+
- pip install pytest==4.5
1717

1818
script:
1919
- pytest

Diff for: README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ To conclude this section, I am in the need to specify that this is not the final
1616

1717
In order to get this package working you will need to install [**investpy**](https://pypi.org/project/investpy/) from PyPI via the terminal by typing:
1818

19-
``pip install investpy==0.8.4.4``
19+
``pip install investpy==0.8.4.5``
2020

2121
All the dependencies are already listed on the setup file of the package, but to sum them up, you will need the following requirements:
2222

@@ -51,6 +51,7 @@ If needed you can open an [issue](https://github.com/alvarob96/investpy/issues)
5151
* Updated docstrings as reStructuredText (via PyCharm)
5252
* Modified JSON output to fit current standard for historical data
5353
* Added function to retrieve information from listed ETFs (id, name, symbol and tag)
54+
*
5455

5556
## Additional Information
5657

Diff for: investpy/Data.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,13 @@ def fund_to_dict(self):
7575
}
7676

7777
def fund_as_json(self):
78-
return {self.date.strftime('%d/%m/%Y'): {
79-
'Open': self.open,
80-
'High': self.high,
81-
'Low': self.low,
82-
'Close': self.close,
83-
}}
78+
return {
79+
'date': self.date.strftime('%d/%m/%Y'),
80+
'open': self.open,
81+
'high': self.high,
82+
'low': self.low,
83+
'close': self.close,
84+
}
8485

8586
def etf_to_dict(self):
8687
return {

Diff for: investpy/__init__.py

+23-9
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
__author__ = "Alvaro Bartolome <[email protected]>"
77

88
import datetime
9+
import json
910
from random import randint
10-
import unidecode
1111

1212
import pandas as pd
1313
import pkg_resources
1414
import requests
15-
import json
15+
import unidecode
1616
from lxml.html import fromstring
1717

1818
from investpy import user_agent as ua, equities as ts, funds as fs, etfs as es
@@ -22,6 +22,8 @@
2222
# TODO: add country/market param and mapping of ‘resources/available_markets’ in order to allow users to retrieve
2323
# historical data from different markets.
2424

25+
# DONE: available_languages replaced by available_markets
26+
2527
# TODO: create thread pools to increase scraping efficiency and improve ‘investpy’ performance => CHECK BOOK DOC
2628

2729
# TODO: generate sphinx documentation for version 1.0
@@ -51,6 +53,12 @@
5153

5254
# TODO: fix docstrings and unify structure with Google docstrings or similar
5355

56+
# WARNING: RE-GENERATE MARKET FILES BEFORE EVERY RELEASE
57+
58+
# TODO: add 'clase de activo' (asset class), 'isin' and 'emisor' (issuer) to funds
59+
60+
# DONE: updated equities, funds and etfs retrieval functions
61+
5462

5563
def get_equities_list():
5664
"""
@@ -164,10 +172,12 @@ def get_recent_data(equity, as_json=False, order='ascending'):
164172
'recent':
165173
[value.equity_as_json() for value in result]
166174
}
167-
return json.dumps(json_)
175+
176+
return json.dumps(json_, sort_keys=False)
168177
elif as_json is False:
169178
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
170179
df.set_index('Date', inplace=True)
180+
171181
return df
172182
else:
173183
raise RuntimeError("ERR#004: data retrieval error while scraping.")
@@ -364,7 +374,7 @@ def get_historical_data(equity, start, end, as_json=False, order='ascending'):
364374
raise RuntimeError("ERR#004: data retrieval error while scraping.")
365375

366376
if as_json is True:
367-
return json.dumps(final)
377+
return json.dumps(final, sort_keys=False)
368378
elif as_json is False:
369379
return pd.concat(final)
370380
else:
@@ -592,10 +602,12 @@ def get_fund_recent_data(fund, as_json=False, order='ascending'):
592602
'recent':
593603
[value.fund_as_json() for value in result]
594604
}
595-
return json.dumps(json_)
605+
606+
return json.dumps(json_, sort_keys=False)
596607
elif as_json is False:
597608
df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
598609
df.set_index('Date', inplace=True)
610+
599611
return df
600612

601613
else:
@@ -768,7 +780,7 @@ def get_fund_historical_data(fund, start, end, as_json=False, order='ascending')
768780
raise RuntimeError("ERR#004: data retrieval error while scraping.")
769781

770782
if as_json is True:
771-
return json.dumps(final)
783+
return json.dumps(final, sort_keys=False)
772784
elif as_json is False:
773785
return pd.concat(final)
774786
else:
@@ -1032,10 +1044,12 @@ def get_etf_recent_data(etf, as_json=False, order='ascending'):
10321044
'recent':
10331045
[value.etf_as_json() for value in result]
10341046
}
1035-
return json.dumps(json_)
1047+
1048+
return json.dumps(json_, sort_keys=False)
10361049
elif as_json is False:
10371050
df = pd.DataFrame.from_records([value.etf_to_dict() for value in result])
10381051
df.set_index('Date', inplace=True)
1052+
10391053
return df
10401054

10411055
else:
@@ -1201,14 +1215,14 @@ def get_etf_historical_data(etf, start, end, as_json=False, order='ascending'):
12011215
final.append(json_)
12021216
elif as_json is False:
12031217
df = pd.DataFrame.from_records([value.etf_to_dict() for value in result])
1204-
df.set_index('date', inplace=True)
1218+
df.set_index('Date', inplace=True)
12051219

12061220
final.append(df)
12071221
else:
12081222
raise RuntimeError("ERR#004: data retrieval error while scraping.")
12091223

12101224
if as_json is True:
1211-
return json.dumps(final)
1225+
return json.dumps(final, sort_keys=False)
12121226
elif as_json is False:
12131227
return pd.concat(final)
12141228
else:

Diff for: investpy/equities.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ def get_equity_names():
4949
raise ConnectionError("ERR#015: error " + req.status_code + ", try again later.")
5050

5151
root_ = fromstring(req.text)
52-
path_ = root_.xpath(".//table[@id='cross_rate_markets_stocks_1']/tbody/tr")
52+
path_ = root_.xpath(".//table[@id='cross_rate_markets_stocks_1']"
53+
"/tbody"
54+
"/tr")
5355

5456
results = list()
5557

@@ -60,7 +62,11 @@ def get_equity_names():
6062
for element_ in elements_.xpath('.//a'):
6163
tag_ = element_.get('href').replace('/equities/', '')
6264
full_name_ = element_.get('title').replace(' (CFD)', '')
63-
isin_ = get_isin_code(tag_)
65+
66+
try:
67+
isin_ = get_isin_code(tag_)
68+
except (ConnectionError, IndexError):
69+
isin_ = None
6470

6571
data = {
6672
"name": element_.text,
@@ -105,22 +111,30 @@ def get_isin_code(info):
105111
req = requests.get(url, headers=head, timeout=5)
106112

107113
if req.status_code != 200:
108-
# raise ConnectionError("ERR#015: error " + req.status_code + ", try again later.")
109-
return None
114+
raise ConnectionError("ERR#015: error " + req.status_code + ", try again later.")
110115

111116
root_ = fromstring(req.text)
112-
path_ = root_.xpath("/html/body/div[5]/section/div[4]/div[1]/div[2]/div[3]/span[2]")
117+
path_ = root_.xpath(".//div[contains(@class, 'overViewBox')]"
118+
"/div[@id='quotes_summary_current_data']"
119+
"/div[@class='right']"
120+
"/div")
113121

114-
code = None
115-
116-
if path_:
122+
for p in path_:
117123
try:
118-
code = path_[0].text_content().rstrip()
119-
time.sleep(.5)
124+
if p.xpath("span[not(@class)]")[0].text_content().__contains__('ISIN'):
125+
try:
126+
code = p.xpath("span[@class='elp']")[0].text_content().rstrip()
127+
time.sleep(.5)
128+
129+
return code
130+
except IndexError:
131+
raise IndexError("ERR#017: isin code unavailable or not found.")
132+
else:
133+
continue
120134
except IndexError:
121135
raise IndexError("ERR#017: isin code unavailable or not found.")
122136

123-
return code
137+
return None
124138

125139

126140
def list_equities():

Diff for: investpy/etfs.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
__author__ = "Alvaro Bartolome <[email protected]>"
77

8+
import time
9+
810
import pandas as pd
911
import requests
1012
import json
@@ -40,7 +42,9 @@ def get_etf_names():
4042
raise ConnectionError("ERR#015: error " + req.status_code + ", try again later.")
4143

4244
root_ = fromstring(req.text)
43-
path_ = root_.xpath(".//table[@id='etfs']/tbody/tr")
45+
path_ = root_.xpath(".//table[@id='etfs']"
46+
"/tbody"
47+
"/tr")
4448

4549
results = list()
4650

@@ -99,11 +103,11 @@ def list_etfs():
99103

100104
if etfs is None:
101105
raise IOError("ERR#009: etf list not found or unable to retrieve.")
102-
103-
return etfs['name'].tolist()
106+
else:
107+
return etfs['name'].tolist()
104108

105109

106-
def dict_etfs(columns=['id', 'name', 'symbol', 'tag'], as_json=False):
110+
def dict_etfs(columns=None, as_json=False):
107111
"""
108112
This function retrieves all the available etfs and returns a dictionary with the specified columns.
109113
Available columns are: 'id', 'name', 'symbol' and 'tag'
@@ -114,8 +118,11 @@ def dict_etfs(columns=['id', 'name', 'symbol', 'tag'], as_json=False):
114118
:returns a dictionary that contains all the available etf values specified in the columns
115119
"""
116120

117-
if not isinstance(columns, list):
118-
raise ValueError("ERR#020: specified columns argument is not a list, it can just be list type.")
121+
if columns is None:
122+
columns = ['id', 'name', 'symbol', 'tag']
123+
else:
124+
if not isinstance(columns, list):
125+
raise ValueError("ERR#020: specified columns argument is not a list, it can just be list type.")
119126

120127
if not isinstance(as_json, bool):
121128
raise ValueError("ERR#002: as_json argument can just be True or False, bool type.")

Diff for: investpy/funds.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ def get_fund_names():
4040
raise ConnectionError("ERR#015: error " + req.status_code + ", try again later.")
4141

4242
root_ = fromstring(req.text)
43-
path_ = root_.xpath(".//table[@id='etfs']/tbody/tr")
43+
path_ = root_.xpath(".//table[@id='etfs']"
44+
"/tbody"
45+
"/tr")
4446

4547
results = list()
4648

@@ -49,19 +51,19 @@ def get_fund_names():
4951
id_ = elements_.get('id').replace('pair_', '')
5052
symbol = elements_.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')
5153

52-
nested = elements_.xpath(".//a")[0]
53-
info = nested.get('href').replace('/funds/', '')
54+
nested = elements_.xpath(".//a")[0].get('title').rstrip()
55+
info = elements_.xpath(".//a")[0].get('href').replace('/funds/', '')
5456

5557
if symbol:
5658
data = {
57-
"name": nested.text,
59+
"name": nested,
5860
"symbol": symbol,
5961
"tag": info,
6062
"id": id_
6163
}
6264
else:
6365
data = {
64-
"name": nested.text,
66+
"name": nested,
6567
"symbol": "undefined",
6668
"tag": info,
6769
"id": id_
@@ -107,7 +109,7 @@ def fund_information_to_json(df):
107109
'Category': str(df['Category'][0])
108110
}
109111

110-
result = json.dumps(json_)
112+
result = json.dumps(json_, sort_keys=False)
111113

112114
return result
113115

@@ -132,5 +134,9 @@ def list_funds():
132134

133135
if funds is None:
134136
raise IOError("ERR#005: fund list not found or unable to retrieve.")
137+
else:
138+
return funds['name'].tolist()
139+
135140

136-
return funds['name'].tolist()
141+
if __name__ == '__main__':
142+
get_fund_names()

Diff for: investpy/resources/es/equities.csv

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ PharmaMar S.A.,959215,ES0169501030,PharmaMar,pharma-mar-sau
164164
Gigas Hosting SA,960710,ES0105093001,Gigas Hosting SA,gigas-hosting-sa
165165
Neol Biosolutions SA,960711,ES0105081006,Neol Biosolutions SA,neol-biosolutions-sa
166166
Agile Content SA,961658,ES0105102000,Agile Content SA,agile-content-sa
167-
Think Smart SA,961659,ES0105097002,Think Smart SA,think-smart-sa
168167
Zambal Spain Socimi SA,961773,ES0105080008,Zambal Spain Socimi SA,zambal-spain-socimi-sa
169168
Oryzon Genomics SA,962024,ES0167733015,Oryzon Genomics,oryzon-genomics-sa
170169
Tecnoquark Trust SA,962349,ES0105076006,Tecnoquark Trust SA,tecnoquark-trust-sa
@@ -234,3 +233,4 @@ Euripo Properties Socimi,1122284,ES0105387007,Euripo Properties,euripo-propertie
234233
Proeduca Altus SA,1123486,ES0105400008,Proeduca Altus,proeduca-altus
235234
Meridia Real Estate III Socimi SA,1123955,ES0105313003,Meridia RE III,meridia-re-iii
236235
Iffe Futura SA,1123956,ES0171613005,Iffe Futura,iffe-futura
236+
Compania Espanola de Viviendas en Alquiler SA,1130889,ES0132955008,Viviendas en Alquiler,viviendas-en-alquiler

Diff for: investpy/resources/es/etfs.csv

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
id,name,symbol,tag
22
37633,Lyxor Ibex 35 Doble Inverso Diario,2INVE,lyxor-ibex-35-x2-inverso
33
37631,Lyxor Ibex 35 Doble Apalancado Diario C-EUR,IBEXA,lyxor-ibex-35-doble-apalancado
4-
37632,Accion IBEX 35 Cotizado Armonizado FI,BBVAI,bbva-accion-ibex-35
54
47649,BBVA Accion DJ Eurostoxx 50,BBVAE,bbva-accion-dj-eurostoxx-50
6-
38897,Lyxor Ibex 35 Inverso Diario,INVEX,lyxor-ibex-35-invers
75
38898,Lyxor Ibex35 (DR) D-EUR,LYXIB,lyxor-ibex-35
6+
37632,Accion IBEX 35 Cotizado Armonizado FI,BBVAI,bbva-accion-ibex-35
7+
38897,Lyxor Ibex 35 Inverso Diario,INVEX,lyxor-ibex-35-invers

0 commit comments

Comments
 (0)