@@ -39,6 +39,7 @@ The SDK provides four main functionalities:
39
39
```python
40
40
from scrapegraph_py import ScrapeGraphClient, scrape
41
41
from dotenv import load_dotenv
42
+ import os
42
43
43
44
load_dotenv()
44
45
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
@@ -58,6 +59,14 @@ You can also scrape content from local HTML files:
58
59
```python
59
60
from scrapegraph_py import ScrapeGraphClient, scrape_text
60
61
from bs4 import BeautifulSoup
62
+ import os
63
+
64
+ # Load environment variables
65
+ from dotenv import load_dotenv
66
+ load_dotenv()
67
+ api_key = os.getenv("SCRAPEGRAPH_API_KEY")
68
+
69
+ client = ScrapeGraphClient(api_key)
61
70
62
71
def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
63
72
with open(file_path, 'r', encoding='utf-8') as file:
@@ -71,7 +80,6 @@ def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
71
80
return scrape_text(client, text_content, prompt)
72
81
73
82
# Usage
74
- client = ScrapeGraphClient(api_key)
75
83
result = scrape_local_html(
76
84
client,
77
85
'sample.html',
@@ -87,6 +95,12 @@ For more structured data extraction, you can define a Pydantic schema:
87
95
```python
88
96
from pydantic import BaseModel, Field
89
97
from scrapegraph_py import scrape
98
+ import os
99
+
100
+ # Load environment variables
101
+ from dotenv import load_dotenv
102
+ load_dotenv()
103
+ api_key = os.getenv("SCRAPEGRAPH_API_KEY")
90
104
91
105
class CompanyInfoSchema(BaseModel):
92
106
company_name: str = Field(description="The name of the company")
@@ -109,6 +123,12 @@ Monitor your API usage:
109
123
110
124
```python
111
125
from scrapegraph_py import credits
126
+ import os
127
+
128
+ # Load environment variables
129
+ from dotenv import load_dotenv
130
+ load_dotenv()
131
+ api_key = os.getenv("SCRAPEGRAPH_API_KEY")
112
132
113
133
response = credits(api_key)
114
134
print(response)
@@ -120,6 +140,12 @@ You can provide feedback on scraping results and check the API status:
120
140
121
141
```python
122
142
from scrapegraph_py import feedback, status
143
+ import os
144
+
145
+ # Load environment variables
146
+ from dotenv import load_dotenv
147
+ load_dotenv()
148
+ api_key = os.getenv("SCRAPEGRAPH_API_KEY")
123
149
124
150
# Check API status
125
151
status_response = status(api_key)
@@ -135,6 +161,33 @@ feedback_response = feedback(
135
161
print(f"Feedback Response: {feedback_response}")
136
162
```
137
163
164
+ ## Expected Output Example
165
+
166
+ The following is an example of the expected output when scraping articles from a webpage:
167
+
168
```json
169
+ {
170
+ "articles": [
171
+ {
172
"title": "Thousands of People Are Cloning Their Dead Pets. This Is the Woman They Call First",
173
"url": "https://www.wired.com/story/your-next-job-pet-cloner/"
174
},
175
{
176
"title": "The Quantum Geometry That Exists Outside of Space and Time",
177
"url": "https://www.wired.com/story/physicists-reveal-a-quantum-geometry-that-exists-outside-of-space-and-time/"
178
},
179
{
180
"title": "How a PhD Student Discovered a Lost Mayan City From Hundreds of Miles Away",
181
"url": "https://www.wired.com/story/lost-maya-city-valeriana-interview/"
182
},
183
{
184
"title": "The Maker of Ozempic Is Trying to Block Compounded Versions of Its Blockbuster Drug",
185
"url": "https://www.wired.com/story/novo-nordisk-ozempic-compounded-fda-block-pharmacies/"
186
}
187
]
188
}
189
+ ```
190
+
138
191
## Development
139
192
140
193
### Requirements
@@ -196,3 +249,4 @@ For support:
196
249
- Contact our support team
197
250
- Check the examples in the `examples/` directory
198
251
252
+
0 commit comments