Skip to content

Commit f7e54c7

Browse files
authored
Merge pull request #33 from ScrapeGraphAI/js-error-handling
2 parents 5d90227 + e72de2e commit f7e54c7

7 files changed

+140
-6
lines changed

scrapegraph-js/README.md

+20
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,26 @@ const schema = z.object({
107107
})();
108108
```
109109

110+
### Search Scraping
111+
112+
Search and extract information from multiple web sources using AI.
113+
114+
```javascript
115+
import { searchScraper } from 'scrapegraph-js';
116+
117+
const apiKey = 'your-api-key';
118+
const prompt = 'What is the latest version of Python and what are its main features?';
119+
120+
(async () => {
121+
try {
122+
const response = await searchScraper(apiKey, prompt);
123+
console.log(response.result);
124+
} catch (error) {
125+
console.error('Error:', error);
126+
}
127+
})();
128+
```
129+
110130
### Scraping local HTML
111131

112132
Extract structured data from local HTML content
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { getSearchScraperRequest } from 'scrapegraph-js';
2+
import 'dotenv/config';
3+
4+
// Example: look up a previously submitted searchScraper request by its ID and
// print whatever the API returns for it. Failures are logged, not rethrown.
const apiKey = process.env.SGAI_APIKEY;
const requestId = '64801288-6e3b-41f3-9d94-07cff3829e15';

async function main() {
  try {
    console.log(await getSearchScraperRequest(apiKey, requestId));
  } catch (err) {
    console.error(err);
  }
}

await main();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import { searchScraper } from 'scrapegraph-js';
2+
import { z } from 'zod';
3+
import 'dotenv/config';
4+
5+
// Example: run a searchScraper query and ask for the answer in a structured
// shape described by a Zod schema. Failures are logged, not rethrown.
const apiKey = process.env.SGAI_APIKEY;
const prompt = 'What is the latest version of Python and what are its main features?';

// Desired output shape; passed to searchScraper as its third argument.
const schema = z.object({
  version: z.string().describe('The latest version'),
  release_date: z.string().describe('The release date of latest version'),
  major_features: z.array(z.string()),
});

async function main() {
  try {
    const { result } = await searchScraper(apiKey, prompt, schema);
    console.log(result);
  } catch (err) {
    console.error(err);
  }
}

await main();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { searchScraper } from 'scrapegraph-js';
2+
import 'dotenv/config';
3+
4+
// Example: run a searchScraper query without an output schema and print the
// full response. Failures are logged, not rethrown.
const apiKey = process.env.SGAI_APIKEY;
const prompt = 'What is the latest version of Python and what are its main features?';

async function main() {
  try {
    console.log(await searchScraper(apiKey, prompt));
  } catch (err) {
    console.error(err);
  }
}

await main();

scrapegraph-js/index.js

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js';
22
export { markdownify, getMarkdownifyRequest } from './src/markdownify.js';
33
export { localScraper, getLocalScraperRequest } from './src/localScraper.js';
4+
export { searchScraper, getSearchScraperRequest } from './src/searchScraper.js';
45
export { getCredits } from './src/credits.js';
56
export { sendFeedback } from './src/feedback.js';

scrapegraph-js/src/searchScraper.js

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import axios from 'axios';
2+
import handleError from './utils/handleError.js';
3+
import { ZodType } from 'zod';
4+
import { zodToJsonSchema } from 'zod-to-json-schema';
5+
6+
/**
 * Search and extract information from multiple web sources using AI.
 *
 * Sends a POST request to the searchscraper endpoint with the prompt and,
 * when a schema is given, its JSON Schema conversion as `output_schema`.
 *
 * @param {string} apiKey - Your ScrapeGraph AI API key
 * @param {string} prompt - Natural language prompt describing what data to extract
 * @param {ZodType|null} [schema=null] - Optional Zod schema defining the output structure
 * @param {string|null} [userAgent=null] - Optional user agent, like "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
 * @returns {Promise<Object>} The response body returned by the API (`response.data`)
 * @throws {Error} If `schema` is provided but is not an instance of a Zod schema.
 * @throws - Will throw an error in case of an HTTP failure.
 */
export async function searchScraper(apiKey, prompt, schema = null, userAgent = null) {
  const endpoint = 'https://api.scrapegraphai.com/v1/searchscraper';

  const headers = {
    accept: 'application/json',
    'SGAI-APIKEY': apiKey,
    'Content-Type': 'application/json',
  };
  // Only send a User-Agent header when the caller supplied one.
  if (userAgent) {
    headers['User-Agent'] = userAgent;
  }

  const payload = { user_prompt: prompt };
  if (schema) {
    // Reject anything that is not a Zod schema before any network call is made.
    if (!(schema instanceof ZodType)) {
      throw new Error('The schema must be an instance of a valid Zod schema');
    }
    payload.output_schema = zodToJsonSchema(schema);
  }

  try {
    const response = await axios.post(endpoint, payload, { headers });
    return response.data;
  } catch (error) {
    // Delegate to the shared helper, which rethrows the failure as one of
    // the library's error types.
    handleError(error);
  }
}
45+
46+
/**
 * Retrieve the status or the result of searchScraper request. It also allows you to see the result of old requests.
 *
 * @param {string} apiKey - Your ScrapeGraph AI API key
 * @param {string} requestId - The request ID associated with the output of a searchScraper request.
 * @returns {Promise<Object>} The response body returned by the API (`response.data`) for that request.
 * @throws - Will throw an error in case of an HTTP failure.
 */
export async function getSearchScraperRequest(apiKey, requestId) {
  // Encode the ID so characters such as "/", "?" or "#" cannot change which
  // endpoint is hit; UUID-style IDs pass through unchanged.
  const endpoint = `https://api.scrapegraphai.com/v1/searchscraper/${encodeURIComponent(requestId)}`;
  const headers = {
    accept: 'application/json',
    'SGAI-APIKEY': apiKey,
  };

  try {
    const response = await axios.get(endpoint, { headers });
    return response.data;
  } catch (error) {
    // Delegate to the shared helper, which rethrows the failure as one of
    // the library's error types.
    handleError(error);
  }
}

scrapegraph-js/src/utils/handleError.js

+10-6
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
class HttpError extends Error {
2-
constructor(statusCode, title, detail) {
3-
super(HttpError.makeMessage(statusCode, title, detail));
2+
constructor(statusCode, title, data) {
3+
super(HttpError.makeMessage(statusCode, title, data));
44
this.statusCode = statusCode;
55
this.title = title;
6-
this.detail = detail;
6+
this.info = data;
77
}
88

9-
static makeMessage(statusCode, title, detail) {
9+
static makeMessage(statusCode, title, data) {
1010
let message = '';
1111

1212
message += statusCode ? `${statusCode} - ` : '(unknown status code) - ';
1313
message += title ? `${title} - ` : '(unknown error message) - ';
14-
message += detail ? `${JSON.stringify(detail)}` : '(unknown error detail)';
14+
message += data.detail
15+
? 'Error located in: ' + `${JSON.stringify(data.detail[0].loc)}` + ', ' + `${data.detail[0].msg}`
16+
: data.error
17+
? `${data.error}`
18+
: '(unknown error detail)';
1519

1620
return message;
1721
}
@@ -31,7 +35,7 @@ class UnexpectedError extends Error {
3135

3236
export default function handleError(error) {
3337
if (error.response) {
34-
throw new HttpError(error.response.status, error.response.statusText, error.response.data.detail);
38+
throw new HttpError(error.response.status, error.response.statusText, error.response.data);
3539
} else if (error.request) {
3640
throw new NetworkError('Impossible to contact the server. Check your internet connection.');
3741
} else {

0 commit comments

Comments
 (0)