Skip to content

Commit a2bdd59

Browse files
authored
Firecrawl - Extract actions (#16069)
* new extract actions * update * update * version
1 parent 23d2606 commit a2bdd59

File tree

11 files changed

+145
-8
lines changed

11 files changed

+145
-8
lines changed

components/bloomerang/bloomerang.app.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ export default {
88
console.log(Object.keys(this.$auth));
99
},
1010
},
11-
};
11+
};

components/firecrawl/actions/crawl-url/crawl-url.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export default {
55
key: "firecrawl-crawl-url",
66
name: "Crawl URL",
77
description: "Crawls a given URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post)",
8-
version: "1.0.1",
8+
version: "1.0.2",
99
type: "action",
1010
props: {
1111
firecrawl,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import firecrawl from "../../firecrawl.app.mjs";
2+
import { ConfigurationError } from "@pipedream/platform";
3+
import { parseObjectEntries } from "../../common/utils.mjs";
4+
5+
/**
 * Pipedream action: submits an extract job to Firecrawl for one or more URLs
 * and, optionally, polls the job status until it is no longer "processing".
 */
export default {
  key: "firecrawl-extract-data",
  name: "Extract Data",
  description: "Extract structured data from one or multiple URLs. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract)",
  version: "0.0.1",
  type: "action",
  props: {
    firecrawl,
    urls: {
      type: "string[]",
      label: "URLs",
      description: "An array of one or more URLs. Supports wildcards (/*) for broader crawling.",
    },
    prompt: {
      type: "string",
      label: "Prompt",
      description: "(Optional unless no schema): A natural language prompt describing the data you want or specifying how you want that data structured.",
      optional: true,
    },
    schema: {
      type: "object",
      label: "Schema",
      description: "(Optional unless no prompt): A more rigid structure if you already know the JSON layout.",
      optional: true,
    },
    enableWebSearch: {
      type: "boolean",
      label: "Enable Web Search",
      description: "When `true`, the extraction will use web search to find additional data",
      optional: true,
    },
    ignoreSitemap: {
      type: "boolean",
      label: "Ignore Sitemap",
      description: "When true, sitemap.xml files will be ignored during website scanning",
      optional: true,
    },
    includeSubdomains: {
      type: "boolean",
      label: "Include Subdomains",
      description: "When true, subdomains of the provided URLs will also be scanned",
      optional: true,
    },
    showSources: {
      type: "boolean",
      label: "Show Sources",
      description: "When true, the sources used to extract the data will be included in the response",
      optional: true,
    },
    waitForCompletion: {
      type: "boolean",
      label: "Wait For Completion",
      description: "Set to `true` to poll the API in 3-second intervals until the job is completed",
      optional: true,
    },
  },
  /**
   * Starts the extract job and returns either the initial response or, when
   * `waitForCompletion` is set, the first status response whose `status` is
   * not "processing".
   *
   * @param {object} ctx - Run context supplied by the platform.
   * @param {object} ctx.$ - Step helper; forwarded to the app requests and used to export the summary.
   * @returns {Promise<object>} The last response received from the API.
   * @throws {ConfigurationError} When neither `prompt` nor `schema` is provided.
   */
  async run({ $ }) {
    // The request needs at least one of the two to describe the wanted data.
    if (!this.prompt && !this.schema) {
      throw new ConfigurationError("Must enter one of Prompt or Schema");
    }

    let response = await this.firecrawl.extract({
      $,
      data: {
        urls: this.urls,
        prompt: this.prompt,
        schema: this.schema && parseObjectEntries(this.schema),
        enableWebSearch: this.enableWebSearch,
        ignoreSitemap: this.ignoreSitemap,
        includeSubdomains: this.includeSubdomains,
        showSources: this.showSources,
      },
    });

    if (this.waitForCompletion) {
      const id = response.id;
      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
      const fetchStatus = () => this.firecrawl.getExtractStatus({
        $,
        id,
      });

      // Sleep only between polls: once the status is no longer "processing"
      // the result is returned immediately instead of after one more delay.
      response = await fetchStatus();
      while (response.status === "processing") {
        await timer(3000);
        response = await fetchStatus();
      }
    }

    if (response.success) {
      $.export("$summary", "Successfully extracted data.");
    }
    return response;
  },
};

components/firecrawl/actions/get-crawl-status/get-crawl-status.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export default {
44
key: "firecrawl-get-crawl-status",
55
name: "Get Crawl Data",
66
description: "Obtains the status and data from a previous crawl operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-get)",
7-
version: "0.0.2",
7+
version: "0.0.3",
88
type: "action",
99
props: {
1010
firecrawl,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import firecrawl from "../../firecrawl.app.mjs";
2+
3+
/**
 * Pipedream action: looks up a previously started Firecrawl extract job by
 * its ID and returns whatever the status endpoint responds with.
 */
export default {
  key: "firecrawl-get-extract-status",
  name: "Get Extract Data",
  description: "Obtains the status and data from a previous extract operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract-get)",
  version: "0.0.1",
  type: "action",
  props: {
    firecrawl,
    extractId: {
      type: "string",
      label: "Extract Job ID",
      description: "The ID of the extract job",
    },
  },
  /**
   * Requests the extract job status and exports a summary for the step.
   *
   * @param {object} ctx - Run context supplied by the platform.
   * @param {object} ctx.$ - Step helper; forwarded to the request and used to export the summary.
   * @returns {Promise<object>} The response from the app's `getExtractStatus` call.
   */
  async run({ $ }) {
    const request = {
      $,
      id: this.extractId,
    };
    const jobStatus = await this.firecrawl.getExtractStatus(request);

    $.export("$summary", `Successfully retrieved status for extract (ID: ${this.extractId})`);
    return jobStatus;
  },
};

components/firecrawl/actions/scrape-page/scrape-page.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ export default {
77
name: "Scrape Page",
88
description:
99
"Scrapes a URL and returns content from that page. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/scrape)",
10-
version: "1.0.0",
10+
version: "1.0.1",
1111
type: "action",
1212
props: {
1313
firecrawl,

components/firecrawl/firecrawl.app.mjs

+15
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,20 @@ export default {
6262
...opts,
6363
});
6464
},
65+
extract(opts = {}) {
66+
return this._makeRequest({
67+
method: "POST",
68+
path: "/extract",
69+
...opts,
70+
});
71+
},
72+
getExtractStatus({
73+
id, ...opts
74+
}) {
75+
return this._makeRequest({
76+
path: `/extract/${id}`,
77+
...opts,
78+
});
79+
},
6580
},
6681
};

components/firecrawl/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/firecrawl",
3-
"version": "1.0.1",
3+
"version": "1.1.0",
44
"description": "Pipedream FireCrawl Components",
55
"main": "firecrawl.app.mjs",
66
"keywords": [

components/hyperbrowser/hyperbrowser.app.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ export default {
88
console.log(Object.keys(this.$auth));
99
},
1010
},
11-
};
11+
};

components/nutrient_workflow_automation/nutrient_workflow_automation.app.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ export default {
88
console.log(Object.keys(this.$auth));
99
},
1010
},
11-
};
11+
};

components/oracle_cloud_infrastructure/oracle_cloud_infrastructure.app.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ export default {
88
console.log(Object.keys(this.$auth));
99
},
1010
},
11-
};
11+
};

0 commit comments

Comments
 (0)