Skip to content

[Components] scrapingant #13316 #15493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import app from "../../scrapingant.app.mjs";

/**
 * ScrapingAnt "General Extraction" action.
 *
 * Collects the scraping options from the user and forwards them as query
 * parameters to the app's `generalExtraction` method.
 *
 * `returnPageSource`, `jsSnippet` and `blockResource` are declared hidden and
 * disabled; `additionalProps` toggles them according to the `browser` prop.
 */
export default {
  key: "scrapingant-general-extraction",
  name: "General Extraction",
  description: "Send a request using the standard extraction method of ScrapingAnt. [See the documentation](https://docs.scrapingant.com/request-response-format)",
  version: "0.0.1",
  type: "action",
  props: {
    app,
    url: {
      propDefinition: [
        app,
        "url",
      ],
    },
    browser: {
      propDefinition: [
        app,
        "browser",
      ],
      // Changing this prop re-runs additionalProps (Pipedream reloadProps).
      reloadProps: true,
    },
    // Browser-only option: hidden until `browser` is enabled.
    returnPageSource: {
      propDefinition: [
        app,
        "returnPageSource",
      ],
      disabled: true,
      hidden: true,
    },
    cookies: {
      propDefinition: [
        app,
        "cookies",
      ],
    },
    // Browser-only option: hidden until `browser` is enabled.
    jsSnippet: {
      propDefinition: [
        app,
        "jsSnippet",
      ],
      disabled: true,
      hidden: true,
    },
    proxyType: {
      propDefinition: [
        app,
        "proxyType",
      ],
    },
    proxyCountry: {
      propDefinition: [
        app,
        "proxyCountry",
      ],
    },
    waitForSelector: {
      propDefinition: [
        app,
        "waitForSelector",
      ],
    },
    // Browser-only option: hidden until `browser` is enabled.
    blockResource: {
      propDefinition: [
        app,
        "blockResource",
      ],
      disabled: true,
      hidden: true,
    },
  },
  /**
   * Shows or hides the browser-only props depending on `this.browser`.
   *
   * The visibility flags are always assigned (not only when `browser` is
   * truthy) so that switching `browser` back off hides and disables the
   * props again instead of leaving them in whatever state a previous reload
   * put them in.
   *
   * @param {object} existingProps - The statically declared props; mutated in place.
   * @returns {Promise<object>} An empty object: no dynamic props are added.
   */
  async additionalProps(existingProps) {
    const browserOnlyProps = [
      "returnPageSource",
      "jsSnippet",
      "blockResource",
    ];
    const isBrowserOff = !this.browser;
    for (const propName of browserOnlyProps) {
      existingProps[propName].hidden = isBrowserOff;
      existingProps[propName].disabled = isBrowserOff;
    }
    return {};
  },

  /**
   * Sends the extraction request and returns the app method's response.
   *
   * @param {object} context
   * @param {object} context.$ - Step context, passed through to the app
   *   method and used to export the step summary.
   * @returns {Promise<*>} Whatever `app.generalExtraction` resolves to.
   */
  async run({ $ }) {
    const response = await this.app.generalExtraction({
      $,
      // Prop values are mapped from camelCase to the snake_case parameter
      // names used in the request.
      params: {
        url: this.url,
        browser: this.browser,
        return_page_source: this.returnPageSource,
        cookies: this.cookies,
        js_snippet: this.jsSnippet,
        proxy_type: this.proxyType,
        proxy_country: this.proxyCountry,
        wait_for_selector: this.waitForSelector,
        block_resource: this.blockResource,
      },
    });
    $.export("$summary", "Successfully sent the request to ScrapingAnt");
    return response;
  },
};
126 changes: 126 additions & 0 deletions components/scrapingant/common/constants.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Turns a compact [label, value] pair into the { label, value } option shape.
const toOption = ([
  label,
  value,
]) => ({
  label,
  value,
});

/**
 * Static option lists for the ScrapingAnt props.
 *
 * - PROXY_COUNTRIES: { label, value } options; "World" carries an empty value.
 * - PROXY_TYPES: { label, value } options.
 * - RESOURCE_TYPES: plain string options.
 */
export default {
  PROXY_COUNTRIES: [
    ["World", ""],
    ["Brazil", "BR"],
    ["Canada", "CA"],
    ["China", "CN"],
    ["Czech Republic", "CZ"],
    ["France", "FR"],
    ["Germany", "DE"],
    ["Hong Kong", "HK"],
    ["India", "IN"],
    ["Indonesia", "ID"],
    ["Italy", "IT"],
    ["Israel", "IL"],
    ["Japan", "JP"],
    ["Netherlands", "NL"],
    ["Poland", "PL"],
    ["Russia", "RU"],
    ["Saudi Arabia", "SA"],
    ["Singapore", "SG"],
    ["South Korea", "KR"],
    ["Spain", "ES"],
    ["United Kingdom", "GB"],
    ["United Arab Emirates", "AE"],
    ["USA", "US"],
    ["Vietnam", "VN"],
  ].map(toOption),
  PROXY_TYPES: [
    ["Residential", "residential"],
    ["Datacenter", "datacenter"],
  ].map(toOption),
  RESOURCE_TYPES: [
    "document",
    "stylesheet",
    "image",
    "media",
    "font",
    "script",
    "texttrack",
    "xhr",
    "fetch",
    "eventsource",
    "websocket",
    "manifest",
    "other",
  ],
};
5 changes: 4 additions & 1 deletion components/scrapingant/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/scrapingant",
"version": "0.0.1",
"version": "0.1.0",
"description": "Pipedream ScrapingAnt Components",
"main": "scrapingant.app.mjs",
"keywords": [
Expand All @@ -11,5 +11,8 @@
"author": "Pipedream <support@pipedream.com> (https://pipedream.com/)",
"publishConfig": {
"access": "public"
},
"dependencies": {
"@pipedream/platform": "^3.0.3"
}
}

Check failure on line 18 in components/scrapingant/package.json

View workflow job for this annotation

GitHub Actions / Lint Code Base

Newline required at end of file but not found
89 changes: 85 additions & 4 deletions components/scrapingant/scrapingant.app.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,92 @@
import { axios } from "@pipedream/platform";
import constants from "./common/constants.mjs";

/**
 * ScrapingAnt app definition: shared prop definitions plus the HTTP helpers
 * used by the component's actions.
 */
export default {
  type: "app",
  app: "scrapingant",
  propDefinitions: {
    url: {
      type: "string",
      label: "URL",
      description: "The URL to scrape",
    },
    browser: {
      type: "boolean",
      label: "Browser",
      description: "Enables using a headless browser for scraping",
      optional: true,
    },
    returnPageSource: {
      type: "boolean",
      label: "Return Page Source",
      description: "Enables returning data returned by the server and unaltered by the browser. When true JS won't be rendered",
      optional: true,
    },
    cookies: {
      type: "string",
      label: "Cookies",
      description: "Cookies to pass with a scraping request to the target site, i.e.: `cookie_name1=cookie_value1;cookie_name2=cookie_value2`",
      optional: true,
    },
    jsSnippet: {
      type: "string",
      label: "JS Snippet",
      description: "Base64 encoded JS snippet to run once page being loaded in the ScrapingAnt browser",
      optional: true,
    },
    proxyType: {
      type: "string",
      label: "Proxy Type",
      description: "Specifies the proxy type to make the request from",
      options: constants.PROXY_TYPES,
      optional: true,
    },
    proxyCountry: {
      type: "string",
      label: "Proxy Country",
      description: "Specifies the proxy country to make the request from",
      options: constants.PROXY_COUNTRIES,
      optional: true,
    },
    waitForSelector: {
      type: "string",
      label: "Wait for Selector",
      description: "The CSS selector of the element Scrapingant will wait for before returning the result",
      optional: true,
    },
    blockResource: {
      type: "string[]",
      label: "Block Resource",
      description: "Prevents cloud browser from loading specified resource types",
      options: constants.RESOURCE_TYPES,
      optional: true,
    },
  },
  methods: {
    /**
     * @returns {string} Root URL that every request path is appended to.
     */
    _baseUrl() {
      return "https://api.scrapingant.com/v2";
    },
    /**
     * Issues a request through the platform `axios` helper.
     *
     * @param {object} [opts]
     * @param {object} [opts.$] - Context passed to `axios`; defaults to this app.
     * @param {string} opts.path - Path appended to the base URL (e.g. "/general").
     * @param {object} [opts.headers] - Extra request headers.
     * Any remaining keys (e.g. `params`) are forwarded to `axios` unchanged.
     * @returns {Promise<*>} Whatever `axios` resolves to.
     */
    async _makeRequest(opts = {}) {
      const {
        $ = this,
        path,
        headers,
        ...otherOpts
      } = opts;
      return axios($, {
        ...otherOpts,
        url: `${this._baseUrl()}${path}`,
        headers: {
          ...headers,
          // Listed after the caller's headers so the account key always wins.
          "x-api-key": `${this.$auth.api_token}`,
        },
      });
    },
    /**
     * Calls the "/general" endpoint.
     *
     * @param {object} [args] - Options forwarded to `_makeRequest`
     *   (typically `$` and `params`). Spread after `path`, so a caller-supplied
     *   `path` would override the default.
     * @returns {Promise<*>} The result of `_makeRequest`.
     */
    async generalExtraction(args = {}) {
      return this._makeRequest({
        path: "/general",
        ...args,
      });
    },
  },
};
6 changes: 5 additions & 1 deletion pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading