diff --git a/components/scrapingant/actions/general-extraction/general-extraction.mjs b/components/scrapingant/actions/general-extraction/general-extraction.mjs
new file mode 100644
index 0000000000000..ec2b79e49dc49
--- /dev/null
+++ b/components/scrapingant/actions/general-extraction/general-extraction.mjs
@@ -0,0 +1,108 @@
+import app from "../../scrapingant.app.mjs";
+
+export default {
+  key: "scrapingant-general-extraction",
+  name: "General Extraction",
+  description: "Send a request using the standard extraction method of ScrapingAnt. [See the documentation](https://docs.scrapingant.com/request-response-format)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    app,
+    url: {
+      propDefinition: [
+        app,
+        "url",
+      ],
+    },
+    browser: {
+      propDefinition: [
+        app,
+        "browser",
+      ],
+      reloadProps: true,
+    },
+    returnPageSource: {
+      propDefinition: [
+        app,
+        "returnPageSource",
+      ],
+      disabled: true,
+      hidden: true,
+    },
+    cookies: {
+      propDefinition: [
+        app,
+        "cookies",
+      ],
+    },
+    jsSnippet: {
+      propDefinition: [
+        app,
+        "jsSnippet",
+      ],
+      disabled: true,
+      hidden: true,
+    },
+    proxyType: {
+      propDefinition: [
+        app,
+        "proxyType",
+      ],
+    },
+    proxyCountry: {
+      propDefinition: [
+        app,
+        "proxyCountry",
+      ],
+    },
+    waitForSelector: {
+      propDefinition: [
+        app,
+        "waitForSelector",
+      ],
+    },
+    blockResource: {
+      propDefinition: [
+        app,
+        "blockResource",
+      ],
+      disabled: true,
+      hidden: true,
+    },
+  },
+  async additionalProps(existingProps) {
+    const props = {};
+    if (this.browser) {
+      existingProps.returnPageSource.hidden = false;
+      existingProps.returnPageSource.disabled = false;
+    }
+    if (this.browser) {
+      existingProps.jsSnippet.hidden = false;
+      existingProps.jsSnippet.disabled = false;
+    }
+    if (this.browser) {
+      existingProps.blockResource.hidden = false;
+      existingProps.blockResource.disabled = false;
+    }
+    return props;
+  },
+
+  async run({ $ }) {
+    const response = await this.app.generalExtraction({
+      $,
+      params: {
+        url: this.url,
+        browser: this.browser,
+        return_page_source: this.returnPageSource,
+        cookies: this.cookies,
+        js_snippet: this.jsSnippet,
+        proxy_type: this.proxyType,
+        proxy_country: this.proxyCountry,
+        wait_for_selector: this.waitForSelector,
+        block_resource: this.blockResource,
+      },
+    });
+    $.export("$summary", "Successfully sent the request to ScrapingAnt");
+    return response;
+  },
+};
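For illustration only (not part of the patch): the action above forwards its camelCase props to ScrapingAnt as snake_case query parameters via app.generalExtraction(). A hypothetical configuration and the params object that run() would build from it might look like the sketch below; every value here is made up.

// Hypothetical values a user might enter in the Pipedream UI (illustrative only).
// Setting `browser` to true is what reveals returnPageSource, jsSnippet, and
// blockResource through additionalProps() above.
const configuredProps = {
  url: "https://example.com",
  browser: true,
  proxyType: "residential",
  proxyCountry: "US",
  waitForSelector: "#content",
  blockResource: ["image", "media"],
};

// The corresponding `params` object that run() passes to this.app.generalExtraction();
// camelCase props become snake_case keys. Unset optional props (cookies, jsSnippet,
// returnPageSource here) simply come through as undefined.
const params = {
  url: "https://example.com",
  browser: true,
  proxy_type: "residential",
  proxy_country: "US",
  wait_for_selector: "#content",
  block_resource: ["image", "media"],
};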
"United Kingdom", + value: "GB", + }, + { + label: "United Arab Emirates", + value: "AE", + }, + { + label: "USA", + value: "US", + }, + { + label: "Vietnam", + value: "VN", + }, + ], + PROXY_TYPES: [ + { + label: "Residential", + value: "residential", + }, + { + label: "Datacenter", + value: "datacenter", + }, + ], + RESOURCE_TYPES: [ + "document", + "stylesheet", + "image", + "media", + "font", + "script", + "texttrack", + "xhr", + "fetch", + "eventsource", + "websocket", + "manifest", + "other", + ], + +}; diff --git a/components/scrapingant/package.json b/components/scrapingant/package.json index a5db939a0a1aa..b166939b6c56a 100644 --- a/components/scrapingant/package.json +++ b/components/scrapingant/package.json @@ -1,6 +1,6 @@ { "name": "@pipedream/scrapingant", - "version": "0.0.1", + "version": "0.1.0", "description": "Pipedream ScrapingAnt Components", "main": "scrapingant.app.mjs", "keywords": [ @@ -11,5 +11,8 @@ "author": "Pipedream (https://pipedream.com/)", "publishConfig": { "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.0.3" } } \ No newline at end of file diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index 5204975b31454..7bfa28e192529 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -1,11 +1,92 @@ +import { axios } from "@pipedream/platform"; +import constants from "./common/constants.mjs"; + export default { type: "app", app: "scrapingant", - propDefinitions: {}, + propDefinitions: { + url: { + type: "string", + label: "URL", + description: "The URL to scrape", + }, + browser: { + type: "boolean", + label: "Browser", + description: "Enables using a headless browser for scraping", + optional: true, + }, + returnPageSource: { + type: "boolean", + label: "Return Page Source", + description: "Enables returning data returned by the server and unaltered by the browser. 
diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs
index 5204975b31454..7bfa28e192529 100644
--- a/components/scrapingant/scrapingant.app.mjs
+++ b/components/scrapingant/scrapingant.app.mjs
@@ -1,11 +1,92 @@
+import { axios } from "@pipedream/platform";
+import constants from "./common/constants.mjs";
+
 export default {
   type: "app",
   app: "scrapingant",
-  propDefinitions: {},
+  propDefinitions: {
+    url: {
+      type: "string",
+      label: "URL",
+      description: "The URL to scrape",
+    },
+    browser: {
+      type: "boolean",
+      label: "Browser",
+      description: "Enables using a headless browser for scraping",
+      optional: true,
+    },
+    returnPageSource: {
+      type: "boolean",
+      label: "Return Page Source",
+      description: "Enables returning the page source as received from the server, unaltered by the browser. When `true`, JS won't be rendered",
+      optional: true,
+    },
+    cookies: {
+      type: "string",
+      label: "Cookies",
+      description: "Cookies to pass with the scraping request to the target site, e.g. `cookie_name1=cookie_value1;cookie_name2=cookie_value2`",
+      optional: true,
+    },
+    jsSnippet: {
+      type: "string",
+      label: "JS Snippet",
+      description: "Base64-encoded JS snippet to run once the page has loaded in the ScrapingAnt browser",
+      optional: true,
+    },
+    proxyType: {
+      type: "string",
+      label: "Proxy Type",
+      description: "Specifies the proxy type to make the request from",
+      options: constants.PROXY_TYPES,
+      optional: true,
+    },
+    proxyCountry: {
+      type: "string",
+      label: "Proxy Country",
+      description: "Specifies the proxy country to make the request from",
+      options: constants.PROXY_COUNTRIES,
+      optional: true,
+    },
+    waitForSelector: {
+      type: "string",
+      label: "Wait for Selector",
+      description: "The CSS selector of the element ScrapingAnt will wait for before returning the result",
+      optional: true,
+    },
+    blockResource: {
+      type: "string[]",
+      label: "Block Resource",
+      description: "Prevents the cloud browser from loading the specified resource types",
+      options: constants.RESOURCE_TYPES,
+      optional: true,
+    },
+  },
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    _baseUrl() {
+      return "https://api.scrapingant.com/v2";
+    },
+    async _makeRequest(opts = {}) {
+      const {
+        $ = this,
+        path,
+        headers,
+        ...otherOpts
+      } = opts;
+      return axios($, {
+        ...otherOpts,
+        url: this._baseUrl() + path,
+        headers: {
+          ...headers,
+          "x-api-key": `${this.$auth.api_token}`,
+        },
+      });
+    },
+    async generalExtraction(args = {}) {
+      return this._makeRequest({
+        path: "/general",
+        ...args,
+      });
     },
   },
 };
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b711ee2c35128..0812c60995e56 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -9243,7 +9243,11 @@ importers:
       specifier: ^3.0.1
      version: 3.0.3
 
-  components/scrapingant: {}
+  components/scrapingant:
+    dependencies:
+      '@pipedream/platform':
+        specifier: ^3.0.3
+        version: 3.0.3
 
   components/scrapingbee: {}
 
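For reference, and also outside the patch itself: generalExtraction() ultimately issues a GET request to https://api.scrapingant.com/v2/general with the API key in the x-api-key header, as wired up in _baseUrl() and _makeRequest() above. A minimal standalone sketch of the equivalent call follows; it assumes the plain axios npm package and an API key in a SCRAPINGANT_API_KEY environment variable, whereas the component itself uses @pipedream/platform's axios and this.$auth.api_token.

// Standalone sketch only; this is not how the component runs inside Pipedream.
import axios from "axios";

async function generalExtraction(params) {
  // Mirrors _baseUrl() + "/general" and the x-api-key header from _makeRequest().
  const { data } = await axios.get("https://api.scrapingant.com/v2/general", {
    headers: {
      "x-api-key": process.env.SCRAPINGANT_API_KEY, // assumed env var for this sketch
    },
    params, // e.g. { url: "https://example.com", browser: true, proxy_type: "datacenter" }
  });
  return data; // the scraped page content returned by ScrapingAnt
}

const content = await generalExtraction({ url: "https://example.com" });
console.log(content);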