Skip to content

Commit

Permalink
fix(scrapeURL/fire-engine): default to separate US-generic proxy list…
Browse files Browse the repository at this point in the history
… if no location is specified (FIR-728) (#1104)

* feat(location/country): default to us-generic

* add tests + fix mock
  • Loading branch information
mogery authored Jan 29, 2025
1 parent 5c1b675 commit 5733b82
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 12 deletions.
12 changes: 6 additions & 6 deletions apps/api/src/__tests__/snips/mocks/mocking-works-properly.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{
"time": 1735911273239,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape",
"url": "<fire-engine>/scrape",
"method": "POST",
"body": {
"url": "http://firecrawl.dev",
Expand All @@ -27,7 +27,7 @@
{
"time": 1735911273354,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
Expand All @@ -43,7 +43,7 @@
{
"time": 1735911273720,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
Expand All @@ -59,7 +59,7 @@
{
"time": 1735911274092,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
Expand All @@ -75,7 +75,7 @@
{
"time": 1735911274467,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
Expand All @@ -91,7 +91,7 @@
{
"time": 1735911274947,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
Expand Down
20 changes: 20 additions & 0 deletions apps/api/src/__tests__/snips/scrape.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,24 @@ describe("Scrape tests", () => {
"this is fake data coming from the mocking system!",
);
});

// Covers the `location` scrape option, both omitted and set explicitly.
// NOTE(review): the target page presumably echoes the requester's apparent
// geolocation; the second case relies on it rendering a "Country" table row.
describe("Location API", () => {
  const targetUrl = "https://iplocation.com";

  // No `location` supplied: the scrape is only expected to succeed.
  it.concurrent("works without specifying an explicit location", async () => {
    const result = await scrape({ url: targetUrl });

    expectScrapeToSucceed(result);
  });

  // Explicit country "US": the returned markdown must contain the
  // "United States" country row.
  it.concurrent("works with country US", async () => {
    const result = await scrape({
      url: targetUrl,
      location: { country: "US" },
    });

    expectScrapeToSucceed(result);
    expect(result.body.data.markdown).toContain("| Country | United States |");
  });
});
});
6 changes: 3 additions & 3 deletions apps/api/src/controllers/v1/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ export const scrapeOptions = z
.string()
.optional()
.refine(
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
(val) => !val || Object.keys(countries).includes(val.toUpperCase()) || val === "US-generic",
{
message:
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
},
)
.transform((val) => (val ? val.toUpperCase() : "US")),
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
languages: z.string().array().optional(),
})
.optional(),
Expand All @@ -178,7 +178,7 @@ export const scrapeOptions = z
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
},
)
.transform((val) => (val ? val.toUpperCase() : "US")),
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
languages: z.string().array().optional(),
})
.optional(),
Expand Down
8 changes: 6 additions & 2 deletions apps/api/src/scraper/scrapeURL/lib/fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,14 @@ export async function robustFetch<
const makeRequestTypeId = (
request: (typeof mock)["requests"][number]["options"],
) => {
let out = request.url + ";" + request.method;
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
: request.url;

let out = trueUrl + ";" + request.method;
if (
process.env.FIRE_ENGINE_BETA_URL &&
url.startsWith(process.env.FIRE_ENGINE_BETA_URL) &&
(trueUrl.startsWith("<fire-engine>")) &&
request.method === "POST"
) {
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
Expand Down
2 changes: 1 addition & 1 deletion apps/api/src/scraper/scrapeURL/lib/mock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import * as path from "path";
import { logger as _logger } from "../../../lib/logger";
import { Logger } from "winston";
const saveMocksDirPath = path.join(__dirname, "../mocks/").replace("dist/", "");
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks");
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks").replace("dist/", "");

export async function saveMock(options: unknown, result: unknown) {
if (process.env.FIRECRAWL_SAVE_MOCKS !== "true") return;
Expand Down

0 comments on commit 5733b82

Please sign in to comment.