Commit cd53432
fix crawl option conversion
1 parent 2a96717 commit cd53432

4 files changed: +24 -9 lines changed

apps/api/src/controllers/v0/crawl.ts (+3 -3)

@@ -15,7 +15,7 @@ import { getScrapeQueue } from "../../../src/services/queue-service";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 import * as Sentry from "@sentry/node";
 import { getJobPriority } from "../../lib/job-priority";
-import { fromLegacyCrawlerOptions, fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
+import { fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
 import { ZodError } from "zod";
 
 export async function crawlController(req: Request, res: Response) {
@@ -140,7 +140,7 @@ export async function crawlController(req: Request, res: Response) {
 
 const sc: StoredCrawl = {
   originUrl: url,
-  crawlerOptions: fromLegacyCrawlerOptions(crawlerOptions),
+  crawlerOptions,
   scrapeOptions,
   internalOptions,
   team_id,
@@ -177,7 +177,7 @@ export async function crawlController(req: Request, res: Response) {
 data: {
   url,
   mode: "single_urls",
-  crawlerOptions: crawlerOptions,
+  crawlerOptions,
   team_id,
   plan,
   pageOptions: pageOptions,
apps/api/src/controllers/v0/crawlPreview.ts (+2 -2)

@@ -8,7 +8,7 @@ import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "..
 import { addScrapeJob } from "../../../src/services/queue-jobs";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 import * as Sentry from "@sentry/node";
-import { fromLegacyCrawlerOptions, fromLegacyScrapeOptions } from "../v1/types";
+import { fromLegacyScrapeOptions } from "../v1/types";
 
 export async function crawlPreviewController(req: Request, res: Response) {
   try {
@@ -91,7 +91,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
 
 const sc: StoredCrawl = {
   originUrl: url,
-  crawlerOptions: fromLegacyCrawlerOptions(crawlerOptions),
+  crawlerOptions,
   scrapeOptions,
   internalOptions,
   team_id,

apps/api/src/controllers/v1/crawl.ts (+2 -1)

@@ -5,6 +5,7 @@ import {
   crawlRequestSchema,
   CrawlResponse,
   RequestWithAuth,
+  toLegacyCrawlerOptions,
 } from "./types";
 import {
   addCrawlJob,
@@ -70,7 +71,7 @@ export async function crawlController(
 
 const sc: StoredCrawl = {
   originUrl: req.body.url,
-  crawlerOptions,
+  crawlerOptions: toLegacyCrawlerOptions(crawlerOptions),
   scrapeOptions,
   internalOptions: {},
   team_id: req.auth.team_id,
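Read together with the v0 diffs above, the fix appears to standardize which shape StoredCrawl.crawlerOptions holds: the v0 controllers previously ran their options through fromLegacyCrawlerOptions before storing, while the v1 controller stored its parsed v1 options directly, so the two paths persisted different shapes. After this commit, both paths persist the legacy (v0) shape, with v1 converting down via the new toLegacyCrawlerOptions helper added in types.ts below.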

apps/api/src/controllers/v1/types.ts (+17 -3)

@@ -440,6 +440,20 @@ export interface ResponseWithSentry<
   sentry?: string,
 }
 
+export function toLegacyCrawlerOptions(x: CrawlerOptions) {
+  return {
+    includes: x.includePaths,
+    excludes: x.excludePaths,
+    maxCrawledLinks: x.limit,
+    maxDepth: x.maxDepth,
+    limit: x.limit,
+    generateImgAltText: false,
+    allowBackwardCrawling: x.allowBackwardLinks,
+    allowExternalContentLinks: x.allowExternalLinks,
+    ignoreSitemap: x.ignoreSitemap,
+  };
+}
+
 export function fromLegacyCrawlerOptions(x: any): { crawlOptions: CrawlerOptions; internalOptions: InternalOptions } {
   return {
     crawlOptions: crawlerOptions.parse({
@@ -493,10 +507,10 @@ export function fromLegacyScrapeOptions(pageOptions: PageOptions, extractorOptio
   }
 }
 
-export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, crawlOptions: CrawlerOptions, internalOptions: InternalOptions} {
+export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, internalOptions: InternalOptions} {
   const { scrapeOptions, internalOptions: i1 } = fromLegacyScrapeOptions(pageOptions, extractorOptions, timeout);
-  const { crawlOptions, internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
-  return { scrapeOptions, crawlOptions, internalOptions: Object.assign(i1, i2) };
+  const { internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
+  return { scrapeOptions, internalOptions: Object.assign(i1, i2) };
 }
 
 export function toLegacyDocument(document: Document, internalOptions: InternalOptions): V0Document | { url: string; } {
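For reference, a minimal sketch (not part of the commit) of the mapping that toLegacyCrawlerOptions performs, based only on the field assignments visible in the hunk above; the sample input values are made up and the cast is only there because the full CrawlerOptions schema may require more fields than this sketch provides:

// Illustrative only: field names taken from the diff above, values invented.
import { toLegacyCrawlerOptions } from "./types"; // path as used by the v1 controllers

const v1Options = {
  includePaths: ["/blog/*"],
  excludePaths: ["/admin/*"],
  limit: 100,
  maxDepth: 3,
  allowBackwardLinks: false,
  allowExternalLinks: false,
  ignoreSitemap: true,
};

// Per the helper's body, the result would be roughly:
// {
//   includes: ["/blog/*"],
//   excludes: ["/admin/*"],
//   maxCrawledLinks: 100,        // mirrored from limit
//   maxDepth: 3,
//   limit: 100,
//   generateImgAltText: false,   // hard-coded by the helper
//   allowBackwardCrawling: false,
//   allowExternalContentLinks: false,
//   ignoreSitemap: true,
// }
const legacyOptions = toLegacyCrawlerOptions(v1Options as any);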
