From 61d7ba76f76ce74e0d230f89a93436f29dc8d9df Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 26 Jan 2025 21:06:37 -0300 Subject: [PATCH] Revert "Nick: extract api reference" This reverts commit 522c5b35da7d5cd997aa5ebe2002a38ede7ace93. --- apps/api/src/controllers/v1/extract-status.ts | 2 - apps/api/src/controllers/v1/types.ts | 4 - apps/api/src/lib/extract/extract-redis.ts | 3 - .../api/src/lib/extract/extraction-service.ts | 78 ------------------- apps/api/src/services/rate-limiter.ts | 1 - 5 files changed, 88 deletions(-) diff --git a/apps/api/src/controllers/v1/extract-status.ts b/apps/api/src/controllers/v1/extract-status.ts index da3c08f137..3e166e5fd3 100644 --- a/apps/api/src/controllers/v1/extract-status.ts +++ b/apps/api/src/controllers/v1/extract-status.ts @@ -30,7 +30,6 @@ export async function extractStatusController( data = jobData[0].docs; } - console.log(extract.sources); return res.status(200).json({ success: extract.status === "failed" ? false : true, data: data, @@ -39,6 +38,5 @@ export async function extractStatusController( expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(), steps: extract.showSteps ? extract.steps : undefined, llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined, - sources: extract.sources, }); } diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index e86f209eab..13b141168c 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -537,7 +537,6 @@ export interface URLTrace { }; relevanceScore?: number; usedInCompletion?: boolean; - extractedFields?: string[]; } export interface ExtractResponse { @@ -548,9 +547,6 @@ export interface ExtractResponse { id?: string; warning?: string; urlTrace?: URLTrace[]; - sources?: { - [key: string]: string[]; - }; } export interface ExtractResponseRequestTest { diff --git a/apps/api/src/lib/extract/extract-redis.ts b/apps/api/src/lib/extract/extract-redis.ts index 39cf23640d..a6a3d6d3d5 100644 --- a/apps/api/src/lib/extract/extract-redis.ts +++ b/apps/api/src/lib/extract/extract-redis.ts @@ -32,9 +32,6 @@ export type StoredExtract = { steps?: ExtractedStep[]; showLLMUsage?: boolean; llmUsage?: number; - sources?: { - [key: string]: string[]; - }; }; // Reduce TTL to 6 hours instead of 24 diff --git a/apps/api/src/lib/extract/extraction-service.ts b/apps/api/src/lib/extract/extraction-service.ts index 577daaa540..0ac3024581 100644 --- a/apps/api/src/lib/extract/extraction-service.ts +++ b/apps/api/src/lib/extract/extraction-service.ts @@ -56,9 +56,6 @@ interface ExtractResult { tokenUsageBreakdown?: TokenUsage[]; llmUsage?: number; totalUrlsScraped?: number; - sources?: { - [key: string]: string[]; - }; } async function analyzeSchemaAndPrompt( @@ -182,45 +179,6 @@ function getRootDomain(url: string): string { } } -// Add helper function to track sources -function trackFieldSources(data: any, url: string, parentPath: string = ''): string[] { - const extractedFields: string[] = []; - - if (data && typeof data === 'object') { - Object.entries(data).forEach(([key, value]) => { - const currentPath = parentPath ? `${parentPath}.${key}` : key; - - if (value !== null && value !== undefined) { - extractedFields.push(currentPath); - - if (typeof value === 'object') { - extractedFields.push(...trackFieldSources(value, url, currentPath)); - } - } - }); - } - - return extractedFields; -} - -// Add helper to merge sources from multiple extractions -function mergeSources(sources: { [key: string]: string[] }[]): { [key: string]: string[] } { - const mergedSources: { [key: string]: string[] } = {}; - - sources.forEach(sourceMap => { - Object.entries(sourceMap).forEach(([field, urls]) => { - if (!mergedSources[field]) { - mergedSources[field] = []; - } - mergedSources[field].push(...urls); - // Deduplicate URLs - mergedSources[field] = [...new Set(mergedSources[field])]; - }); - }); - - return mergedSources; -} - export async function performExtraction( extractId: string, options: ExtractServiceOptions, @@ -233,7 +191,6 @@ export async function performExtraction( let multiEntityResult: any = {}; let singleAnswerResult: any = {}; let totalUrlsScraped = 0; - let extractionSources: { [key: string]: string[] } = {}; const logger = _logger.child({ module: "extract", @@ -594,24 +551,6 @@ export async function performExtraction( // return null; // } - if (multiEntityCompletion?.extract) { - const extractedFields = trackFieldSources(multiEntityCompletion.extract, doc.metadata.url || doc.metadata.sourceURL!); - - // Update URL trace with extracted fields - const trace = urlTraces.find(t => t.url === (doc.metadata.url || doc.metadata.sourceURL!)); - if (trace) { - trace.extractedFields = extractedFields; - } - - // Track sources for each field - extractedFields.forEach(field => { - if (!extractionSources[field]) { - extractionSources[field] = []; - } - extractionSources[field].push(doc.metadata.url || doc.metadata.sourceURL!); - }); - } - return multiEntityCompletion.extract; } catch (error) { logger.error(`Failed to process document.`, { error, url: doc.metadata.url ?? doc.metadata.sourceURL! }); @@ -788,21 +727,6 @@ export async function performExtraction( // } // }); // } - - if (singleAnswerCompletions?.extract) { - const singleAnswerSources: { [key: string]: string[] } = {}; - const usedUrls = Array.from(docsMap.values()) - .map(doc => doc.metadata.url || doc.metadata.sourceURL!) - .filter(Boolean); - - const extractedFields = trackFieldSources(singleAnswerCompletions.extract, ''); - extractedFields.forEach(field => { - singleAnswerSources[field] = usedUrls; - }); - - // Merge with multi-entity sources - extractionSources = mergeSources([extractionSources, singleAnswerSources]); - } } let finalResult = reqSchema @@ -893,7 +817,6 @@ export async function performExtraction( updateExtract(extractId, { status: "completed", llmUsage, - sources: extractionSources }).catch((error) => { logger.error( `Failed to update extract ${extractId} status to completed: ${error}`, @@ -911,6 +834,5 @@ export async function performExtraction( urlTrace: request.urlTrace ? urlTraces : undefined, llmUsage, totalUrlsScraped, - sources: extractionSources }; } diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index f54019e1b7..68cea8faa8 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -227,7 +227,6 @@ export function getRateLimiterPoints( const points: number = rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default; // 5 - return points; }