Skip to content

Commit ad7e3f6

Browse files
committed
feat(extractSmartScrape): resolve refs in provided schema
1 parent 5ee2434 commit ad7e3f6

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed

apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,33 @@ export function prepareSmartScrapeSchema(
181181
return { schemaToUse: wrappedSchema };
182182
}
183183

184+
/**
 * Inline every local `#/$defs/<name>` reference found in a JSON-schema-like value.
 *
 * The input is walked recursively and a new structure is returned; neither
 * `obj` nor `defs` is mutated.
 *
 * - Primitives, `null` and `undefined` are returned as-is.
 * - Arrays are mapped element by element.
 * - An object whose `$ref` starts with `#/$defs/` is replaced by a resolved
 *   copy of the definition named by the last path segment. Keywords written
 *   next to the `$ref` (e.g. `description`) are kept and take precedence over
 *   same-named keys of the definition.
 * - Any other `$ref` value is copied through untouched.
 * - `$defs` keys are dropped from every object in the output.
 *
 * A reference whose definition is missing, or which points back at a
 * definition that is already being expanded on the current branch (a
 * recursive schema), resolves to `{}` instead of recursing forever.
 *
 * @param obj - Schema, or any fragment of one, to resolve.
 * @param defs - Map of definition name to schema; a nullish value is treated as empty.
 * @param resolving - Internal: names of the definitions currently being
 *   expanded along this branch. Callers should omit it.
 * @returns A copy of `obj` with local references inlined.
 */
const resolveRefs = (
  obj: any,
  defs: any,
  resolving: ReadonlySet<string> = new Set<string>(),
): any => {
  if (!obj || typeof obj !== 'object') return obj;

  if (Array.isArray(obj)) {
    return obj.map(item => resolveRefs(item, defs, resolving));
  }

  let resolved: any = {};
  let isLocalRef = false;

  if (typeof obj.$ref === 'string') {
    const refPath = obj.$ref.split('/');
    if (refPath[0] === '#' && refPath[1] === '$defs') {
      isLocalRef = true;
      const defName = refPath[refPath.length - 1];
      // Own-property check so names such as "constructor" never resolve to
      // something inherited from Object.prototype.
      const isKnown =
        defs != null &&
        typeof defs === 'object' &&
        Object.prototype.hasOwnProperty.call(defs, defName);
      // Expanding a definition that is already open on this branch would
      // never terminate, so the cycle is cut with an empty schema.
      if (isKnown && !resolving.has(defName)) {
        resolved = resolveRefs(
          { ...defs[defName] },
          defs,
          new Set(resolving).add(defName),
        );
      }
    }
  }

  for (const [key, value] of Object.entries(obj)) {
    if (key === '$defs') continue;
    // The local `$ref` itself has been replaced by the inlined definition.
    if (isLocalRef && key === '$ref') continue;
    resolved[key] = resolveRefs(value, defs, resolving);
  }
  return resolved;
};
210+
184211
export async function extractData({
185212
extractOptions,
186213
urls,
@@ -221,6 +248,15 @@ export async function extractData({
221248
schema = genRes.extract;
222249
}
223250

251+
if (schema) {
252+
const defs = schema.$defs || {};
253+
schema = resolveRefs(schema, defs);
254+
delete schema.$defs;
255+
logger.info("Resolved schema refs", {
256+
schema,
257+
});
258+
}
259+
224260
const { schemaToUse } = prepareSmartScrapeSchema(schema, logger, isSingleUrl);
225261
const extractOptionsNewSchema = {
226262
...extractOptions,

0 commit comments

Comments
 (0)