Skip to content

Commit

Permalink
feat(route): add Transcript Forest (#14482)
Browse files Browse the repository at this point in the history
* feat(route): add Transcript Forest

* fix: add transcript text
  • Loading branch information
nczitzk authored Feb 17, 2024
1 parent 2c20ad3 commit 8c077f9
Show file tree
Hide file tree
Showing 6 changed files with 511 additions and 0 deletions.
116 changes: 116 additions & 0 deletions lib/v2/transcriptforest/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const { art } = require('@/utils/render');
const path = require('path');

/**
 * Format a duration given in seconds as a zero-padded `HH:MM:SS` string.
 *
 * Fractional seconds are truncated by flooring each component. The hours
 * component is not capped, so durations of 100 hours or more simply produce
 * more than two hour digits.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} The duration formatted as `HH:MM:SS`.
 */
const bakeTimestamp = (seconds) => {
    // Every component is rendered with at least two digits.
    const twoDigits = (value) => String(value).padStart(2, '0');

    const components = [
        Math.floor(seconds / 3600), // whole hours
        Math.floor((seconds % 3600) / 60), // whole minutes left within the hour
        Math.floor(seconds % 60), // whole seconds left within the minute
    ];

    return components.map((component) => twoDigits(component)).join(':');
};

module.exports = async (ctx) => {
const { channel } = ctx.params;
const limit = ctx.query.limit ? Number.parseInt(ctx.query.limit, 10) : 10;

const rootUrl = 'https://www.transcriptforest.com';

const { data: firstResponse } = await got(rootUrl);

const data = JSON.parse(firstResponse.match(/({"props".*"scriptLoader":\[]})<\/script>/)?.[1]);

const buildId = data.buildId;
const defaultLocale = data.defaultLocale;
const channels = data.props.pageProps.listChannel;
const selected = channel ? channels.find((c) => c.channel_id === channel || c.channel_name === channel) : undefined;

const apiUrl = new URL(`_next/data/${buildId}/en${selected ? `/channel/${selected.channel_id}` : ''}.json`, rootUrl).href;
const currentUrl = new URL(selected ? `${defaultLocale}/channel/${selected.channel_id}` : '', rootUrl).href;

const { data: response } = await got(apiUrl, {
searchParams: {
channelName: selected ? selected.channel_id : '',
offset: 0,
},
});

let items = response.pageProps.listEpisode.data.slice(0, limit).map((item) => ({
title: item.episode_name,
link: new URL(`${defaultLocale}/${item.channel_id}/${item.episode_id}`, rootUrl).href,
detailUrl: new URL(`_next/data/${buildId}/${defaultLocale}/${item.channel_id}/${item.episode_id}.json`, rootUrl).href,
description: art(path.join(__dirname, 'templates/description.art'), {
texts: item.episode_description.split(/\n\n/).map((text) => ({
text,
})),
}),
author: item.channel_name,
guid: item.id,
pubDate: parseDate(item.published_at),
updated: parseDate(item.updated_at),
itunes_item_image: item.episode_cover.split(/\?/)[0],
itunes_duration: item.episode_duration,
enclosure_url: item.source_media,
enclosure_type: 'audio/mpeg',
}));

items = await Promise.all(
items.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const { data: detailResponse } = await got(item.detailUrl);
const { data: textResponse } = await got(detailResponse.pageProps.currentEpisode.ps4_url);

item.description =
art(path.join(__dirname, 'templates/description.art'), {
audios: [
{
src: detailResponse.pageProps.currentEpisode.media,
type: 'audio/mpeg',
},
],
}) +
item.description +
art(path.join(__dirname, 'templates/description.art'), {
texts: textResponse.map((t) => ({
startTime: bakeTimestamp(t.startTime),
endTime: bakeTimestamp(t.endTime),
text: t.readOnlyText,
})),
});

delete item.detailUrl;

return item;
})
)
);

const { data: currentResponse } = await got(currentUrl);

const $ = cheerio.load(currentResponse);

const title = $('title').text();
const image = $('meta[property="og:image"]').prop('content');
const icon = new URL($('link[rel="apple-touch-icon"]').prop('href'), rootUrl).href;
const author = title.split(/\|/)[0].trim();

ctx.state.data = {
item: items,
title,
link: currentUrl,
description: $('meta[name="description"]').prop('content'),
language: $('html').prop('lang'),
image,
icon,
logo: icon,
author,
itunes_author: author,
allowEmpty: true,
};
};
3 changes: 3 additions & 0 deletions lib/v2/transcriptforest/maintainer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Maps each route pattern of this namespace to the GitHub handles of its maintainers.
const maintainers = {
    '/:channel?': ['nczitzk'],
};

module.exports = maintainers;
Loading

0 comments on commit 8c077f9

Please sign in to comment.