From 782b485470aff2179d859ce4a6ee6d502a4ac8f2 Mon Sep 17 00:00:00 2001
From: orzchen <83120673@qq.com>
Date: Wed, 19 Feb 2025 13:05:07 +0800
Subject: [PATCH] add route `cspengyuan`

---
 lib/routes/cspengyuan/index.ts                | 330 ++++++++++++++++++
 lib/routes/cspengyuan/namespace.ts            |   7 +
 .../cspengyuan/templates/description.art      |  12 +
 3 files changed, 349 insertions(+)
 create mode 100644 lib/routes/cspengyuan/index.ts
 create mode 100644 lib/routes/cspengyuan/namespace.ts
 create mode 100644 lib/routes/cspengyuan/templates/description.art

diff --git a/lib/routes/cspengyuan/index.ts b/lib/routes/cspengyuan/index.ts
new file mode 100644
index 00000000000000..a0af978407fe42
--- /dev/null
+++ b/lib/routes/cspengyuan/index.ts
@@ -0,0 +1,330 @@
+import { Route } from '@/types';
+import { load } from 'cheerio';
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
+import cache from '@/utils/cache';
+import logger from '@/utils/logger';
+import puppeteer from '@/utils/puppeteer';
+import { art } from '@/utils/render';
+import path from 'node:path';
+import { getCurrentPath } from '@/utils/helpers';
+
+const __dirname = getCurrentPath(import.meta.url);
+
+/** Origin that site-relative article and PDF links are prefixed with. */
+const host = 'https://www.cspengyuan.com';
+/** Directory that contains every credit-research listing page. */
+const rootUrl = `${host}/pengyuancmscn/credit-research/`;
+/** On-site PDF viewer; the PDF path taken from a listing entry is appended to its `file` value. */
+const viewUrl = `${host}/static/clientlibs/pengyuancmscn/plugins/web/viewer.html?file=/content`;
+/** A site-relative path with an optional query string. */
+const pathPattern = /^\/([a-zA-Z0-9\-/.]+(\?[a-zA-Z0-9\-/&=.]+)?)?$/;
+/** An absolute http(s) URL without whitespace. */
+const absoluteUrlPattern = /^https?:\/\/\S+$/i;
+
+/** Browser instance produced by the project's puppeteer helper. */
+type Browser = Awaited<ReturnType<typeof puppeteer>>;
+
+/** One entry scraped from a listing page. */
+type ResearchItem = {
+    title: string;
+    /** Absolute article URL, or `null` when the entry carries no usable link. */
+    link: string | null;
+    pubDate?: Date;
+    /** Holds the `category` route parameter. */
+    category: string[];
+    pdfUrl?: string;
+    pdfName?: string;
+    pdfViewUrl?: string;
+    description?: string;
+};
+
+/** A scraped entry whose link is known to be present. */
+type LinkedItem = ResearchItem & { link: string };
+
+export const route: Route = {
+    path: '/credit-research/:category/:type?',
+    name: '信用研究',
+    url: 'www.cspengyuan.com/pengyuancmscn/',
+    maintainers: ['orzchen'],
+    example: '/cspengyuan/credit-research/macro',
+    parameters: {
+        category: '(必须)匹配一级分类,例如 macro、bond-market、industry 等。',
+        type: '(可选)匹配报告类型或细节类型,例如 new、weekly、monthly、subject 等。',
+    },
+    description: `::: TIP
+**base route**: \`/cspengyuan/\`
+
+默认情况下只获取第一页的最新数据。
+
+过滤了 文章/PDF 链接为空的文章。
+
+| 宏观研究 | 结构融资研究 | 评级研究 | 国际研究 |
+| :-------------------: | :--------------------------------: | :--------------------: | :------------------: |
+| credit-research/macro | credit-research/structured-finance | credit-research/rating | credit-research/intl |
+
+| **债市研究** | 专题研究 | 热点分析 | 债市周报 | 债市月报 | 债市年报 |
+| :----------: | :------------------------------------------: | :-------------------------------------: | :--------------------------------: | :---------------------------------: | :--------------------------------: |
+| × | credit-research/bond-market/subject-research | credit-research/bond-market/hot-comment | credit-research/bond-market/weekly | credit-research/bond-market/monthly | credit-research/bond-market/annual |
+
+| **行业研究** | 行业点评 | 行业信用展望 | 行业专题 |
+| :------: | :------------------------------: | :------------------------------: | :------------------------------: |
+| × | credit-research/industry/comment | credit-research/industry/outlook | credit-research/industry/subject |
+
+| **出版物** | 期刊 | 专著 |
+| :----: | :------------------------------------: | :-----------------------------------: |
+| × | credit-research/publication/periodical | credit-research/publication/monograph |
+:::`,
+    categories: ['finance'],
+    features: {
+        requireConfig: false,
+        // Every page of this route is loaded through puppeteer.
+        requirePuppeteer: true,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            title: '宏观研究',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/macro-research.html'],
+            target: '/credit-research/macro',
+        },
+        {
+            title: '债市周报',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/weekly.html'],
+            target: '/credit-research/bond-market/weekly',
+        },
+        {
+            title: '债市月报',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/monthly.html'],
+            target: '/credit-research/bond-market/monthly',
+        },
+        {
+            title: '债市年报',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/annual.html'],
+            target: '/credit-research/bond-market/annual',
+        },
+        {
+            title: '热点分析',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/hot-comment.html'],
+            target: '/credit-research/bond-market/hot-comment',
+        },
+        {
+            title: '专题研究',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/subject-research.html'],
+            target: '/credit-research/bond-market/subject-research',
+        },
+        {
+            title: '行业点评',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/industry-research/comment.html'],
+            target: '/credit-research/industry/comment',
+        },
+        {
+            title: '行业信用展望',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/industry-research/outlook.html'],
+            target: '/credit-research/industry/outlook',
+        },
+        {
+            title: '行业专题',
+            source: ['www.cspengyuan.com/pengyuancmscn/credit-research/industry-research/subject.html'],
+            target: '/credit-research/industry/subject',
+        },
+    ],
+    handler,
+};
+
+/**
+ * Builds the feed for one credit-research listing page.
+ *
+ * Loads the listing selected by the `category` and optional `type` route parameters, turns
+ * every entry that has a usable link into a feed item and fills in its description. One
+ * browser is shared by all page loads of the request and is closed even when a load fails.
+ */
+async function handler(ctx) {
+    const { category, type } = ctx.req.param();
+    const linkUrl = buildListUrl(category, type);
+
+    const browser = await puppeteer();
+    try {
+        const $ = load(await fetchHtml(browser, linkUrl));
+        const subTitle = $('h3.py-common-subtitle').text().trim();
+
+        const list = $('div.py-main')
+            .find(getItemSelector(category, type))
+            .toArray()
+            .map((element) => getResearchItem(element, $, category, type))
+            .filter((item): item is LinkedItem => item.link !== null);
+
+        const items = await Promise.all(
+            list.map((item) => {
+                if (category !== 'publication') {
+                    // The listing page already holds everything needed, so no extra request is made.
+                    item.description = renderPdfLinks(item);
+                    return item;
+                }
+                return cache.tryGet(item.link, () => fillPublicationDescription(browser, item, type));
+            })
+        );
+
+        return {
+            title: `中证鹏元-信用研究-${subTitle}`,
+            link: linkUrl,
+            item: items,
+        };
+    } finally {
+        await browser.close();
+    }
+}
+
+/**
+ * Returns the listing page URL for a category and optional type.
+ *
+ * Without a type only `macro` gets the `-research` suffix; with a type every category
+ * except `publication` does.
+ */
+function buildListUrl(category: string, type?: string): string {
+    if (type === undefined) {
+        return category === 'macro' ? `${rootUrl}${category}-research.html` : `${rootUrl}${category}.html`;
+    }
+    return category === 'publication' ? `${rootUrl}${category}/${type}.html` : `${rootUrl}${category}-research/${type}.html`;
+}
+
+/** Returns the selector, relative to `div.py-main`, that matches one element per listing entry. */
+function getItemSelector(category: string, type?: string): string {
+    if (category === 'publication') {
+        if (type === 'periodical') {
+            return 'ul.py-list li div.py-periodical-box';
+        }
+        if (type === 'monograph') {
+            return 'div.py-mrh-list > div.py-mrh-item';
+        }
+    }
+    return 'ul.py-list li';
+}
+
+/**
+ * Loads `link` in a new page of `browser` and returns the resulting HTML.
+ *
+ * Every request other than a document request is aborted. The page is closed even when
+ * navigation fails.
+ */
+async function fetchHtml(browser: Browser, link: string): Promise<string> {
+    const page = await browser.newPage();
+    try {
+        await page.setRequestInterception(true);
+        page.on('request', (request) => {
+            request.resourceType() === 'document' ? request.continue() : request.abort();
+        });
+        logger.http(`Requesting ${link}`);
+        await page.goto(link, {
+            waitUntil: 'domcontentloaded',
+        });
+        return await page.content();
+    } finally {
+        await page.close();
+    }
+}
+
+/** Fetches the detail page of a publication entry and renders its description from the template. */
+async function fillPublicationDescription(browser: Browser, item: LinkedItem, type?: string): Promise<LinkedItem> {
+    const content = load(await fetchHtml(browser, item.link));
+    const paragraphs = content('div.mrh-dtl-right-top > p');
+    const part: Record<string, string> = {
+        segment1: paragraphs.eq(0).text().trim(),
+        segment2: paragraphs.eq(1).text().trim(),
+    };
+    if (type === 'monograph') {
+        const bottom = content('div.mrh-dtl-right-bom');
+        part.segment3 = bottom.find('h4 > b').text().trim();
+        part.segment4 = bottom.find('p').text().trim();
+    }
+    item.description = art(path.join(__dirname, 'templates/description.art'), {
+        part,
+        item,
+        imgUrl: content('img').attr('src'),
+        type,
+    });
+    return item;
+}
+
+/**
+ * Renders the description of a non-publication entry: a download link and a preview link.
+ *
+ * NOTE(review): the anchor markup is reconstructed from the `pdfUrl` / `pdfViewUrl` fields,
+ * which nothing else consumes; confirm the intended `href` values against the site.
+ */
+function renderPdfLinks(item: ResearchItem): string {
+    if (!item.pdfUrl) {
+        return '';
+    }
+    const downloadUrl = absoluteUrlPattern.test(item.pdfUrl) ? item.pdfUrl : `${host}${item.pdfUrl}`;
+    return `
+                    pdf原链接: <a href="${downloadUrl}">Download</a><br>
+                    pdf在线预览: <a href="${item.pdfViewUrl}">预览</a><br>
+                    `;
+}
+
+/**
+ * Turns an `href` into an absolute article URL.
+ *
+ * Absolute http(s) URLs are returned unchanged and site-relative paths are prefixed with the
+ * host. Anything else (missing, empty, `javascript:` ...) yields `null` so the entry is dropped.
+ */
+function resolveLink(href: string | undefined): string | null {
+    if (!href) {
+        return null;
+    }
+    if (absoluteUrlPattern.test(href)) {
+        return href;
+    }
+    return pathPattern.test(href) ? `${host}${href}` : null;
+}
+
+/**
+ * Extracts title, link, publication date and PDF details from one listing entry.
+ *
+ * @param item - DOM element of the entry
+ * @param $ - cheerio instance the element belongs to
+ * @param category - `category` route parameter
+ * @param type - optional `type` route parameter
+ * @returns the scraped entry; `link` is `null` when the entry has no usable link
+ */
+function getResearchItem(item, $, category, type): ResearchItem {
+    const entry = $(item);
+    const a = entry.find('a').first();
+    const href: string | undefined = a.attr('href');
+    const download = entry.find('a.py-list-btn-download');
+    const pdfUrl: string | undefined = download.attr('href');
+    const pdfName: string | undefined = download.attr('download');
+
+    let title: string;
+    let rawDate: string | undefined;
+    if (category === 'publication' && type === 'periodical') {
+        title = entry.find('div.py-periodical-title').attr('title')?.trim() ?? '';
+        // The first eight characters of the link's file name are read as a YYYYMMDD date.
+        rawDate = href?.split('/').pop()?.split('.')[0].slice(0, 8);
+    } else if (category === 'publication' && type === 'monograph') {
+        title = entry.find('span.mrh-item-right-title > b').text().trim();
+        rawDate = entry
+            .find('span.mrh-item-right > span')
+            .eq(2)
+            .text()
+            .match(/\d{4}-\d{2}-\d{2}/)?.[0];
+    } else {
+        title = a.text().trim();
+        rawDate = entry.find('span.py-finance-date').text().trim();
+    }
+
+    return {
+        title,
+        link: resolveLink(href),
+        // An entry without a recognisable date stays undated instead of handing `undefined` to the parser.
+        pubDate: rawDate ? timezone(parseDate(rawDate, ['YYYYMMDD', 'YYYY-MM-DD']), +8) : undefined,
+        category: [category],
+        pdfUrl,
+        pdfName,
+        pdfViewUrl: pdfUrl ? `${viewUrl}${pdfUrl}` : undefined,
+    };
+}
diff --git a/lib/routes/cspengyuan/namespace.ts b/lib/routes/cspengyuan/namespace.ts
new file mode 100644
index 00000000000000..3e3f033373b5af
--- /dev/null
+++ b/lib/routes/cspengyuan/namespace.ts
@@ -0,0 +1,7 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: '中证鹏元',
+    url: 'www.cspengyuan.com',
+    lang: 'zh-CN',
+};
diff --git a/lib/routes/cspengyuan/templates/description.art b/lib/routes/cspengyuan/templates/description.art
new file mode 100644
index 00000000000000..972f0844a58590
--- /dev/null
+++ b/lib/routes/cspengyuan/templates/description.art
@@ -0,0 +1,12 @@
+

{{ part.segment1 }}

+

{{ part.segment2 }}

+ {{if type == 'periodical'}} + 整刊下载
+ {{/if}} + {{if type == 'monograph' }} +

{{ part.segment3 }}

+

{{ part.segment4 }}


+ {{/if}} +