Skip to content

Commit

Permalink
add route cspengyuan
Browse files Browse the repository at this point in the history
  • Loading branch information
orzchen committed Feb 19, 2025
1 parent f97f8c3 commit 782b485
Show file tree
Hide file tree
Showing 3 changed files with 276 additions and 0 deletions.
258 changes: 258 additions & 0 deletions lib/routes/cspengyuan/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
import { Route } from '@/types';
import { load } from 'cheerio';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';
import cache from '@/utils/cache';
import logger from '@/utils/logger';
import puppeteer from '@/utils/puppeteer';
import { art } from '@/utils/render';
import path from 'node:path';
import { getCurrentPath } from '@/utils/helpers';

const __dirname = getCurrentPath(import.meta.url);

export const route: Route = {
path: '/credit-research/:category/:type?',
name: '信用研究',
url: 'www.cspengyuan.com/pengyuancmscn/',
maintainers: ['orzchen'],
example: '/cspengyuan/credit-research/macro',
parameters: {
category: '(必须)匹配一级分类,例如 macro、bond-market、industry 等。',
type: '(可选)匹配报告类型或细节类型,例如 new、weekly、monthly、subject 等。',
},
description: `::: TIP
**base route**: \`/cspengyuan/\`
默认情况下只获取第一页的最新数据。
过滤了 文章/PDF 链接为空的文章。
| 宏观研究 | 结构融资研究 | 评级研究 | 国际研究 |
| :-------------------: | :--------------------------------: | :--------------------: | :------------------: |
| credit-research/macro | credit-research/structured-finance | credit-research/rating | credit-research/intl |
| **债市研究** | 专题研究 | 热点分析 | 债市周报 | 债市月报 | 债市年报 |
| :----------: | :------------------------------: | :-------------------------------------: | :--------------------------------: | :---------------------------------: | :--------------------------------: |
| × | credit-research/industry/comment | credit-research/bond-market/hot-comment | credit-research/bond-market/weekly | credit-research/bond-market/monthly | credit-research/bond-market/annual |
| **行业研究** | 行业点评 | 行业信用展望 | 行业专题 |
| :------: | :------------------------------: | :------------------------------: | :------------------------------: |
| × | credit-research/industry/comment | credit-research/industry/outlook | credit-research/industry/subject |
| **出版物** | 期刊 | 专著 |
| :----: | :------------------------------------: | :-----------------------------------: |
| × | credit-research/publication/periodical | credit-research/publication/monograph |
:::`,
categories: ['finance'],
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
radar: [
{
title: '宏观研究',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/macro-research.html'],
target: '/credit-research/macro',
},
{
title: '债市周报',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/weekly.html'],
target: '/credit-research/bond-market/weekly',
},
{
title: '债市月报',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/monthly.html'],
target: '/credit-research/bond-market/monthly',
},
{
title: '债市年报',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/annual.html'],
target: '/credit-research/bond-market/annual',
},
{
title: '热点',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/hot-comment.html'],
target: '/credit-research/bond-market/hot-comment',
},
{
title: '专题研究',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/bond-market-research/subject-research.html'],
target: '/credit-research/bond-market/subject-research',
},
{
title: '行业研究',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/industry-research/comment.html'],
target: '/credit-research/industry/comment',
},
{
title: '行业信用展望',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/industry-research/outlook.html'],
target: '/credit-research/industry/outlook',
},
{
title: '行业专题',
source: ['www.cspengyuan.com/pengyuancmscn/credit-research/industry-research/subject.html'],
target: '/credit-research/industry/subject',
},
],
handler,
};

async function handler(ctx) {
const { category, type } = ctx.req.param();

const rootUrl = 'https://www.cspengyuan.com/pengyuancmscn/credit-research/';
const linkUrl: string =
type === undefined
? (category === 'macro'
? `${rootUrl}${category}-research.html`
: `${rootUrl}${category}.html`)
: (category === 'publication'
? `${rootUrl}${category}/${type}.html`
: `${rootUrl}${category}-research/${type}.html`);

const response = await browser(linkUrl);

const $ = load(response);

const subTitle = $('h3.py-common-subtitle').text().trim();

let itemsInfo = $('div.py-main');
if (category === 'publication' && type) {
if (type === 'periodical') {
itemsInfo = itemsInfo.find('ul.py-list li div.py-periodical-box');
} else if (type === 'monograph') {
itemsInfo = itemsInfo.find('div.py-mrh-list > div.py-mrh-item');
}
} else {
itemsInfo = itemsInfo.find('ul.py-list li');
}

const list = itemsInfo.toArray().map((item) => getResearchItem(item, $, category, type));

const items = await Promise.all(
(list as any[])
.filter((l) => l.link !== null)
.map((item) =>
cache.tryGet(item.link, async () => {
if (category === 'publication') {
const response = await browser(item.link);
const content = load(response);
const p = content('div.mrh-dtl-right-top > p');
const b = content('div.mrh-dtl-right-bom');
const imgUrl = content('img').attr('src');
const segment1 = content(p[0]).text().trim();
const segment2 = content(p[1]).text().trim();
const part = { segment1, segment2 };
if (type === 'monograph') {
const segment3 = b.find('h4 > b').text().trim();
const segment4 = b.find('p').text().trim();
Object.assign(part, { segment3, segment4 });
}
item.description = art(path.join(__dirname, 'templates/description.art'), {
part,
item,
imgUrl,
type,
});
} else {
item.description = `
pdf原链接: <a download="${item.pdfName}" href="https://www.cspengyuan.com${item.pdfUrl}">Download</a><br>
pdf在线预览: <a href="${item.pdfViewUrl}">预览</a><br>
`;
}
return item;
})
)
);

return {
title: `中证鹏元-信用研究-${subTitle}`,
link: linkUrl,
item: items,
};
}

const browser = async (link: string) => {
const browser = await puppeteer();
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' ? request.continue() : request.abort();
});
logger.http(`Requesting ${link}`);
await page.goto(link, {
waitUntil: 'domcontentloaded',
});
const response = await page.content();
await page.close();
await browser.close();
return response;
};

const isFullURL = (str: string) => {
const regex = /^(https?:\/\/)?([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+)(\/\S*)?(\?\S*)?$/;
return regex.test(str);
};

const isPath = (str: string) => {
const regex = /^\/([a-zA-Z0-9\-/.]+(\?[a-zA-Z0-9\-/&=.]+)?)?$/;
return regex.test(str);
};

const isValidURL = (str: string) => isFullURL(str) || isPath(str);

function getResearchItem(item, $, category, type) {
item = $(item);
const viewUrl = 'https://www.cspengyuan.com/static/clientlibs/pengyuancmscn/plugins/web/viewer.html?file=/content';
const a = item.find('a').first();
const pdfUrl = item.find('a.py-list-btn-download').attr('href');
const pdfName = item.find('a.py-list-btn-download').attr('download');
const pdfViewUrl = `${viewUrl}${pdfUrl}`;

let title: any;
if (category === 'publication') {
if (type === 'periodical') {
title = item.find('div.py-periodical-title').attr('title').trim();
} else if (type === 'monograph') {
title = item.find('span.mrh-item-right-title > b').text().trim();
} else {
title = a.text().trim();
}
} else {
title = a.text().trim();
}

const link = isValidURL(a.attr('href')) ? `https://www.cspengyuan.com${a.attr('href')}` : null;

let pubDate: any;
if (category === 'publication') {
if (type === 'periodical') {
pubDate = a.attr('href').split('/').pop().split('.')[0].slice(0, 8);
} else if (type === 'monograph') {
pubDate = $(item.find('span.mrh-item-right > span')[2])
.text()
.match(/\d{4}-\d{2}-\d{2}/)?.[0];
} else {
pubDate = item.find('span.py-finance-date').text().trim();
}
} else {
pubDate = item.find('span.py-finance-date').text().trim();
}
pubDate = timezone(parseDate(pubDate, ['YYYYMMDD', 'YYYY-MM-DD']), +8);

return {
title,
link,
pubDate,
category,
pdfUrl,
pdfName,
pdfViewUrl,
};
}
6 changes: 6 additions & 0 deletions lib/routes/cspengyuan/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

export const namespace: Namespace = {
name: '中证鹏元',
url: 'www.cspengyuan.com',
};
12 changes: 12 additions & 0 deletions lib/routes/cspengyuan/templates/description.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<h4>{{ part.segment1 }}</h4>
<h4>{{ part.segment2 }}</h4>
{{if type == 'periodical'}}
<a download="{{ item.pdfName }}" href="https://www.cspengyuan.com{{ item.pdfUrl }}"
style="display: inline-block; padding: 10px 20px; background-color: red; color: white;
text-align: center; text-decoration: none; border-radius: 5px; font-size: 16px;">整刊下载</a><br>
{{/if}}
{{if type == 'monograph' }}
<h4><b>{{ part.segment3 }}</b></h4>
<p>{{ part.segment4 }}</p><br>
{{/if}}
<img src="{{ imgUrl }}" height="50%" style="display: block; margin: 0 auto;">

0 comments on commit 782b485

Please sign in to comment.