diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6ac54514..3b912930 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,4 +31,4 @@ jobs: pnpm build - name: Test run: | - pnpm test + SKIP_NETWORK_TEST=true pnpm test diff --git a/CHANGELOG.md b/CHANGELOG.md index bd59c971..a47b5e1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ------------------ * センシティブフラグの判定を `` および `rating` ヘッダでも行うように * レスポンスに`Cache-Control`ヘッダを含むように +* Bluesky(bsky.app)のプレビューに対応 +* `fediverse:creator` のパースに対応 * 依存関係の更新 * eslintの設定を更新 diff --git a/README.md b/README.md index 79cb8a5f..e7060d99 100644 --- a/README.md +++ b/README.md @@ -85,15 +85,16 @@ A Promise of an Object that contains properties below: | Property | Type | Description | |:----------------|:-------------------|:-----------------------------------------------------------| -| **title** | *string* \| *null* | The title of the web page | -| **icon** | *string* \| *null* | The url of the icon of the web page | -| **description** | *string* \| *null* | The description of the web page | -| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page | -| **sitename** | *string* \| *null* | The name of the web site | -| **player** | *Player* | The player of the web page | -| **sensitive** | *boolean* | Whether the url is sensitive | -| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page | -| **url** | *string* | The url of the web page | +| **title** | *string* \| *null* | The title of the web page | +| **icon** | *string* \| *null* | The url of the icon of the web page | +| **description** | *string* \| *null* | The description of the web page | +| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page | +| **sitename** | *string* \| *null* | The name of the web site | +| **player** | *Player* | The player of the web page | +| **sensitive** | *boolean* | Whether the url is sensitive | 
+| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page | +| **fediverseCreator** | *string* \| *null* | The page's fediverse handle | +| **url** | *string* | The url of the web page | #### Summary diff --git a/src/general.ts b/src/general.ts index b1facceb..c9ef2552 100644 --- a/src/general.ts +++ b/src/general.ts @@ -138,19 +138,7 @@ export type GeneralScrapingOptions = { contentLengthRequired?: boolean; } -function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { - if (!headerValue) { - return false; - } - - if (Array.isArray(headerValue)) { - return headerValue.some(value => value.toLowerCase() === search.toLowerCase()); - } - - return headerValue.toLowerCase() === search.toLowerCase(); -} - -export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise { +export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise { let lang = opts?.lang; if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null; @@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }); + + return await parseGeneral(url, res); +} + +function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { + if (!headerValue) { + return false; + } + + if (Array.isArray(headerValue)) { + return headerValue.some(value => value.toLowerCase() === search.toLowerCase()); + } + + return headerValue.toLowerCase() === search.toLowerCase(); +} + +export async function parseGeneral(_url: URL | string, res: Awaited>): Promise { + const url = typeof _url === 'string' ? 
new URL(_url) : _url; const $ = res.$; const twitterCard = $('meta[name="twitter:card"]').attr('content') || @@ -245,6 +251,9 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt const activityPub = $('link[rel="alternate"][type="application/activity+json"]').attr('href') || null; + const fediverseCreator: string | null = + $('meta[name=\'fediverse:creator\']').attr('content') || null; + // https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18- const sensitive = $('meta[property=\'mixi:content-rating\']').attr('content') === '1' || @@ -293,5 +302,6 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt sitename: siteName || null, sensitive, activityPub, + fediverseCreator, }; } diff --git a/src/index.ts b/src/index.ts index 1994b124..3e16b633 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got'; import type { FastifyInstance } from 'fastify'; import { SummalyResult } from '@/summary.js'; import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js'; -import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; +import { general, type GeneralScrapingOptions } from '@/general.js'; import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js'; import { plugins as builtinPlugins } from '@/plugins/index.js'; @@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise { }, sitename: 'Amazon', activityPub: null, + fediverseCreator: null, }; } diff --git a/src/plugins/bluesky.ts b/src/plugins/bluesky.ts new file mode 100644 index 00000000..c1431e5f --- /dev/null +++ b/src/plugins/bluesky.ts @@ -0,0 +1,26 @@ +import * as cheerio from 'cheerio'; +import type Summary from '@/summary.js'; +import { getResponse, getGotOptions } from '@/utils/got.js'; +import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; + +export 
function test(url: URL): boolean { + return url.hostname === 'bsky.app'; +} + +export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise { + const args = getGotOptions(url.href, opts); + + // HEADで取ると404が返るためGETのみで取得 + const res = await getResponse({ + ...args, + method: 'GET', + }); + const body = res.body; + const $ = cheerio.load(body); + + return await parseGeneral(url, { + body, + $, + response: res, + }); +} diff --git a/src/plugins/branchio-deeplinks.ts b/src/plugins/branchio-deeplinks.ts index 0623d94a..67d08dcd 100644 --- a/src/plugins/branchio-deeplinks.ts +++ b/src/plugins/branchio-deeplinks.ts @@ -1,4 +1,4 @@ -import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; +import { general, type GeneralScrapingOptions } from '@/general.js'; import Summary from '@/summary.js'; export function test(url: URL): boolean { @@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis // Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ url.searchParams.append('$web_only', 'true'); - return await parseGeneral(url, opts); + return await general(url, opts); } diff --git a/src/plugins/index.ts b/src/plugins/index.ts index 41078eb4..140380aa 100644 --- a/src/plugins/index.ts +++ b/src/plugins/index.ts @@ -1,10 +1,12 @@ import * as amazon from './amazon.js'; +import * as bluesky from './bluesky.js'; import * as wikipedia from './wikipedia.js'; import * as branchIoDeeplinks from './branchio-deeplinks.js'; import { SummalyPlugin } from '@/iplugin.js'; export const plugins: SummalyPlugin[] = [ amazon, + bluesky, wikipedia, branchIoDeeplinks, ]; diff --git a/src/plugins/wikipedia.ts b/src/plugins/wikipedia.ts index f6c976e6..b0356240 100644 --- a/src/plugins/wikipedia.ts +++ b/src/plugins/wikipedia.ts @@ -43,5 +43,6 @@ export async function summarize(url: URL): Promise { }, sitename: 'Wikipedia', activityPub: null, + fediverseCreator: null, }; } diff --git a/src/summary.ts b/src/summary.ts index 
4560f50e..74d2143a 100644 --- a/src/summary.ts +++ b/src/summary.ts @@ -38,6 +38,11 @@ type Summary = { * The url of the ActivityPub representation of that web page */ activityPub: string | null; + + /** + * The @ handle of a fediverse user (https://blog.joinmastodon.org/2024/07/highlighting-journalism-on-mastodon/) + */ + fediverseCreator: string | null; }; export type SummalyResult = Summary & { diff --git a/src/utils/got.ts b/src/utils/got.ts index 6a2b6d9d..23dee916 100644 --- a/src/utils/got.ts +++ b/src/utils/got.ts @@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs'; import got, * as Got from 'got'; import * as cheerio from 'cheerio'; import PrivateIp from 'private-ip'; -import { StatusError } from './status-error.js'; -import { detectEncoding, toUtf8 } from './encoding.js'; +import type { GeneralScrapingOptions } from '@/general.js'; +import { StatusError } from '@/utils/status-error.js'; +import { detectEncoding, toUtf8 } from '@/utils/encoding.js'; const _filename = fileURLToPath(import.meta.url); const _dirname = dirname(_filename); @@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000; export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024; export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`; -export async function scpaping( - url: string, - opts?: { - lang?: string; - userAgent?: string; - responseTimeout?: number; - operationTimeout?: number; - contentLengthLimit?: number; - contentLengthRequired?: boolean; - }, -) { - const args: Omit = { +export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit { + return { url, headers: { 'accept': 'text/html,application/xhtml+xml', 'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA, - 'accept-language': opts?.lang, + 'accept-language': opts?.lang ?? 
undefined, }, typeFilter: /^(text\/html|application\/xhtml\+xml)/, responseTimeout: opts?.responseTimeout, @@ -60,6 +51,13 @@ export async function scpaping( contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }; +} + +export async function scpaping( + url: string, + opts?: GeneralScrapingOptions, +) { + const args = getGotOptions(url, opts); const headResponse = await getResponse({ ...args, @@ -110,7 +108,7 @@ export async function head(url: string) { }); } -async function getResponse(args: GotOptions) { +export async function getResponse(args: GotOptions) { const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT; const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT; diff --git a/test/htmls/fediverse-creator.html b/test/htmls/fediverse-creator.html new file mode 100644 index 00000000..725d6370 --- /dev/null +++ b/test/htmls/fediverse-creator.html @@ -0,0 +1,13 @@ + + + + + + + Meow + + +

Hellooo!

+

:3

+ + diff --git a/test/index.ts b/test/index.ts index e983e833..34978bc4 100644 --- a/test/index.ts +++ b/test/index.ts @@ -12,7 +12,7 @@ import { dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { Agent as httpAgent } from 'node:http'; import { Agent as httpsAgent } from 'node:https'; -import { expect, test, describe, beforeEach, afterEach } from '@jest/globals'; +import { expect, test, describe, beforeEach, afterEach, xtest } from '@jest/globals'; import fastify, { type FastifyInstance } from 'fastify'; import { summaly } from '../src/index.js'; import { StatusError } from '../src/utils/status-error.js'; @@ -36,6 +36,15 @@ process.on('unhandledRejection', console.dir); let app: FastifyInstance | null = null; +function skippableTest(name: string, fn: () => void) { + if (process.env.SKIP_NETWORK_TEST === 'true') { + console.log(`[SKIP] ${name}`); + xtest(name, fn); + } else { + test(name, fn); + } +} + afterEach(async () => { if (app) { await app.close(); @@ -73,10 +82,11 @@ test('basic', async () => { sensitive: false, url: host + '/', activityPub: null, + fediverseCreator: null, }); }); -test('Stage Bye Stage', async () => { +skippableTest('Stage Bye Stage', async () => { // If this test fails, you must rewrite the result data and the example in README.md. 
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU'); @@ -102,6 +112,7 @@ test('Stage Bye Stage', async () => { 'sitename': 'YouTube', 'sensitive': false, 'activityPub': null, + 'fediverseCreator': null, 'url': 'https://www.youtube.com/watch?v=NMIEAhH_fTU', }, ); @@ -507,6 +518,36 @@ describe('ActivityPub', () => { }); }); +describe('Fediverse Creator', () => { + test('Basic', async () => { + app = fastify(); + app.get('*', (request, reply) => { + const content = fs.readFileSync(_dirname + '/htmls/fediverse-creator.html'); + reply.header('content-length', content.length); + reply.header('content-type', 'text/html'); + return reply.send(content); + }); + await app.listen({ port }); + + const summary = await summaly(host); + expect(summary.fediverseCreator).toBe('@test@example.com'); + }); + + test('Null', async () => { + app = fastify(); + app.get('*', (request, reply) => { + const content = fs.readFileSync(_dirname + '/htmls/basic.html'); + reply.header('content-length', content.length); + reply.header('content-type', 'text/html'); + return reply.send(content); + }); + await app.listen({ port }); + + const summary = await summaly(host); + expect(summary.fediverseCreator).toBeNull(); + }); +}); + describe('sensitive', () => { test('default', async () => { app = fastify();