From cc7ae8d00a0c96af0e36b155d6a3e432587d8122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=93=E3=81=8B=E3=82=8A?= <67428053+kakkokari-gtyih@users.noreply.github.com> Date: Sun, 2 Feb 2025 10:58:43 +0900 Subject: [PATCH 1/4] =?UTF-8?q?enhance(plugin):=20bluesky=E3=81=AEURL?= =?UTF-8?q?=E3=83=97=E3=83=AC=E3=83=93=E3=83=A5=E3=83=BC=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E5=BF=9C=20(#35)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * blueskyのURLプレビューに対応 (MisskeyIO#5) (cherry picked from commit 5407ae09230ab44693f2198a5cf639ae3a95c941) * Update Changelog * refactor * lint * refactor * lint --------- Co-authored-by: たーびん --- CHANGELOG.md | 1 + src/general.ts | 32 ++++++++++++++++++------------- src/index.ts | 4 ++-- src/plugins/bluesky.ts | 26 +++++++++++++++++++++++++ src/plugins/branchio-deeplinks.ts | 4 ++-- src/plugins/index.ts | 2 ++ src/utils/got.ts | 30 ++++++++++++++--------------- 7 files changed, 66 insertions(+), 33 deletions(-) create mode 100644 src/plugins/bluesky.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index ee9c5934..a1e69d8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ (unreleased) ------------------ * センシティブフラグの判定を `` および `rating` ヘッダでも行うように +* Bluesky(bsky.app)のプレビューに対応 * 依存関係の更新 * eslintの設定を更新 diff --git a/src/general.ts b/src/general.ts index b1facceb..6351d318 100644 --- a/src/general.ts +++ b/src/general.ts @@ -138,19 +138,7 @@ export type GeneralScrapingOptions = { contentLengthRequired?: boolean; } -function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { - if (!headerValue) { - return false; - } - - if (Array.isArray(headerValue)) { - return headerValue.some(value => value.toLowerCase() === search.toLowerCase()); - } - - return headerValue.toLowerCase() === search.toLowerCase(); -} - -export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise { +export async function general(_url: URL | 
string, opts?: GeneralScrapingOptions): Promise { let lang = opts?.lang; if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null; @@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }); + + return await parseGeneral(url, res); +} + +function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { + if (!headerValue) { + return false; + } + + if (Array.isArray(headerValue)) { + return headerValue.some(value => value.toLowerCase() === search.toLowerCase()); + } + + return headerValue.toLowerCase() === search.toLowerCase(); +} + +export async function parseGeneral(_url: URL | string, res: Awaited>): Promise { + const url = typeof _url === 'string' ? new URL(_url) : _url; const $ = res.$; const twitterCard = $('meta[name="twitter:card"]').attr('content') || diff --git a/src/index.ts b/src/index.ts index 2fa4b962..a8f544fd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got'; import type { FastifyInstance } from 'fastify'; import { SummalyResult } from '@/summary.js'; import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js'; -import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; +import { general, type GeneralScrapingOptions } from '@/general.js'; import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js'; import { plugins as builtinPlugins } from '@/plugins/index.js'; @@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise { + const args = getGotOptions(url.href, opts); + + // HEADで取ると404が返るためGETのみで取得 + const res = await getResponse({ + ...args, + method: 'GET', + }); + const body = res.body; + const $ = cheerio.load(body); + + return await parseGeneral(url, { + body, + $, + response: res, + }); +} diff --git 
a/src/plugins/branchio-deeplinks.ts b/src/plugins/branchio-deeplinks.ts index 0623d94a..67d08dcd 100644 --- a/src/plugins/branchio-deeplinks.ts +++ b/src/plugins/branchio-deeplinks.ts @@ -1,4 +1,4 @@ -import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; +import { general, type GeneralScrapingOptions } from '@/general.js'; import Summary from '@/summary.js'; export function test(url: URL): boolean { @@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis // Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ url.searchParams.append('$web_only', 'true'); - return await parseGeneral(url, opts); + return await general(url, opts); } diff --git a/src/plugins/index.ts b/src/plugins/index.ts index 41078eb4..140380aa 100644 --- a/src/plugins/index.ts +++ b/src/plugins/index.ts @@ -1,10 +1,12 @@ import * as amazon from './amazon.js'; +import * as bluesky from './bluesky.js'; import * as wikipedia from './wikipedia.js'; import * as branchIoDeeplinks from './branchio-deeplinks.js'; import { SummalyPlugin } from '@/iplugin.js'; export const plugins: SummalyPlugin[] = [ amazon, + bluesky, wikipedia, branchIoDeeplinks, ]; diff --git a/src/utils/got.ts b/src/utils/got.ts index 6a2b6d9d..23dee916 100644 --- a/src/utils/got.ts +++ b/src/utils/got.ts @@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs'; import got, * as Got from 'got'; import * as cheerio from 'cheerio'; import PrivateIp from 'private-ip'; -import { StatusError } from './status-error.js'; -import { detectEncoding, toUtf8 } from './encoding.js'; +import type { GeneralScrapingOptions } from '@/general.js'; +import { StatusError } from '@/utils/status-error.js'; +import { detectEncoding, toUtf8 } from '@/utils/encoding.js'; const _filename = fileURLToPath(import.meta.url); const _dirname = dirname(_filename); @@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000; export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024; export const DEFAULT_BOT_UA 
= `SummalyBot/${repo.version}`; -export async function scpaping( - url: string, - opts?: { - lang?: string; - userAgent?: string; - responseTimeout?: number; - operationTimeout?: number; - contentLengthLimit?: number; - contentLengthRequired?: boolean; - }, -) { - const args: Omit = { +export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit { + return { url, headers: { 'accept': 'text/html,application/xhtml+xml', 'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA, - 'accept-language': opts?.lang, + 'accept-language': opts?.lang ?? undefined, }, typeFilter: /^(text\/html|application\/xhtml\+xml)/, responseTimeout: opts?.responseTimeout, @@ -60,6 +51,13 @@ export async function scpaping( contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }; +} + +export async function scpaping( + url: string, + opts?: GeneralScrapingOptions, +) { + const args = getGotOptions(url, opts); const headResponse = await getResponse({ ...args, @@ -110,7 +108,7 @@ export async function head(url: string) { }); } -async function getResponse(args: GotOptions) { +export async function getResponse(args: GotOptions) { const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT; const operationTimeout = args.operationTimeout ?? 
DEFAULT_OPERATION_TIMEOUT; From fc9f7db477565bce540ce7ad53539d6791066105 Mon Sep 17 00:00:00 2001 From: Leah <45321184+ChaoticLeah@users.noreply.github.com> Date: Sun, 2 Feb 2025 02:59:18 +0100 Subject: [PATCH 2/4] Add fediverse creator tag support (#41) * add Fediverse Creator * Update README.md --- README.md | 19 +++++++++--------- src/general.ts | 4 ++++ src/summary.ts | 5 +++++ test/htmls/fediverse-creator.html | 13 +++++++++++++ test/index.ts | 32 +++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 test/htmls/fediverse-creator.html diff --git a/README.md b/README.md index 79cb8a5f..e7060d99 100644 --- a/README.md +++ b/README.md @@ -85,15 +85,16 @@ A Promise of an Object that contains properties below: | Property | Type | Description | |:----------------|:-------------------|:-----------------------------------------------------------| -| **title** | *string* \| *null* | The title of the web page | -| **icon** | *string* \| *null* | The url of the icon of the web page | -| **description** | *string* \| *null* | The description of the web page | -| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page | -| **sitename** | *string* \| *null* | The name of the web site | -| **player** | *Player* | The player of the web page | -| **sensitive** | *boolean* | Whether the url is sensitive | -| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page | -| **url** | *string* | The url of the web page | +| **title** | *string* \| *null* | The title of the web page | +| **icon** | *string* \| *null* | The url of the icon of the web page | +| **description** | *string* \| *null* | The description of the web page | +| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page | +| **sitename** | *string* \| *null* | The name of the web site | +| **player** | *Player* | The player of the web page | +| **sensitive** | *boolean* | Whether the url 
 is sensitive | +| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page | +| **fediverseCreator** | *string* \| *null* | The page's fediverse handle | +| **url** | *string* | The url of the web page | #### Summary diff --git a/src/general.ts b/src/general.ts index 6351d318..c9ef2552 100644 --- a/src/general.ts +++ b/src/general.ts @@ -251,6 +251,9 @@ export async function parseGeneral(_url: URL | string, res: Awaited + + + + + + Meow + + +

Hellooo!

+

:3

+ + diff --git a/test/index.ts b/test/index.ts index e983e833..9e53dfe1 100644 --- a/test/index.ts +++ b/test/index.ts @@ -73,6 +73,7 @@ test('basic', async () => { sensitive: false, url: host + '/', activityPub: null, + fediverseCreator: null, }); }); @@ -102,6 +103,7 @@ test('Stage Bye Stage', async () => { 'sitename': 'YouTube', 'sensitive': false, 'activityPub': null, + 'fediverseCreator': null, 'url': 'https://www.youtube.com/watch?v=NMIEAhH_fTU', }, ); @@ -507,6 +509,36 @@ describe('ActivityPub', () => { }); }); +describe('Fediverse Creator', () => { + test('Basic', async () => { + app = fastify(); + app.get('*', (request, reply) => { + const content = fs.readFileSync(_dirname + '/htmls/fediverse-creator.html'); + reply.header('content-length', content.length); + reply.header('content-type', 'text/html'); + return reply.send(content); + }); + await app.listen({ port }); + + const summary = await summaly(host); + expect(summary.fediverseCreator).toBe('@test@example.com'); + }); + + test('Null', async () => { + app = fastify(); + app.get('*', (request, reply) => { + const content = fs.readFileSync(_dirname + '/htmls/basic.html'); + reply.header('content-length', content.length); + reply.header('content-type', 'text/html'); + return reply.send(content); + }); + await app.listen({ port }); + + const summary = await summaly(host); + expect(summary.fediverseCreator).toBeNull(); + }); +}); + describe('sensitive', () => { test('default', async () => { app = fastify(); From 71a6aefb8ee958a36069a9e21846698f027a1a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=93=E3=81=8B=E3=82=8A?= <67428053+kakkokari-gtyih@users.noreply.github.com> Date: Sun, 2 Feb 2025 10:59:35 +0900 Subject: [PATCH 3/4] =?UTF-8?q?fix(test):=20Github=20Actions=E4=B8=8A?= =?UTF-8?q?=E3=81=A7=E5=A4=96=E9=83=A8=E3=82=B5=E3=82=A4=E3=83=88=E3=81=B8?= =?UTF-8?q?=E3=81=AE=E5=8F=96=E5=BE=97=E3=82=92=E8=A1=8C=E3=81=86=E3=83=86?= 
=?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E3=82=B9=E3=82=AD=E3=83=83=E3=83=97?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=20(#38)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 2 +- test/index.ts | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6ac54514..3b912930 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,4 +31,4 @@ jobs: pnpm build - name: Test run: | - pnpm test + SKIP_NETWORK_TEST=true pnpm test diff --git a/test/index.ts b/test/index.ts index 9e53dfe1..34978bc4 100644 --- a/test/index.ts +++ b/test/index.ts @@ -12,7 +12,7 @@ import { dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { Agent as httpAgent } from 'node:http'; import { Agent as httpsAgent } from 'node:https'; -import { expect, test, describe, beforeEach, afterEach } from '@jest/globals'; +import { expect, test, describe, beforeEach, afterEach, xtest } from '@jest/globals'; import fastify, { type FastifyInstance } from 'fastify'; import { summaly } from '../src/index.js'; import { StatusError } from '../src/utils/status-error.js'; @@ -36,6 +36,15 @@ process.on('unhandledRejection', console.dir); let app: FastifyInstance | null = null; +function skippableTest(name: string, fn: () => void) { + if (process.env.SKIP_NETWORK_TEST === 'true') { + console.log(`[SKIP] ${name}`); + xtest(name, fn); + } else { + test(name, fn); + } +} + afterEach(async () => { if (app) { await app.close(); @@ -77,7 +86,7 @@ test('basic', async () => { }); }); -test('Stage Bye Stage', async () => { +skippableTest('Stage Bye Stage', async () => { // If this test fails, you must rewrite the result data and the example in README.md. 
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU'); From e9547a556ae45b2e8d4aaa59d9cf3de31abf4701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=93=E3=81=8B=E3=82=8A?= <67428053+kakkokari-gtyih@users.noreply.github.com> Date: Sun, 2 Feb 2025 11:05:56 +0900 Subject: [PATCH 4/4] fix: missing properties (#43) * fix: missing properties * Update Changelog for misskey-dev#41 --- CHANGELOG.md | 1 + src/plugins/amazon.ts | 1 + src/plugins/wikipedia.ts | 1 + 3 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1e69d8e..759f2bfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ------------------ * センシティブフラグの判定を `` および `rating` ヘッダでも行うように * Bluesky(bsky.app)のプレビューに対応 +* `fediverse:creator` のパースに対応 * 依存関係の更新 * eslintの設定を更新 diff --git a/src/plugins/amazon.ts b/src/plugins/amazon.ts index 6b61b48c..1a749847 100644 --- a/src/plugins/amazon.ts +++ b/src/plugins/amazon.ts @@ -55,5 +55,6 @@ export async function summarize(url: URL): Promise { }, sitename: 'Amazon', activityPub: null, + fediverseCreator: null, }; } diff --git a/src/plugins/wikipedia.ts b/src/plugins/wikipedia.ts index f6c976e6..b0356240 100644 --- a/src/plugins/wikipedia.ts +++ b/src/plugins/wikipedia.ts @@ -43,5 +43,6 @@ export async function summarize(url: URL): Promise { }, sitename: 'Wikipedia', activityPub: null, + fediverseCreator: null, }; }