diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6ac54514..3b912930 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,4 +31,4 @@ jobs:
pnpm build
- name: Test
run: |
- pnpm test
+ SKIP_NETWORK_TEST=true pnpm test
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd59c971..a47b5e1e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
------------------
* センシティブフラグの判定を `` および `rating` ヘッダでも行うように
* レスポンスに`Cache-Control`ヘッダを含むように
+* Bluesky(bsky.app)のプレビューに対応
+* `fediverse:creator` のパースに対応
* 依存関係の更新
* eslintの設定を更新
diff --git a/README.md b/README.md
index 79cb8a5f..e7060d99 100644
--- a/README.md
+++ b/README.md
@@ -85,15 +85,16 @@ A Promise of an Object that contains properties below:
| Property | Type | Description |
|:----------------|:-------------------|:-----------------------------------------------------------|
-| **title** | *string* \| *null* | The title of the web page |
-| **icon** | *string* \| *null* | The url of the icon of the web page |
-| **description** | *string* \| *null* | The description of the web page |
-| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page |
-| **sitename** | *string* \| *null* | The name of the web site |
-| **player** | *Player* | The player of the web page |
-| **sensitive** | *boolean* | Whether the url is sensitive |
-| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page |
-| **url** | *string* | The url of the web page |
+| **title** | *string* \| *null* | The title of the web page |
+| **icon** | *string* \| *null* | The url of the icon of the web page |
+| **description** | *string* \| *null* | The description of the web page |
+| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page |
+| **sitename** | *string* \| *null* | The name of the web site |
+| **player** | *Player* | The player of the web page |
+| **sensitive** | *boolean* | Whether the url is sensitive |
+| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page |
+| **fediverseCreator** | *string* \| *null* | The fediverse handle of the page's creator (`fediverse:creator` meta tag) |
+| **url** | *string* | The url of the web page |
#### Summary
diff --git a/src/general.ts b/src/general.ts
index b1facceb..c9ef2552 100644
--- a/src/general.ts
+++ b/src/general.ts
@@ -138,19 +138,7 @@ export type GeneralScrapingOptions = {
contentLengthRequired?: boolean;
}
-function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
- if (!headerValue) {
- return false;
- }
-
- if (Array.isArray(headerValue)) {
- return headerValue.some(value => value.toLowerCase() === search.toLowerCase());
- }
-
- return headerValue.toLowerCase() === search.toLowerCase();
-}
-
-export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise {
+export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise {
let lang = opts?.lang;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
@@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
contentLengthLimit: opts?.contentLengthLimit,
contentLengthRequired: opts?.contentLengthRequired,
});
+
+ return await parseGeneral(url, res);
+}
+
+function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { // Case-insensitive equality check of search against a header value
+ if (!headerValue) { // missing or empty header never matches
+ return false;
+ }
+
+ if (Array.isArray(headerValue)) { // multi-valued header: true when any single element equals search
+ return headerValue.some(value => value.toLowerCase() === search.toLowerCase());
+ }
+
+ return headerValue.toLowerCase() === search.toLowerCase(); // single-valued header: whole-string comparison, not a substring search
+}
+
+export async function parseGeneral(_url: URL | string, res: Awaited>): Promise {
+ const url = typeof _url === 'string' ? new URL(_url) : _url;
const $ = res.$;
const twitterCard =
$('meta[name="twitter:card"]').attr('content') ||
@@ -245,6 +251,9 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
const activityPub =
$('link[rel="alternate"][type="application/activity+json"]').attr('href') || null;
+ const fediverseCreator: string | null =
+ $('meta[name=\'fediverse:creator\']').attr('content') || null;
+
// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
const sensitive =
$('meta[property=\'mixi:content-rating\']').attr('content') === '1' ||
@@ -293,5 +302,6 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
sitename: siteName || null,
sensitive,
activityPub,
+ fediverseCreator,
};
}
diff --git a/src/index.ts b/src/index.ts
index 1994b124..3e16b633 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got';
import type { FastifyInstance } from 'fastify';
import { SummalyResult } from '@/summary.js';
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
-import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
+import { general, type GeneralScrapingOptions } from '@/general.js';
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
import { plugins as builtinPlugins } from '@/plugins/index.js';
@@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise {
},
sitename: 'Amazon',
activityPub: null,
+ fediverseCreator: null,
};
}
diff --git a/src/plugins/bluesky.ts b/src/plugins/bluesky.ts
new file mode 100644
index 00000000..c1431e5f
--- /dev/null
+++ b/src/plugins/bluesky.ts
@@ -0,0 +1,26 @@
+import * as cheerio from 'cheerio';
+import type Summary from '@/summary.js';
+import { getResponse, getGotOptions } from '@/utils/got.js';
+import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
+
+export function test(url: URL): boolean { // Reports whether this plugin should handle the given URL
+ return url.hostname === 'bsky.app'; // exact hostname match only; other hostnames are not handled
+}
+
+export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise {
+ const args = getGotOptions(url.href, opts);
+
+ // A HEAD request to this site returns 404, so the page is fetched with GET only
+ const res = await getResponse({
+ ...args,
+ method: 'GET',
+ });
+ const body = res.body;
+ const $ = cheerio.load(body);
+
+ return await parseGeneral(url, { // hand the fetched body, parsed document and raw response to the shared parser
+ body,
+ $,
+ response: res,
+ });
+}
diff --git a/src/plugins/branchio-deeplinks.ts b/src/plugins/branchio-deeplinks.ts
index 0623d94a..67d08dcd 100644
--- a/src/plugins/branchio-deeplinks.ts
+++ b/src/plugins/branchio-deeplinks.ts
@@ -1,4 +1,4 @@
-import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
+import { general, type GeneralScrapingOptions } from '@/general.js';
import Summary from '@/summary.js';
export function test(url: URL): boolean {
@@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis
// Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ
url.searchParams.append('$web_only', 'true');
- return await parseGeneral(url, opts);
+ return await general(url, opts);
}
diff --git a/src/plugins/index.ts b/src/plugins/index.ts
index 41078eb4..140380aa 100644
--- a/src/plugins/index.ts
+++ b/src/plugins/index.ts
@@ -1,10 +1,12 @@
import * as amazon from './amazon.js';
+import * as bluesky from './bluesky.js';
import * as wikipedia from './wikipedia.js';
import * as branchIoDeeplinks from './branchio-deeplinks.js';
import { SummalyPlugin } from '@/iplugin.js';
export const plugins: SummalyPlugin[] = [
amazon,
+ bluesky,
wikipedia,
branchIoDeeplinks,
];
diff --git a/src/plugins/wikipedia.ts b/src/plugins/wikipedia.ts
index f6c976e6..b0356240 100644
--- a/src/plugins/wikipedia.ts
+++ b/src/plugins/wikipedia.ts
@@ -43,5 +43,6 @@ export async function summarize(url: URL): Promise {
},
sitename: 'Wikipedia',
activityPub: null,
+ fediverseCreator: null,
};
}
diff --git a/src/summary.ts b/src/summary.ts
index 4560f50e..74d2143a 100644
--- a/src/summary.ts
+++ b/src/summary.ts
@@ -38,6 +38,11 @@ type Summary = {
* The url of the ActivityPub representation of that web page
*/
activityPub: string | null;
+
+ /**
+ * The @ handle of a fediverse user (https://blog.joinmastodon.org/2024/07/highlighting-journalism-on-mastodon/)
+ */
+ fediverseCreator: string | null;
};
export type SummalyResult = Summary & {
diff --git a/src/utils/got.ts b/src/utils/got.ts
index 6a2b6d9d..23dee916 100644
--- a/src/utils/got.ts
+++ b/src/utils/got.ts
@@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs';
import got, * as Got from 'got';
import * as cheerio from 'cheerio';
import PrivateIp from 'private-ip';
-import { StatusError } from './status-error.js';
-import { detectEncoding, toUtf8 } from './encoding.js';
+import type { GeneralScrapingOptions } from '@/general.js';
+import { StatusError } from '@/utils/status-error.js';
+import { detectEncoding, toUtf8 } from '@/utils/encoding.js';
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
@@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
-export async function scpaping(
- url: string,
- opts?: {
- lang?: string;
- userAgent?: string;
- responseTimeout?: number;
- operationTimeout?: number;
- contentLengthLimit?: number;
- contentLengthRequired?: boolean;
- },
-) {
- const args: Omit = {
+export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit {
+ return {
url,
headers: {
'accept': 'text/html,application/xhtml+xml',
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
- 'accept-language': opts?.lang,
+ 'accept-language': opts?.lang ?? undefined,
},
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
responseTimeout: opts?.responseTimeout,
@@ -60,6 +51,13 @@ export async function scpaping(
contentLengthLimit: opts?.contentLengthLimit,
contentLengthRequired: opts?.contentLengthRequired,
};
+}
+
+export async function scpaping(
+ url: string,
+ opts?: GeneralScrapingOptions,
+) {
+ const args = getGotOptions(url, opts);
const headResponse = await getResponse({
...args,
@@ -110,7 +108,7 @@ export async function head(url: string) {
});
}
-async function getResponse(args: GotOptions) {
+export async function getResponse(args: GotOptions) {
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;
diff --git a/test/htmls/fediverse-creator.html b/test/htmls/fediverse-creator.html
new file mode 100644
index 00000000..725d6370
--- /dev/null
+++ b/test/htmls/fediverse-creator.html
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+ Meow
+
+
+ Hellooo!
+ :3
+
+
diff --git a/test/index.ts b/test/index.ts
index e983e833..34978bc4 100644
--- a/test/index.ts
+++ b/test/index.ts
@@ -12,7 +12,7 @@ import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { Agent as httpAgent } from 'node:http';
import { Agent as httpsAgent } from 'node:https';
-import { expect, test, describe, beforeEach, afterEach } from '@jest/globals';
+import { expect, test, describe, beforeEach, afterEach, xtest } from '@jest/globals';
import fastify, { type FastifyInstance } from 'fastify';
import { summaly } from '../src/index.js';
import { StatusError } from '../src/utils/status-error.js';
@@ -36,6 +36,15 @@ process.on('unhandledRejection', console.dir);
let app: FastifyInstance | null = null;
+function skippableTest(name: string, fn: () => void) { // Registers fn as a test, or as a skipped test when SKIP_NETWORK_TEST is exactly the string true
+ if (process.env.SKIP_NETWORK_TEST === 'true') {
+ console.log(`[SKIP] ${name}`); // logged when this helper is called, i.e. at test registration
+ xtest(name, fn); // still registered, but through xtest instead of test
+ } else {
+ test(name, fn);
+ }
+}
+
afterEach(async () => {
if (app) {
await app.close();
@@ -73,10 +82,11 @@ test('basic', async () => {
sensitive: false,
url: host + '/',
activityPub: null,
+ fediverseCreator: null,
});
});
-test('Stage Bye Stage', async () => {
+skippableTest('Stage Bye Stage', async () => {
// If this test fails, you must rewrite the result data and the example in README.md.
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU');
@@ -102,6 +112,7 @@ test('Stage Bye Stage', async () => {
'sitename': 'YouTube',
'sensitive': false,
'activityPub': null,
+ 'fediverseCreator': null,
'url': 'https://www.youtube.com/watch?v=NMIEAhH_fTU',
},
);
@@ -507,6 +518,36 @@ describe('ActivityPub', () => {
});
});
+describe('Fediverse Creator', () => { // Checks the fediverseCreator field of the summary against two local fixtures
+ test('Basic', async () => { // serves fediverse-creator.html and expects the handle @test@example.com
+ app = fastify();
+ app.get('*', (request, reply) => {
+ const content = fs.readFileSync(_dirname + '/htmls/fediverse-creator.html');
+ reply.header('content-length', content.length); // no encoding was passed to readFileSync, so this is the Buffer byte length
+ reply.header('content-type', 'text/html');
+ return reply.send(content);
+ });
+ await app.listen({ port });
+
+ const summary = await summaly(host);
+ expect(summary.fediverseCreator).toBe('@test@example.com');
+ });
+
+ test('Null', async () => { // serves basic.html and expects fediverseCreator to be null
+ app = fastify();
+ app.get('*', (request, reply) => {
+ const content = fs.readFileSync(_dirname + '/htmls/basic.html');
+ reply.header('content-length', content.length);
+ reply.header('content-type', 'text/html');
+ return reply.send(content);
+ });
+ await app.listen({ port });
+
+ const summary = await summaly(host);
+ expect(summary.fediverseCreator).toBeNull();
+ });
+});
+
describe('sensitive', () => {
test('default', async () => {
app = fastify();