From 4530a0135cc8dff8cafb76f7d3070b2a84abed0c Mon Sep 17 00:00:00 2001 From: kakkokari-gtyih <67428053+kakkokari-gtyih@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:40:02 +0900 Subject: [PATCH] refactor --- src/plugins/bluesky.ts | 13 ++++++------- src/utils/got.ts | 28 +++++++++++++--------------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/plugins/bluesky.ts b/src/plugins/bluesky.ts index 797bf9a8..c1431e5f 100644 --- a/src/plugins/bluesky.ts +++ b/src/plugins/bluesky.ts @@ -1,20 +1,19 @@ import * as cheerio from 'cheerio'; import type Summary from '@/summary.js'; -import { getResponse } from '@/utils/got.js'; -import { parseGeneral } from '@/general.js'; +import { getResponse, getGotOptions } from '@/utils/got.js'; +import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; export function test(url: URL): boolean { return url.hostname === 'bsky.app'; } -export async function summarize(url: URL): Promise { +export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise { + const args = getGotOptions(url.href, opts); + // HEADで取ると404が返るためGETのみで取得 const res = await getResponse({ - url: url.href, + ...args, method: 'GET', - headers: { - 'accept': '*/*', - }, }); const body = res.body; const $ = cheerio.load(body); diff --git a/src/utils/got.ts b/src/utils/got.ts index c278a6ba..c25c17b7 100644 --- a/src/utils/got.ts +++ b/src/utils/got.ts @@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs'; import got, * as Got from 'got'; import * as cheerio from 'cheerio'; import PrivateIp from 'private-ip'; -import { StatusError } from './status-error.js'; -import { detectEncoding, toUtf8 } from './encoding.js'; +import { StatusError } from '@/utils/status-error.js'; +import { detectEncoding, toUtf8 } from '@/utils/encoding.js'; +import type { GeneralScrapingOptions } from '@/general.js'; const _filename = fileURLToPath(import.meta.url); const _dirname = dirname(_filename); @@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000; export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024; export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`; -export async function scpaping( - url: string, - opts?: { - lang?: string; - userAgent?: string; - responseTimeout?: number; - operationTimeout?: number; - contentLengthLimit?: number; - contentLengthRequired?: boolean; - }, -) { - const args: Omit = { +export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit { + return { url, headers: { 'accept': 'text/html,application/xhtml+xml', 'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA, - 'accept-language': opts?.lang, + 'accept-language': opts?.lang ?? undefined, }, typeFilter: /^(text\/html|application\/xhtml\+xml)/, responseTimeout: opts?.responseTimeout, @@ -60,6 +51,13 @@ export async function scpaping( contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }; +} + +export async function scpaping( + url: string, + opts?: GeneralScrapingOptions, +) { + const args = getGotOptions(url, opts); const headResponse = await getResponse({ ...args,