forked from syuilo/summaly
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
639 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,2 @@ | ||
/node_modules | ||
/built | ||
npm-debug.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import * as URL from 'node:url'; | ||
import Summary from './summary.js'; | ||
// Default export: takes a parsed URL and an optional language string and | ||
// resolves to a Summary, or to null. | ||
declare const _default: (url: URL.Url, lang?: string | null) => Promise<Summary | null>; | ||
export default _default; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import * as URL from 'node:url'; | ||
import clip from './utils/clip.js'; | ||
import cleanupTitle from './utils/cleanup-title.js'; | ||
import { decode as decodeHtml } from 'html-entities'; | ||
import { head, scpaping } from './utils/got.js'; | ||
/**
 * Summarizes an arbitrary web page from its Open Graph / Twitter Card
 * metadata, falling back to plain HTML tags.
 *
 * @param url  Parsed URL of the page; `href` and `hostname` are read.
 * @param lang Optional language value forwarded to the scraper. It is dropped
 *             unless it is a comma separated list of `[\w-]+` tokens.
 * @returns An object with `title`, `icon`, `description`, `thumbnail`,
 *          `player`, `sitename` and `sensitive`, or `null` when no title
 *          value is available at all.
 */
export default async (url, lang = null) => {
    // Reject anything that is not a plain comma separated token list.
    if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) {
        lang = null;
    }
    const res = await scpaping(url.href, { lang: lang || undefined });
    const $ = res.$;
    const twitterCard = $('meta[property="twitter:card"]').attr('content');

    // Title: Open Graph, then Twitter Card, then the <title> element.
    let title = $('meta[property="og:title"]').attr('content') ||
        $('meta[property="twitter:title"]').attr('content') ||
        $('title').text();
    if (title === undefined || title === null) {
        return null;
    }
    title = clip(decodeHtml(title), 100);

    // Thumbnail: the first candidate found, resolved against the page URL.
    let image = $('meta[property="og:image"]').attr('content') ||
        $('meta[property="twitter:image"]').attr('content') ||
        $('link[rel="image_src"]').attr('href') ||
        $('link[rel="apple-touch-icon"]').attr('href') ||
        $('link[rel="apple-touch-icon image_src"]').attr('href');
    image = image ? URL.resolve(url.href, image) : null;

    // The twitter:player tags are ignored for "summary_large_image" cards.
    const playerUrl = (twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
        (twitterCard !== 'summary_large_image' && $('meta[name="twitter:player"]').attr('content')) ||
        $('meta[property="og:video"]').attr('content') ||
        $('meta[property="og:video:secure_url"]').attr('content') ||
        $('meta[property="og:video:url"]').attr('content');

    // Explicit radix 10 so a value such as "0x10" is never read as hexadecimal.
    // A missing value parses to NaN, which is mapped to null in the result.
    const playerWidth = parseInt($('meta[property="twitter:player:width"]').attr('content') ||
        $('meta[name="twitter:player:width"]').attr('content') ||
        $('meta[property="og:video:width"]').attr('content') ||
        '', 10);
    const playerHeight = parseInt($('meta[property="twitter:player:height"]').attr('content') ||
        $('meta[name="twitter:player:height"]').attr('content') ||
        $('meta[property="og:video:height"]').attr('content') ||
        '', 10);

    let description = $('meta[property="og:description"]').attr('content') ||
        $('meta[property="twitter:description"]').attr('content') ||
        $('meta[name="description"]').attr('content');
    description = description
        ? clip(decodeHtml(description), 300)
        : null;
    // A description that merely repeats the title carries no information.
    if (title === description) {
        description = null;
    }

    let siteName = $('meta[property="og:site_name"]').attr('content') ||
        $('meta[name="application-name"]').attr('content') ||
        url.hostname;
    siteName = siteName ? decodeHtml(siteName) : null;

    const favicon = $('link[rel="shortcut icon"]').attr('href') ||
        $('link[rel="icon"]').attr('href') ||
        '/favicon.ico';
    const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true';

    // Resolves `path` against the page URL and returns the absolute URL when a
    // HEAD request to it succeeds; any failure deliberately yields null.
    const find = async (path) => {
        const target = URL.resolve(url.href, path);
        try {
            await head(target);
            return target;
        }
        catch (e) {
            return null;
        }
    };

    // Convert a relative URL (e.g. "test") into a root-relative one (e.g. "/test").
    const toAbsolute = (relativeURLString) => {
        const relativeURL = URL.parse(relativeURLString);
        const isAbsolute = relativeURL.slashes || relativeURL.path !== null && relativeURL.path[0] === '/';
        // Already absolute: return the value untouched.
        if (isAbsolute) {
            return relativeURLString;
        }
        // Otherwise prefix a slash.
        return '/' + relativeURLString;
    };

    const icon = await find(favicon) ||
        // Retry with the relative reference converted to a root-relative one.
        await find(toAbsolute(favicon)) ||
        null;

    // Clean up the title; fall back to the site name when nothing is left.
    title = cleanupTitle(title, siteName);
    if (title === '') {
        title = siteName;
    }

    return {
        title: title || null,
        icon: icon || null,
        description: description || null,
        thumbnail: image || null,
        player: {
            url: playerUrl || null,
            width: Number.isNaN(playerWidth) ? null : playerWidth,
            height: Number.isNaN(playerHeight) ? null : playerHeight
        },
        sitename: siteName || null,
        sensitive,
    };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/** | ||
* summaly | ||
* https://github.com/syuilo/summaly | ||
*/ | ||
import Summary from './summary.js'; | ||
import type { IPlugin as _IPlugin } from './iplugin.js'; | ||
export declare type IPlugin = _IPlugin; | ||
import * as Got from 'got'; | ||
import type { FastifyInstance } from 'fastify'; | ||
// Options accepted by summaly and by the default-exported function. | ||
declare type Options = { | ||
/** | ||
* Accept-Language for the request | ||
*/ | ||
lang?: string | null; | ||
/** | ||
* Whether to follow redirects | ||
*/ | ||
followRedirects?: boolean; | ||
/** | ||
* Custom plugins | ||
*/ | ||
plugins?: IPlugin[]; | ||
/** | ||
* Custom HTTP agent | ||
*/ | ||
agent?: Got.Agents; | ||
}; | ||
// A Summary extended with the URL of the page it describes. | ||
declare type Result = Summary & { | ||
/** | ||
* The actual URL of the web page | ||
*/ | ||
url: string; | ||
}; | ||
/** | ||
* Summarize a web page | ||
*/ | ||
export declare const summaly: (url: string, options?: Options | undefined) => Promise<Result>; | ||
// Default export: takes a Fastify instance, the options and a completion | ||
// callback that optionally receives an Error. | ||
export default function (fastify: FastifyInstance, options: Options, done: (err?: Error) => void): void; | ||
export {}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/** | ||
* summaly | ||
* https://github.com/syuilo/summaly | ||
*/ | ||
import * as URL from 'node:url'; | ||
import tracer from 'trace-redirect'; | ||
import general from './general.js'; | ||
import { setAgent } from './utils/got.js'; | ||
import { plugins as builtinPlugins } from './plugins/index.js'; | ||
// Baseline option values that summaly combines with the caller's options. | ||
const defaultOptions = { | ||
lang: null, | ||
followRedirects: true, | ||
plugins: [], | ||
}; | ||
/**
 * Summarize a web page.
 *
 * @param url     Address of the page to summarize.
 * @param options Optional settings: `lang`, `followRedirects`, `plugins`
 *                (tried after the built-in ones) and `agent` (installed via
 *                `setAgent` before any request is made).
 * @returns The summary produced by the first matching plugin, or by the
 *          generic summarizer when none matches, extended with `url`
 *          (the address that was actually summarized).
 * @throws The string `'failed summarize'` when no summary could be produced.
 */
export const summaly = async (url, options) => {
    if (options?.agent) {
        setAgent(options.agent);
    }
    // Merge into a fresh object. Assigning onto `defaultOptions` itself would
    // make one caller's options the defaults of every later call.
    const opts = Object.assign({}, defaultOptions, options);
    const plugins = builtinPlugins.concat(opts.plugins || []);
    let actualUrl = url;
    if (opts.followRedirects) {
        // `.catch(() => url)` would do, but feeding trace-redirect to jest is
        // a hassle, hence the try-catch. A failed trace keeps the given URL.
        try {
            actualUrl = await tracer(url);
        }
        catch (e) {
            actualUrl = url;
        }
    }
    const _url = URL.parse(actualUrl, true);
    // Find the first plugin that claims this URL.
    const match = plugins.find(plugin => plugin.test(_url));
    // Get the summary from that plugin, or from the generic summarizer.
    const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);
    if (summary == null) {
        throw 'failed summarize';
    }
    return Object.assign(summary, {
        url: actualUrl
    });
};
/**
 * Registers a `GET /url` route on the given Fastify instance.
 *
 * The route answers 400 when the `url` query parameter is absent, replies
 * with the result of `summaly` on success, and answers 500 carrying the
 * thrown value when summarizing fails.
 *
 * @param fastify Instance the route is registered on.
 * @param options Spread last into the options handed to `summaly`, so its
 *                keys take precedence over the per-request values.
 * @param done    Invoked once the route has been registered.
 */
export default function (fastify, options, done) {
    const handleSummaryRequest = async (req, reply) => {
        const targetUrl = req.query.url;
        if (targetUrl == null) {
            const missingUrl = { error: 'url is required' };
            return reply.status(400).send(missingUrl);
        }
        try {
            return await summaly(targetUrl, {
                lang: req.query.lang,
                followRedirects: false,
                ...options,
            });
        }
        catch (failure) {
            const payload = { error: failure };
            return reply.status(500).send(payload);
        }
    };
    fastify.get('/url', handleSummaryRequest);
    done();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/// <reference types="node" /> | ||
import * as URL from 'node:url'; | ||
import Summary from './summary.js'; | ||
// Shape of a summarizer plugin. | ||
export interface IPlugin { | ||
// Receives a parsed URL and returns a boolean. | ||
test: (url: URL.Url) => boolean; | ||
// Receives a parsed URL and an optional language; resolves to a Summary. | ||
summarize: (url: URL.Url, lang?: string) => Promise<Summary>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export {}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/// <reference types="node" /> | ||
import * as URL from 'node:url'; | ||
import summary from '../summary.js'; | ||
// Takes a parsed URL and returns a boolean. | ||
export declare function test(url: URL.Url): boolean; | ||
// Takes a parsed URL and resolves to a summary. | ||
export declare function summarize(url: URL.Url): Promise<summary>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import { scpaping } from '../utils/got.js'; | ||
// Hostnames of the Amazon storefronts handled by this plugin.
// The Australian storefront is served from "www.amazon.com.au"; the older
// "www.amazon.au" entry is kept so previously accepted input still matches.
const AMAZON_HOSTNAMES = new Set([
    'www.amazon.com',
    'www.amazon.co.jp',
    'www.amazon.ca',
    'www.amazon.com.br',
    'www.amazon.com.mx',
    'www.amazon.co.uk',
    'www.amazon.de',
    'www.amazon.fr',
    'www.amazon.it',
    'www.amazon.es',
    'www.amazon.nl',
    'www.amazon.cn',
    'www.amazon.in',
    'www.amazon.com.au',
    'www.amazon.au',
]);
/**
 * Tells whether the URL belongs to one of the supported Amazon storefronts.
 *
 * @param url Parsed URL; only `hostname` is read.
 * @returns `true` when the hostname is exactly one of the known storefront
 *          hostnames, `false` otherwise (including a missing hostname).
 */
export function test(url) {
    return AMAZON_HOSTNAMES.has(url.hostname);
}
/**
 * Scrapes an Amazon page and builds its summary from well-known element ids
 * and Twitter Card player tags.
 *
 * @param url Parsed URL of the page; only `href` is read.
 * @returns An object with `title`, `icon`, `description`, `thumbnail`,
 *          `player` and `sitename`; values that are missing on the page are
 *          reported as `null`.
 */
export async function summarize(url) {
    const res = await scpaping(url.href);
    const $ = res.$;

    // Base-10 parse that reports a missing or non-numeric value as null
    // rather than NaN, matching how the generic summarizer reports sizes.
    const toDimension = (value) => {
        const parsed = parseInt(value || '', 10);
        return Number.isNaN(parsed) ? null : parsed;
    };

    const title = $('#title').text();
    const description = $('#productDescription').text() ||
        $('meta[name="description"]').attr('content');
    const thumbnail = $('#landingImage').attr('src');
    const playerUrl = $('meta[property="twitter:player"]').attr('content') ||
        $('meta[name="twitter:player"]').attr('content');
    const playerWidth = $('meta[property="twitter:player:width"]').attr('content') ||
        $('meta[name="twitter:player:width"]').attr('content');
    const playerHeight = $('meta[property="twitter:player:height"]').attr('content') ||
        $('meta[name="twitter:player:height"]').attr('content');

    return {
        title: title ? title.trim() : null,
        icon: 'https://www.amazon.com/favicon.ico',
        description: description ? description.trim() : null,
        thumbnail: thumbnail ? thumbnail.trim() : null,
        player: {
            url: playerUrl || null,
            width: toDimension(playerWidth),
            height: toDimension(playerHeight)
        },
        sitename: 'Amazon'
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import { IPlugin } from '@/iplugin.js'; | ||
// Exported array of plugin objects. | ||
export declare const plugins: IPlugin[]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import * as amazon from './amazon.js'; | ||
import * as wikipedia from './wikipedia.js'; | ||
// Plugins shipped with the package: the amazon and wikipedia modules, in that order. | ||
export const plugins = [ | ||
amazon, | ||
wikipedia, | ||
]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/// <reference types="node" /> | ||
import * as URL from 'node:url'; | ||
import summary from '../summary.js'; | ||
// Takes a parsed URL and returns a boolean. | ||
export declare function test(url: URL.Url): boolean; | ||
// Takes a parsed URL and resolves to a summary. | ||
export declare function summarize(url: URL.Url): Promise<summary>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import { get } from '../utils/got.js'; | ||
import debug from 'debug'; | ||
import clip from './../utils/clip.js'; | ||
const log = debug('summaly:plugins:wikipedia'); | ||
/**
 * Tells whether the URL points at a subdomain of wikipedia.org.
 *
 * @param url Parsed URL; only `hostname` is read.
 * @returns `true` when the hostname ends with ".wikipedia.org", `false`
 *          otherwise (including a missing or empty hostname).
 */
export function test(url) {
    const { hostname } = url;
    return hostname ? /\.wikipedia\.org$/.test(hostname) : false;
}
/**
 * Summarizes a Wikipedia article through the MediaWiki query API
 * (`prop=extracts`) of the wiki the URL points at.
 *
 * @param url Parsed URL of the article; `host` and `pathname` are read.
 * @returns An object with the article title, the intro extract clipped to
 *          300 characters, fixed Wikipedia icon / logo URLs and an empty
 *          player.
 * @throws The string `'fetch failed'` when the API response carries no page.
 */
export async function summarize(url) {
    // The first host label is used as the wiki language (e.g. "en").
    const lang = url.host ? url.host.split('.')[0] : null;

    // Everything after the second path segment is the title. Titles may
    // themselves contain "/" (e.g. "AC/DC"), so the remaining segments are
    // joined back instead of taking only the first one.
    let title = null;
    if (url.pathname) {
        const rawTitle = url.pathname.split('/').slice(2).join('/');
        try {
            title = decodeURIComponent(rawTitle);
        }
        catch (e) {
            // Malformed percent-escape: fall back to the raw path text.
            title = rawTitle;
        }
    }

    // Encode the title so characters such as "&", "+" or "/" cannot alter
    // the query string.
    const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${encodeURIComponent(String(title))}`;
    log(`lang is ${lang}`);
    log(`title is ${title}`);
    log(`endpoint is ${endpoint}`);

    const rawBody = await get(endpoint);
    const body = JSON.parse(rawBody);
    log(body);

    const pages = body?.query?.pages;
    if (!pages) {
        throw 'fetch failed';
    }
    // Only the first page of the response is used.
    const info = pages[Object.keys(pages)[0]];
    if (!info) {
        throw 'fetch failed';
    }

    return {
        title: info.title,
        icon: 'https://wikipedia.org/static/favicon/wikipedia.ico',
        // NOTE(review): `extract` may be absent for missing pages; confirm
        // that `clip` tolerates an undefined input.
        description: clip(info.extract, 300),
        thumbnail: `https://wikipedia.org/static/images/project-logos/${lang}wiki.png`,
        player: {
            url: null,
            width: null,
            height: null
        },
        sitename: 'Wikipedia'
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export {}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import * as http from 'http'; | ||
import * as Koa from 'koa'; | ||
import summaly from '../'; | ||
// Small Koa front end: answers 400 when the `url` query parameter is
// missing, otherwise responds with the summary of that URL (redirects are
// not followed), or with 500 when summarizing fails.
const app = new Koa();

const respondWithSummary = async (ctx) => {
    const requestedUrl = ctx.query.url;
    if (!requestedUrl) {
        ctx.status = 400;
        return;
    }
    try {
        ctx.body = await summaly(requestedUrl, {
            lang: ctx.query.lang,
            followRedirects: false
        });
    }
    catch (e) {
        ctx.status = 500;
    }
};

app.use(respondWithSummary);

// Listen on $PORT when set, otherwise on port 80.
const server = http.createServer(app.callback());
server.listen(process.env.PORT || 80);
Oops, something went wrong.