-
Notifications
You must be signed in to change notification settings - Fork 61.6k
/
Copy pathpageinfo.ts
293 lines (260 loc) · 9.97 KB
/
pageinfo.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import express from 'express'
import type { NextFunction, RequestHandler, Response } from 'express'
import type { ExtendedRequest, Page, Context, Permalink } from '@/types'
import statsd from '@/observability/lib/statsd.js'
import { defaultCacheControl } from '@/frame/middleware/cache-control.js'
import catchMiddlewareError from '@/observability/middleware/catch-middleware-error.js'
import {
SURROGATE_ENUMS,
setFastlySurrogateKey,
makeLanguageSurrogateKey,
} from '@/frame/middleware/set-fastly-surrogate-key.js'
import shortVersions from '@/versions/middleware/short-versions.js'
import contextualize from '@/frame/middleware/context/context'
import features from '@/versions/middleware/features.js'
import getRedirect from '@/redirects/lib/get-redirect.js'
import { isArchivedVersionByPath } from '@/archives/lib/is-archived-version'
import { readCompressedJsonFile } from '@/frame/lib/read-json-file.js'
// Router for the pageinfo API; the routes are registered further down in
// this file and the router is the module's default export.
const router = express.Router()
// If the page info has been pre-computed into a JSON file on disk, this is
// where that file is expected to be found.
// Note that if the file does not exist, it is ignored and
// every pageinfo is computed on demand, every time.
// Note! The only reason this variable is exported is so that
// it can be imported by the script scripts/precompute-pageinfo.ts
export const CACHE_FILE_PATH = '.pageinfo-cache.json.br'
// Shape that the result of `isArchivedVersionByPath()` is cast to in
// `pageinfoMiddleware`.
type ArchivedVersion = {
// Truthy when the pathname belongs to an archived version.
isArchived?: boolean
// Interpolated into the title that the `/v1` handler returns for
// archived versions.
requestedVersion?: string
}
// An `ExtendedRequest` carrying the state that the pageinfo middlewares
// pass along to the final `/v1` route handler.
type ExtendedRequestWithPageInfo = ExtendedRequest & {
pageinfo: {
// Set by `validationMiddleware`; `pageinfoMiddleware` later overwrites it
// with the normalized (and possibly redirected) pathname.
pathname: string
// Set by `pageinfoMiddleware`; undefined if the pathname is not a known page.
page?: Page
// Only set by `pageinfoMiddleware` when the pathname is not a known page.
archived?: ArchivedVersion
}
}
/**
 * Validates the `pathname` query string parameter and, when it is valid,
 * stores it on `req.pageinfo` for the next middleware.
 *
 * Responds with a 400 (and does not call `next`) when `pathname` is
 * missing, repeated, not a plain string, empty, does not start with `/`,
 * or contains whitespace.
 */
const validationMiddleware = (
  req: ExtendedRequestWithPageInfo,
  res: Response,
  next: NextFunction,
) => {
  // Depending on the query parser, a query value can be a string, an array
  // (`?pathname=a&pathname=b`) or a nested object (`?pathname[a]=b`), so
  // treat it as unknown and narrow it explicitly.
  const pathname: unknown = req.query.pathname
  if (!pathname) {
    return res.status(400).json({ error: `No 'pathname' query` })
  }
  if (Array.isArray(pathname)) {
    return res.status(400).json({ error: "Multiple 'pathname' keys" })
  }
  // Without this guard a nested-object value would reach `.trim()` below
  // and throw a TypeError, turning a bad request into a 500.
  if (typeof pathname !== 'string') {
    return res.status(400).json({ error: `'pathname' has to be a string` })
  }
  if (!pathname.trim()) {
    return res.status(400).json({ error: `'pathname' query empty` })
  }
  if (!pathname.startsWith('/')) {
    return res.status(400).json({ error: `'pathname' has to start with /` })
  }
  if (/\s/.test(pathname)) {
    return res.status(400).json({ error: `'pathname' cannot contain whitespace` })
  }
  req.pageinfo = { pathname }
  return next()
}
/**
 * Resolves `req.pageinfo.pathname` to a page.
 *
 * Trailing slashes are stripped, `/` is mapped to the current language's
 * root, and, if the pathname is not a known page, it is checked for being
 * an archived version or a redirect. The resulting page (possibly
 * undefined) and final pathname are written back to `req.pageinfo`.
 *
 * Throws if the request has not been contextualized yet.
 */
const pageinfoMiddleware = (
  req: ExtendedRequestWithPageInfo,
  res: Response,
  next: NextFunction,
) => {
  let { pathname } = req.pageinfo

  // The `findPage` middleware utility can't be used here because we need to
  // know when the pathname is a redirect, so that the final `pathname`
  // matches one of the page's permalinks. That matters when rendering a
  // translated page: if it has to fall back, it needs to know the correct
  // equivalent English page.
  const context = req.context
  if (!context || !context.pages || !context.redirects) {
    throw new Error('request not yet contextualized')
  }
  const pages = context.pages
  const redirects = context.redirects

  // Like the `handle-redirects.js` middleware, start by dropping any
  // trailing slashes (but never reduce the pathname to an empty string).
  let end = pathname.length
  while (end > 1 && pathname[end - 1] === '/') {
    end -= 1
  }
  pathname = pathname.slice(0, end)

  // A request for `/` is handled as a redirect outside of getRedirect().
  if (pathname === '/') {
    pathname = `/${context.currentLanguage}`
  }

  const isKnownPage = pathname in pages
  if (!isKnownPage) {
    // An unknown pathname might be a redirect, an archived enterprise
    // version, or both. It's important to not bother looking at the
    // redirects when it is an archived version; this mimics how our
    // middlewares work and the order they run in.
    const archived = isArchivedVersionByPath(pathname) as ArchivedVersion
    req.pageinfo.archived = archived
    if (!archived.isArchived) {
      const target = getRedirect(pathname, { pages, redirects })
      if (target) {
        pathname = target
      }
    }
  }

  // This is undefined when the pathname is not a page.
  req.pageinfo.page = pages[pathname]
  // The pathname may have changed if it was a redirect.
  req.pageinfo.pathname = pathname
  return next()
}
/**
 * Computes the `title`, `intro` and `product` strings for a page.
 *
 * A minimal stand-in request is run through the `contextualize`,
 * `shortVersions` and `features` middlewares to build a rendering context,
 * which is then used to render the page's properties as plain text.
 * `product` is the title of the first root page found among the page's
 * permalinks, or an empty string if none is found.
 */
export async function getPageInfo(page: Page, pathname: string) {
  const mockedContext: Context = {}
  const fakeReq = {
    path: pathname,
    language: page.languageCode,
    pagePath: pathname,
    cookies: {},
    context: mockedContext,
  }
  const fakeRes = {}
  const noop = () => {}

  // The order matters: the page is put on the context after the first two
  // middlewares have run and before `features` runs.
  await contextualize(fakeReq as ExtendedRequest, fakeRes as Response, noop)
  await shortVersions(fakeReq as ExtendedRequest, fakeRes as Response, noop)
  fakeReq.context.page = page
  features(fakeReq as ExtendedRequest, fakeRes as Response, noop)

  const context = fakeReq.context
  const title = await page.renderProp('title', context, { textOnly: true })
  const intro = await page.renderProp('intro', context, { textOnly: true })

  // Look for the product's root page. Its href is the first 3 segments of
  // the permalink for 'free-pro-team@latest' and the first 4 otherwise.
  let productPage = null
  for (const { href, pageVersion } of page.permalinks) {
    const segmentCount = pageVersion === 'free-pro-team@latest' ? 3 : 4
    const rootHref = href.split('/').slice(0, segmentCount).join('/')
    if (!context.pages) throw new Error('context.pages not yet set')
    const candidate = context.pages[rootHref]
    if (candidate) {
      productPage = candidate
      break
    }
  }

  let product = ''
  if (productPage) {
    product = await getProductPageInfo(productPage, context)
  }
  return { title, intro, product }
}
// In-memory memo of rendered product titles, keyed by the product page's
// relative path plus the current version and language.
const _productPageCache: {
  [key: string]: string
} = {}

/**
 * Returns the product page's title as plain text: its `shortTitle` if that
 * renders to something truthy, otherwise its `title`.
 *
 * The product title is easy to cache because it is repeated so often. It is
 * determined by the language and the version, and a lot of pages share the
 * same product.
 */
async function getProductPageInfo(page: Page, context: Context) {
  const cacheKey = `${page.relativePath}:${context.currentVersion}:${context.currentLanguage}`
  if (cacheKey in _productPageCache) {
    return _productPageCache[cacheKey]
  }

  const shortTitle = await page.renderProp('shortTitle', context, {
    textOnly: true,
  })
  // Only render the full title when there is no usable short title.
  const title =
    shortTitle ||
    (await page.renderProp('title', context, {
      textOnly: true,
    }))
  _productPageCache[cacheKey] = title
  return _productPageCache[cacheKey]
}
// Shape of the pre-computed page info file: a map from pathname to that
// page's info.
type CachedPageInfo = {
  [url: string]: {
    title: string
    intro: string
    product: string
    cacheInfo?: string
  }
}

// Lazily populated from `CACHE_FILE_PATH` on the first lookup. `null` means
// that no load has been (successfully) attempted yet.
let _cache: CachedPageInfo | null = null

/**
 * Returns the page info for `pathname`, preferring the pre-computed cache
 * file and falling back to computing it with `getPageInfo()`.
 *
 * The returned object also carries a `cacheInfo` label describing how the
 * lookup went: 'initial-load' or 'initial-fail' for the call that attempted
 * to load the cache file, otherwise 'hit' or 'miss'.
 *
 * The returned object is always a fresh copy, so the entries held in the
 * module-level cache are never mutated.
 *
 * @throws Whatever reading the cache file throws, unless it is an error
 * with the code 'ENOENT' (file not found), which is tolerated.
 */
async function getPageInfoFromCache(page: Page, pathname: string) {
  let cacheInfo = ''
  if (_cache === null) {
    try {
      _cache = readCompressedJsonFile(CACHE_FILE_PATH) as CachedPageInfo
      cacheInfo = 'initial-load'
    } catch (error) {
      // Only a missing cache file is expected. Anything else, including a
      // thrown value that isn't an Error, is a real problem and must not be
      // mistaken for "no cache file".
      const isMissingFile =
        error instanceof Error && (error as Error & { code?: string }).code === 'ENOENT'
      if (!isMissingFile) {
        throw error
      }
      cacheInfo = 'initial-fail'
      _cache = {}
    }
  }

  const cached = _cache[pathname]
  if (!cacheInfo) {
    cacheInfo = cached ? 'hit' : 'miss'
  }

  // You might wonder; why do we not store the computed information
  // into the `_cache` from here?
  // The short answer is; it won't be used again.
  // In production, which is the only place where performance matters,
  // a HTTP GET request will only happen once per deployment. That's
  // because the CDN will cache it until the next deployment (which is
  // followed by a CDN purge).
  // In development (local review), the performance doesn't really matter.
  // In CI, we use the caching because the CI runs
  // `npm run precompute-pageinfo` right before it runs vitest tests.
  const info = cached || (await getPageInfo(page, pathname))

  // Copy rather than assign `cacheInfo` onto `info`: on a cache hit `info`
  // is the shared object stored in `_cache`.
  return { ...info, cacheInfo }
}
// GET /v1?pathname=... responds with `{ info: { title, intro, product } }`
// for the page that the pathname resolves to.
router.get(
  '/v1',
  validationMiddleware as RequestHandler,
  pageinfoMiddleware as RequestHandler,
  catchMiddlewareError(async function pageInfo(req: ExtendedRequestWithPageInfo, res: Response) {
    // Remember, the `pageinfoMiddleware` follows redirects if the
    // `pathname` used is a redirect (e.g. /en/articles/foo or
    // /articles or /en/enterprise-server@latest/foo/bar).
    // So by the time we get here, the pathname should be one of the
    // page's valid permalinks.
    const { page, pathname, archived } = req.pageinfo

    // Archived versions have no page object; answer with generic info.
    if (archived?.isArchived) {
      defaultCacheControl(res)
      return res.json({
        info: {
          intro: '',
          title: `GitHub Enterprise Server ${archived.requestedVersion} Help Documentation`,
          product: 'GitHub Enterprise Server',
        },
      })
    }

    if (!page) {
      return res.status(400).json({ error: `No page found for '${pathname}'` })
    }

    const isPermalink = page.permalinks.some((permalink: Permalink) => permalink.href === pathname)
    if (!isPermalink) {
      throw new Error(`pathname '${pathname}' not one of the page's permalinks`)
    }

    const { cacheInfo, ...info } = await getPageInfoFromCache(page, pathname)

    // According to https://docs.datadoghq.com/getting_started/tagging/#define-tags
    // the max length of a tag is 200 characters. Most of ours are shorter
    // than that, but the pathname tag is truncated just to be safe.
    const pathnameTag = `pathname:${pathname}`.slice(0, 200)
    const tags = [pathnameTag, `language:${page.languageCode}`, `cache:${cacheInfo}`]
    statsd.increment('pageinfo.lookup', 1, tags)

    defaultCacheControl(res)

    // This is necessary so that the `Surrogate-Key` header is set with
    // the correct language surrogate key bit. By default, it's set
    // from the pathname but `/api/**` URLs don't have a language
    // (other than the default 'en').
    // We do this so that all of these URLs are cached in Fastly by language
    // which we need for the staggered purge.
    const languageKey = makeLanguageSurrogateKey(page.languageCode)
    setFastlySurrogateKey(res, `${SURROGATE_ENUMS.DEFAULT} ${languageKey}`, true)

    res.status(200).json({ info })
  }),
)
// The unversioned URL is an alias for the latest version: temporarily
// redirect (307) to the same URL under /pageinfo/v1.
router.get('/', (req, res) => {
  const versionedUrl = req.originalUrl.replace('/pageinfo', '/pageinfo/v1')
  res.redirect(307, versionedUrl)
})
export default router