Skip to content

perf: parallelize page setup #334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11,126 changes: 10,907 additions & 219 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"commander": "^7.1.0",
"debounce-fn": "^4.0.0",
"execa": "^5.0.0",
"fastq": "^1.11.0",
"find-cache-dir": "^3.3.1",
"find-up": "^5.0.0",
"fs-extra": "^9.1.0",
Expand Down
25 changes: 14 additions & 11 deletions src/lib/helpers/copyDynamicImportChunks.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,30 @@ const copyDynamicImportChunks = async (functionPath) => {
const filesWP4 = readdirSync(chunksPathWebpack4)
const chunkRegexWP4 = new RegExp(/^(\.?[-$~\w]+)+\.js$/g)
const excludeFiles = new Set(['init-server.js.js', 'on-error-server.js.js'])
const copyPathWP4 = join(functionPath, 'nextPage')
if (filesWP4.length !== 0) {
logTitle('💼 Copying WP4 dynamic import chunks to', copyPathWP4)
}
filesWP4.forEach((file) => {
if (!excludeFiles.has(file) && chunkRegexWP4.test(file)) {
// WP4 files are looked for one level up (../) in runtime
// This is a hack to make the file one level up i.e. with
// nextPage/nextPage/index.js, the chunk is moved to the inner nextPage
const copyPath = join(functionPath, 'nextPage')
logTitle('💼 Copying WP4 dynamic import chunks to', copyPath)
copySync(join(chunksPathWebpack4, file), join(copyPath, file), {
copySync(join(chunksPathWebpack4, file), join(copyPathWP4, file), {
overwrite: false,
errorOnExist: true,
})
}
})

// Chunks are copied into the nextPage directory, as a sibling to "pages" or "api".
// This matches the Next output, so that imports work correctly
const chunksPathWebpack5 = join(nextDistDir, 'serverless', 'chunks')
const filesWP5 = existsSync(chunksPathWebpack5) ? readdirSync(chunksPathWebpack5) : []
const copyPathWP5 = join(functionPath, 'nextPage', 'chunks')
if (filesWP5.length !== 0) {
logTitle('💼 Copying WB5 dynamic import chunks to', copyPathWP5)
}

filesWP5.forEach((file) => {
// Chunks are copied into the nextPage directory, as a sibling to pages or api.
// This matches the Next output, so that imports work correctly
const copyPath = join(functionPath, 'nextPage', 'chunks')
logTitle('💼 Copying WB5 dynamic import chunks to', copyPath)
copySync(join(chunksPathWebpack5, file), join(copyPath, file), {
copySync(join(chunksPathWebpack5, file), join(copyPathWP5, file), {
overwrite: false,
errorOnExist: true,
})
Expand Down
18 changes: 18 additions & 0 deletions src/lib/helpers/runJobsQueue.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
const os = require('os')

const fastq = require('fastq')

const { processPage } = require('../pages/worker')

// TODO: benchmark a large site to find a good value for this
const PAGE_CONCURRENCY = 4

const runJobsQueue = (jobs) => {
console.log(`Building ${jobs.length} pages`)

const queue = fastq.promise(processPage, PAGE_CONCURRENCY)

return Promise.all(jobs.map((job) => queue.push(job)))
}

module.exports = runJobsQueue
11 changes: 3 additions & 8 deletions src/lib/pages/api/setup.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
const asyncForEach = require('../../helpers/asyncForEach')
const { logTitle, logItem } = require('../../helpers/logger')
const setupNetlifyFunctionForPage = require('../../helpers/setupNetlifyFunctionForPage')
const { logTitle } = require('../../helpers/logger')

const getPages = require('./pages')

Expand All @@ -10,11 +8,8 @@ const setup = async (functionsPath) => {

const pages = await getPages()

// Create Netlify Function for every page
await asyncForEach(pages, async ({ filePath }) => {
logItem(filePath)
await setupNetlifyFunctionForPage({ filePath, functionsPath, isApiPage: true })
})
// Create Netlify Function job for every page
return pages.map(({ filePath }) => ({ type: 'function', filePath, functionsPath, isApiPage: true }))
}

module.exports = setup
9 changes: 2 additions & 7 deletions src/lib/pages/getInitialProps/setup.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
const asyncForEach = require('../../helpers/asyncForEach')
const { logTitle, logItem } = require('../../helpers/logger')
const setupNetlifyFunctionForPage = require('../../helpers/setupNetlifyFunctionForPage')
const { logTitle } = require('../../helpers/logger')

const getPages = require('./pages')

Expand All @@ -11,10 +9,7 @@ const setup = async (functionsPath) => {
const pages = await getPages()

// Create Netlify Function for every page
await asyncForEach(pages, async ({ filePath }) => {
logItem(filePath)
await setupNetlifyFunctionForPage({ filePath, functionsPath })
})
return pages.map(({ filePath }) => ({ type: 'function', filePath, functionsPath }))
}

module.exports = setup
9 changes: 2 additions & 7 deletions src/lib/pages/getServerSideProps/setup.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
const asyncForEach = require('../../helpers/asyncForEach')
const { logTitle, logItem } = require('../../helpers/logger')
const setupNetlifyFunctionForPage = require('../../helpers/setupNetlifyFunctionForPage')
const { logTitle } = require('../../helpers/logger')

const getPages = require('./pages')

Expand All @@ -11,10 +9,7 @@ const setup = async (functionsPath) => {
const pages = await getPages()

// Create Netlify Function for every page
await asyncForEach(pages, async ({ filePath }) => {
logItem(filePath)
await setupNetlifyFunctionForPage({ filePath, functionsPath })
})
return pages.map(({ filePath }) => ({ type: 'function', filePath, functionsPath }))
}

module.exports = setup
73 changes: 39 additions & 34 deletions src/lib/pages/getStaticProps/setup.js
Original file line number Diff line number Diff line change
@@ -1,51 +1,56 @@
const { join } = require('path')

const asyncForEach = require('../../helpers/asyncForEach')
const getFilePathForRoute = require('../../helpers/getFilePathForRoute')
const isRouteWithFallback = require('../../helpers/isRouteWithFallback')
const { logTitle, logItem } = require('../../helpers/logger')
const setupNetlifyFunctionForPage = require('../../helpers/setupNetlifyFunctionForPage')
const setupStaticFileForPage = require('../../helpers/setupStaticFileForPage')
const { logTitle } = require('../../helpers/logger')

const getPages = require('./pages')

// Copy pre-rendered SSG pages
const setup = async ({ functionsPath, publishPath }) => {
logTitle('🔥 Copying pre-rendered pages with getStaticProps and JSON data to', publishPath)

// Keep track of the functions that have been set up, so that we do not set up
// a function for the same file path twice
const filePathsDone = []

const filePathsDone = new Set()
const pages = await getPages()

await asyncForEach(pages, async ({ route, dataRoute, srcRoute }) => {
logItem(route)

// Copy pre-rendered HTML page
const htmlPath = getFilePathForRoute(route, 'html')
await setupStaticFileForPage({ inputPath: htmlPath, publishPath })

// Copy page's JSON data
const jsonPath = getFilePathForRoute(route, 'json')
await setupStaticFileForPage({
inputPath: jsonPath,
outputPath: dataRoute,
publishPath,
})

// Set up the Netlify function (this is ONLY for preview mode)
const relativePath = getFilePathForRoute(srcRoute || route, 'js')
const filePath = join('pages', relativePath)

// Skip if we have already set up a function for this file
// or if the source route has a fallback (handled by getStaticPropsWithFallback)
if (filePathsDone.includes(filePath) || (await isRouteWithFallback(srcRoute))) return

logItem(filePath)
await setupNetlifyFunctionForPage({ filePath, functionsPath })
filePathsDone.push(filePath)
})
const jobs = []

await Promise.all(
pages.map(async ({ route, dataRoute, srcRoute }) => {
// Copy pre-rendered HTML page
const htmlPath = getFilePathForRoute(route, 'html')

jobs.push({ type: 'static', inputPath: htmlPath, publishPath })

// Copy page's JSON data
const jsonPath = getFilePathForRoute(route, 'json')
jobs.push({
type: 'static',
inputPath: jsonPath,
outputPath: dataRoute,
publishPath,
})

// Set up the Netlify function (this is ONLY for preview mode)
const relativePath = getFilePathForRoute(srcRoute || route, 'js')
const filePath = join('pages', relativePath)

// Skip if we have already set up a function for this file

if (filePathsDone.has(filePath)) {
return
}
filePathsDone.add(filePath)

// or if the source route has a fallback (handled by getStaticPropsWithFallback)
if (await isRouteWithFallback(srcRoute)) {
return
}
jobs.push({ type: 'function', filePath, functionsPath })
}),
)
return jobs
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure what's happening here - why do we have a map (which returns nothing) inside an awaited Promise.all, only to return jobs outside of it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The callback in the map is async (because it has to await isRouteWithFallback) , so it automatically returns a promise which resolves (to void) when it's complete. This means that the map returns an array of promises (one for each page). Passing these to an awaited Promise.all means that it waits until all of the functions are complete. With an async callback like that, the map will immediately return the array of promises, before the callback has executed for each of the elements. It would be somehting like this. Apologies if you know all this:

const result = pages.map(async () => {
await something()
})

// We get here immediately, before the callbacks execute
console.log(result)
// [ <unresolved Promise>, <unresolved Promise>, <unresolved Promise>, ...]

// ...the callbacks execute in parallel, and resolve one by one. we might wait here a while
const done = await Promise.all(result)
// once we're here, the jobs are all complete
console.log(result)

// [ <resolved Promise>, <resolved Promise>, <resolved Promise>, ...]

console.log(done)

// [void, void, void, ...]

return
// 

The reason I'm using the jobs array from outside of the map is so that I can conditionally add elements. If I returned them, I'd end up with an array with undefined elements for the pages we're skipping. We could then filter that array, but that means we'd need to loop through them again. Pushing onto an array outside means we only need to loop through once. I could use reduce for the same effect, but I generally find that's harder to read.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ooh ok i dont think i processed that the map callback is async. ok wow this is wild!!!


module.exports = setup
9 changes: 3 additions & 6 deletions src/lib/pages/getStaticPropsWithFallback/setup.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
const { join } = require('path')

const asyncForEach = require('../../helpers/asyncForEach')
const getFilePathForRoute = require('../../helpers/getFilePathForRoute')
const { logTitle, logItem } = require('../../helpers/logger')
const setupNetlifyFunctionForPage = require('../../helpers/setupNetlifyFunctionForPage')
const { logTitle } = require('../../helpers/logger')

const getPages = require('./pages')

Expand All @@ -14,11 +12,10 @@ const setup = async (functionsPath) => {
const pages = await getPages()

// Create Netlify Function for every page
await asyncForEach(pages, async ({ route }) => {
return pages.map(({ route }) => {
const relativePath = getFilePathForRoute(route, 'js')
const filePath = join('pages', relativePath)
logItem(filePath)
await setupNetlifyFunctionForPage({ filePath, functionsPath, isISR: true })
return { type: 'function', filePath, functionsPath, isISR: true }
})
}

Expand Down
25 changes: 12 additions & 13 deletions src/lib/pages/getStaticPropsWithRevalidate/setup.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
const { join } = require('path')

const asyncForEach = require('../../helpers/asyncForEach')
const getFilePathForRoute = require('../../helpers/getFilePathForRoute')
const { logTitle, logItem } = require('../../helpers/logger')
const setupNetlifyFunctionForPage = require('../../helpers/setupNetlifyFunctionForPage')
const { logTitle } = require('../../helpers/logger')

const getPages = require('./pages')

Expand All @@ -17,23 +15,24 @@ const setup = async (functionsPath) => {

// Keep track of the functions that have been set up, so that we do not set up
// a function for the same file path twice
const filePathsDone = []
const filePathsDone = new Set()

const pages = await getPages()

// Create Netlify Function for every page
await asyncForEach(pages, async ({ route, srcRoute }) => {
const relativePath = getFilePathForRoute(srcRoute || route, 'js')
const filePath = join('pages', relativePath)

// Skip if we have already set up a function for this file
if (filePathsDone.includes(filePath)) return
const jobs = []

// Set up the function
logItem(filePath)
await setupNetlifyFunctionForPage({ filePath, functionsPath })
filePathsDone.push(filePath)
pages.forEach(({ route, srcRoute }) => {
const relativePath = getFilePathForRoute(srcRoute || route, 'js')
const filePath = join('pages', relativePath)
if (filePathsDone.has(filePath)) {
return
}
filePathsDone.add(filePath)
jobs.push({ type: 'function', filePath, functionsPath })
})
return jobs
}

module.exports = setup
7 changes: 2 additions & 5 deletions src/lib/pages/withoutProps/setup.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ const asyncForEach = require('../../helpers/asyncForEach')
const getI18n = require('../../helpers/getI18n')
const getNextDistDir = require('../../helpers/getNextDistDir')
const { logTitle, logItem } = require('../../helpers/logger')
const setupStaticFileForPage = require('../../helpers/setupStaticFileForPage')

const getPages = require('./pages')

Expand All @@ -20,17 +19,15 @@ const setup = async (publishPath) => {
const pages = await getPages()

// Copy each page to the Netlify publish directory
await asyncForEach(pages, async ({ filePath }) => {
logItem(filePath)

return pages.map(({ filePath }) => {
// HACK: If i18n, 404.html needs to be at the top level of the publish directory
if (i18n.defaultLocale && filePath === `pages/${i18n.defaultLocale}/404.html`) {
copySync(join(nextDistDir, 'serverless', filePath), join(publishPath, '404.html'))
}

// The path to the file, relative to the pages directory
const relativePath = relative('pages', filePath)
await setupStaticFileForPage({ inputPath: relativePath, publishPath })
return { type: 'static', inputPath: relativePath, publishPath }
})
}

Expand Down
19 changes: 19 additions & 0 deletions src/lib/pages/worker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const setupNetlifyFunctionForPage = require('../helpers/setupNetlifyFunctionForPage')
const setupStaticFileForPage = require('../helpers/setupStaticFileForPage')

exports.processPage = function processPage(job) {
try {
switch (job.type) {
case 'function': {
return setupNetlifyFunctionForPage(job)
}
case 'static': {
return setupStaticFileForPage(job)
}
default:
console.log('Unknown job type', job.type)
}
} catch (error) {
console.error('Error in worker', error, 'Job', job)
}
}
19 changes: 12 additions & 7 deletions src/lib/steps/setupPages.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
const runJobsQueue = require('../helpers/runJobsQueue')
const apiSetup = require('../pages/api/setup')
const getInitialPropsSetup = require('../pages/getInitialProps/setup')
const getServerSidePropsSetup = require('../pages/getServerSideProps/setup')
Expand All @@ -8,13 +9,17 @@ const withoutPropsSetup = require('../pages/withoutProps/setup')

// Set up all our NextJS pages according to the recipes defined in pages/
const setupPages = async ({ functionsPath, publishPath }) => {
await apiSetup(functionsPath)
await getInitialPropsSetup(functionsPath)
await getServerSidePropsSetup(functionsPath)
await getStaticPropsSetup({ functionsPath, publishPath })
await getSPFallbackSetup(functionsPath)
await getSPRevalidateSetup(functionsPath)
await withoutPropsSetup(publishPath)
const jobs = [
...(await apiSetup(functionsPath)),
...(await getInitialPropsSetup(functionsPath)),
...(await getServerSidePropsSetup(functionsPath)),
...(await getStaticPropsSetup({ functionsPath, publishPath })),
...(await getSPFallbackSetup(functionsPath)),
...(await getSPRevalidateSetup(functionsPath)),
...(await withoutPropsSetup(publishPath)),
]

return runJobsQueue(jobs)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this need to be returned? just curious

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because runJobsQueue returns a Promise, returning it means we can await the return value outside. It's similar to just awaiting runJobsQueue() before returning, but saves us creating another Promise.

}

module.exports = setupPages