Skip to content

Commit 722c0fa

Browse files
committed
fix: limit packument cache size based on heap size
This adds a new packument cache that is an instance of `lru-cache`. It uses that package's ability to limit content based on size, and has some multipliers based on research to mostly correctly approximate the correlation between packument size and its memory usage. It also limits the total size of the cache based on the actual heap available. Closes: #7276 Related: npm/pacote#369
1 parent 5b2317b commit 722c0fa

File tree

10 files changed

+113
-14
lines changed

10 files changed

+113
-14
lines changed

DEPENDENCIES.md

+1
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ graph LR;
582582
npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"];
583583
npmcli-arborist-->json-parse-even-better-errors;
584584
npmcli-arborist-->json-stringify-nice;
585+
npmcli-arborist-->lru-cache;
585586
npmcli-arborist-->minify-registry-metadata;
586587
npmcli-arborist-->minimatch;
587588
npmcli-arborist-->nock;

smoke-tests/test/fixtures/setup.js

+16-6
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ const getCleanPaths = async () => {
7474
}
7575

7676
module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy = false } = {}) => {
77-
const debugLog = debug || CI ? (...a) => console.error(...a) : () => {}
77+
const debugLog = debug || CI ? (...a) => t.comment(...a) : () => {}
7878
const cleanPaths = await getCleanPaths()
7979

8080
// setup fixtures
@@ -158,7 +158,7 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy
158158
log(`${spawnCmd} ${spawnArgs.join(' ')}`)
159159
log('-'.repeat(40))
160160

161-
const { stderr, stdout } = await spawn(spawnCmd, spawnArgs, {
161+
const p = spawn(spawnCmd, spawnArgs, {
162162
cwd,
163163
env: {
164164
...getEnvPath(),
@@ -169,10 +169,20 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy
169169
...opts,
170170
})
171171

172-
log(stderr)
173-
log('-'.repeat(40))
174-
log(stdout)
175-
log('='.repeat(40))
172+
// In debug mode, stream stdout and stderr to console so we can debug hanging processes
173+
if (debug) {
174+
p.process.stdout.on('data', (c) => log('STDOUT: ' + c.toString().trim()))
175+
p.process.stderr.on('data', (c) => log('STDERR: ' + c.toString().trim()))
176+
}
177+
178+
const { stdout, stderr } = await p
179+
// If not in debug mode, print full stderr and stdout contents separately
180+
if (!debug) {
181+
log(stderr)
182+
log('-'.repeat(40))
183+
log(stdout)
184+
log('='.repeat(40))
185+
}
176186

177187
return { stderr, stdout }
178188
}

smoke-tests/test/large-install.js

+11-3
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,20 @@ const setup = require('./fixtures/setup.js')
55
const getFixture = (p) => require(
66
path.resolve(__dirname, './fixtures/large-install', p))
77

8-
const runTest = async (t) => {
8+
const runTest = async (t, { lowMemory } = {}) => {
99
const { npm } = await setup(t, {
1010
// This test relies on the actual production registry
1111
mockRegistry: false,
1212
testdir: {
1313
project: {
1414
'package.json': getFixture('package.json'),
15-
'package-lock.json': getFixture('package-lock.json'),
15+
...lowMemory ? {} : { 'package-lock.json': getFixture('package-lock.json') },
1616
},
1717
},
1818
})
19-
return npm('install', '--no-audit', '--no-fund')
19+
return npm('install', '--no-audit', '--no-fund', {
20+
env: lowMemory ? { NODE_OPTIONS: '--max-old-space-size=500' } : {},
21+
})
2022
}
2123

2224
// This test was originally created for https://github.com/npm/cli/issues/6763
@@ -31,3 +33,9 @@ t.test('large install', async t => {
3133
// installs the same number of packages.
3234
t.match(stdout, `added 126${process.platform === 'linux' ? 4 : 5} packages in`)
3335
})
36+
37+
t.test('large install, no lock and low memory', async t => {
38+
// Run the same install but with no lockfile and constrained max-old-space-size
39+
const { stdout } = await runTest(t, { lowMemory: true })
40+
t.match(stdout, /added \d+ packages/)
41+
})

smoke-tests/test/npm-replace-global.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ t.test('publish and replace global self', async t => {
125125
await npmPackage({
126126
manifest: { packuments: [publishedPackument] },
127127
tarballs: { [version]: tarball },
128-
times: 2,
128+
times: 3,
129129
})
130130
await fs.rm(cache, { recursive: true, force: true })
131131
await useNpm('install', 'npm@latest', '--global')

test/lib/commands/audit.js

+1
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ t.test('audit fix - bulk endpoint', async t => {
184184
tarballs: {
185185
'1.0.1': path.join(npm.prefix, 'test-dep-a-fixed'),
186186
},
187+
times: 2,
187188
})
188189
const advisory = registry.advisory({ id: 100, vulnerable_versions: '1.0.0' })
189190
registry.nock.post('/-/npm/v1/security/advisories/bulk', body => {

workspaces/arborist/lib/arborist/index.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ const { homedir } = require('os')
3131
const { depth } = require('treeverse')
3232
const mapWorkspaces = require('@npmcli/map-workspaces')
3333
const { log, time } = require('proc-log')
34-
3534
const { saveTypeMap } = require('../add-rm-pkg-deps.js')
3635
const AuditReport = require('../audit-report.js')
3736
const relpath = require('../relpath.js')
37+
const PackumentCache = require('../packument-cache.js')
3838

3939
const mixins = [
4040
require('../tracker.js'),
@@ -82,7 +82,7 @@ class Arborist extends Base {
8282
installStrategy: options.global ? 'shallow' : (options.installStrategy ? options.installStrategy : 'hoisted'),
8383
lockfileVersion: lockfileVersion(options.lockfileVersion),
8484
packageLockOnly: !!options.packageLockOnly,
85-
packumentCache: options.packumentCache || new Map(),
85+
packumentCache: options.packumentCache || new PackumentCache(),
8686
path: options.path || '.',
8787
rebuildBundle: 'rebuildBundle' in options ? !!options.rebuildBundle : true,
8888
replaceRegistryHost: options.replaceRegistryHost,

workspaces/arborist/lib/node.js

+2
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ class Node {
119119
// package's dependencies in a virtual root.
120120
this.sourceReference = sourceReference
121121

122+
// TODO if this came from pacote.manifest we don't have to do this,
123+
// we can be told to skip this step
122124
const pkg = sourceReference ? sourceReference.package
123125
: normalize(options.pkg || {})
124126

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
const { LRUCache } = require('lru-cache')
2+
const { getHeapStatistics } = require('node:v8')
3+
const { log } = require('proc-log')
4+
5+
// This is an in-memory cache that Pacote uses for packuments.
6+
// Packuments are usually cached on disk. This allows for rapid re-requests
7+
// of the same packument to bypass disk reads. The tradeoff here is memory
8+
// usage for disk reads.
9+
class PackumentCache extends LRUCache {
10+
static #heapLimit = Math.floor(getHeapStatistics().heap_size_limit)
11+
12+
#sizeKey
13+
#disposed = new Set()
14+
15+
#log (...args) {
16+
log.silly('packumentCache', ...args)
17+
}
18+
19+
constructor ({
20+
// How much of this.#heapLimit to take up
21+
heapFactor = 0.25,
22+
// How much of this.#maxSize we allow any one packument to take up
23+
// Anything over this is not cached
24+
maxEntryFactor = 0.5,
25+
sizeKey = '_contentLength',
26+
} = {}) {
27+
const maxSize = Math.floor(PackumentCache.#heapLimit * heapFactor)
28+
const maxEntrySize = Math.floor(maxSize * maxEntryFactor)
29+
super({
30+
maxSize,
31+
maxEntrySize,
32+
sizeCalculation: (p) => {
33+
// Don't cache if we dont know the size
34+
// Some versions of pacote set this to `0`, newer versions set it to `null`
35+
if (!p[sizeKey]) {
36+
return maxEntrySize + 1
37+
}
38+
if (p[sizeKey] < 10_000) {
39+
return p[sizeKey] * 2
40+
}
41+
if (p[sizeKey] < 1_000_000) {
42+
return Math.floor(p[sizeKey] * 1.5)
43+
}
44+
// It is less beneficial to store a small amount of super large things
45+
// at the cost of all other packuments.
46+
return maxEntrySize + 1
47+
},
48+
dispose: (v, k) => {
49+
this.#disposed.add(k)
50+
this.#log(k, 'dispose')
51+
},
52+
})
53+
this.#sizeKey = sizeKey
54+
this.#log(`heap:${PackumentCache.#heapLimit} maxSize:${maxSize} maxEntrySize:${maxEntrySize}`)
55+
}
56+
57+
set (k, v, ...args) {
58+
// we use disposed only for a logging signal if we are setting packuments that
59+
// have already been evicted from the cache previously. logging here could help
60+
// us tune this in the future.
61+
const disposed = this.#disposed.has(k)
62+
/* istanbul ignore next - this doesnt happen consistently so hard to test without resorting to unit tests */
63+
if (disposed) {
64+
this.#disposed.delete(k)
65+
}
66+
this.#log(k, 'set', `size:${v[this.#sizeKey]} disposed:${disposed}`)
67+
return super.set(k, v, ...args)
68+
}
69+
70+
has (k, ...args) {
71+
const has = super.has(k, ...args)
72+
this.#log(k, `cache-${has ? 'hit' : 'miss'}`)
73+
return has
74+
}
75+
}
76+
77+
module.exports = PackumentCache

workspaces/arborist/test/arborist/rebuild.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ t.test('verify dep flags in script environments', async t => {
197197
const file = resolve(path, 'node_modules', pkg, 'env')
198198
t.strictSame(flags, fs.readFileSync(file, 'utf8').split('\n'), pkg)
199199
}
200-
t.strictSame(checkLogs().sort((a, b) =>
200+
t.strictSame(checkLogs().filter(l => l[0] === 'info' && l[1] === 'run').sort((a, b) =>
201201
localeCompare(a[2], b[2]) || (typeof a[4] === 'string' ? -1 : 1)), [
202202
['info', 'run', '[email protected]', 'postinstall', 'node_modules/devdep', 'node ../../env.js'],
203203
['info', 'run', '[email protected]', 'postinstall', { code: 0, signal: null }],

workspaces/arborist/test/audit-report.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ t.test('audit returns an error', async t => {
222222
const report = await AuditReport.load(tree, arb.options)
223223
t.equal(report.report, null, 'did not get audit response')
224224
t.equal(report.size, 0, 'did not find any vulnerabilities')
225-
t.match(logs, [
225+
t.match(logs.filter(l => l[1].includes('audit')), [
226226
[
227227
'silly',
228228
'audit',

0 commit comments

Comments
 (0)