Skip to content

Commit

Permalink
perf(childprocess): track/monitor health of spawned processes #6304
Browse files Browse the repository at this point in the history
## Problem
Toolkit/Q slow performance is consistently reported by customers.
Toolkit/Q features can spawn processes, included many long-lived
processes such as LSP servers. The cost of these processes is hidden and
hard to troubleshoot. Any misbehaving process will not be noticed except
in a coarse "Toolkit/Q causes vscode to be slow" manner.

## Solution
- Track all running processes in a map-like data structure. 
- use a `PollingSet` to periodically "monitor" their system usage (%cpu,
memory bytes), logging warning messages when it exceeds a threshold.
- Add developer command to instantiate spawned processes via
`ChildProcess` wrapper to make this easier to test.
  • Loading branch information
Hweinstock authored Jan 17, 2025
1 parent 062d24a commit d55b8e1
Show file tree
Hide file tree
Showing 5 changed files with 313 additions and 7 deletions.
19 changes: 19 additions & 0 deletions packages/core/src/dev/activation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import { getSessionId } from '../shared/telemetry/util'
import { NotificationsController } from '../notifications/controller'
import { DevNotificationsState } from '../notifications/types'
import { QuickPickItem } from 'vscode'
import { ChildProcess } from '../shared/utilities/processUtils'

interface MenuOption {
readonly label: string
Expand All @@ -44,6 +45,7 @@ export type DevFunction =
| 'editAuthConnections'
| 'notificationsSend'
| 'forceIdeCrash'
| 'startChildProcess'

export type DevOptions = {
context: vscode.ExtensionContext
Expand Down Expand Up @@ -126,6 +128,11 @@ const menuOptions: () => Record<DevFunction, MenuOption> = () => {
detail: `Will SIGKILL ExtHost, { pid: ${process.pid}, sessionId: '${getSessionId().slice(0, 8)}-...' }, but the IDE itself will not crash.`,
executor: forceQuitIde,
},
startChildProcess: {
label: 'ChildProcess: Start child process',
detail: 'Start ChildProcess from our utility wrapper for testing',
executor: startChildProcess,
},
}
}

Expand Down Expand Up @@ -578,3 +585,15 @@ async function editNotifications() {
await targetNotificationsController.pollForEmergencies()
})
}

async function startChildProcess() {
const result = await createInputBox({
title: 'Enter a command',
}).prompt()
if (result) {
const [command, ...args] = result?.toString().split(' ') ?? []
getLogger().info(`Starting child process: '${command}'`)
const processResult = await ChildProcess.run(command, args, { collect: true })
getLogger().info(`Child process exited with code ${processResult.exitCode}`)
}
}
2 changes: 1 addition & 1 deletion packages/core/src/shared/logger/logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import * as vscode from 'vscode'

export type LogTopic = 'crashMonitoring' | 'dev/beta' | 'notifications' | 'test' | 'unknown'
export type LogTopic = 'crashMonitoring' | 'dev/beta' | 'notifications' | 'test' | 'childProcess' | 'unknown'

class ErrorLog {
constructor(
Expand Down
7 changes: 6 additions & 1 deletion packages/core/src/shared/utilities/pollingSet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,14 @@ export class PollingSet<T> extends Set<T> {
this.clearTimer()
}
}

// TODO(hkobew): Overwrite the add method instead of adding seperate method. If we add item to set, timer should always start.
public start(id: T): void {
this.add(id)
this.pollTimer = this.pollTimer ?? globals.clock.setInterval(() => this.poll(), this.interval)
}

public override clear(): void {
this.clearTimer()
super.clear()
}
}
140 changes: 136 additions & 4 deletions packages/core/src/shared/utilities/processUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import * as proc from 'child_process' // eslint-disable-line no-restricted-impor
import * as crossSpawn from 'cross-spawn'
import * as logger from '../logger'
import { Timeout, CancellationError, waitUntil } from './timeoutUtils'
import { PollingSet } from './pollingSet'
import { getLogger } from '../logger/logger'

export interface RunParameterContext {
/** Reports an error parsed from the stdin/stdout streams. */
Expand Down Expand Up @@ -61,14 +63,144 @@ export interface ChildProcessResult {

export const eof = Symbol('EOF')

export interface ProcessStats {
memory: number
cpu: number
}
export class ChildProcessTracker {
static readonly pollingInterval: number = 10000 // Check usage every 10 seconds
static readonly thresholds: ProcessStats = {
memory: 100 * 1024 * 1024, // 100 MB
cpu: 50,
}
static readonly logger = getLogger('childProcess')
#processByPid: Map<number, ChildProcess> = new Map<number, ChildProcess>()
#pids: PollingSet<number>

public constructor() {
this.#pids = new PollingSet(ChildProcessTracker.pollingInterval, () => this.monitor())
}

private cleanUp() {
const terminatedProcesses = Array.from(this.#pids.values()).filter(
(pid: number) => this.#processByPid.get(pid)?.stopped
)
for (const pid of terminatedProcesses) {
this.delete(pid)
}
}

private async monitor() {
this.cleanUp()
ChildProcessTracker.logger.debug(`Active running processes size: ${this.#pids.size}`)

for (const pid of this.#pids.values()) {
await this.checkProcessUsage(pid)
}
}

private async checkProcessUsage(pid: number): Promise<void> {
if (!this.#pids.has(pid)) {
ChildProcessTracker.logger.warn(`Missing process with id ${pid}`)
return
}
const stats = this.getUsage(pid)
if (stats) {
ChildProcessTracker.logger.debug(`Process ${pid} usage: %O`, stats)
if (stats.memory > ChildProcessTracker.thresholds.memory) {
ChildProcessTracker.logger.warn(`Process ${pid} exceeded memory threshold: ${stats.memory}`)
}
if (stats.cpu > ChildProcessTracker.thresholds.cpu) {
ChildProcessTracker.logger.warn(`Process ${pid} exceeded cpu threshold: ${stats.cpu}`)
}
}
}

public add(childProcess: ChildProcess) {
const pid = childProcess.pid()
this.#processByPid.set(pid, childProcess)
this.#pids.start(pid)
}

public delete(childProcessId: number) {
this.#processByPid.delete(childProcessId)
this.#pids.delete(childProcessId)
}

public get size() {
return this.#pids.size
}

public has(childProcess: ChildProcess) {
return this.#pids.has(childProcess.pid())
}

public clear() {
for (const childProcess of this.#processByPid.values()) {
childProcess.stop(true)
}
this.#pids.clear()
this.#processByPid.clear()
}

public getUsage(pid: number): ProcessStats {
try {
// isWin() leads to circular dependency.
return process.platform === 'win32' ? getWindowsUsage() : getUnixUsage()
} catch (e) {
ChildProcessTracker.logger.warn(`Failed to get process stats for ${pid}: ${e}`)
return { cpu: 0, memory: 0 }
}

function getWindowsUsage() {
const cpuOutput = proc
.execFileSync('wmic', [
'path',
'Win32_PerfFormattedData_PerfProc_Process',
'where',
`IDProcess=${pid}`,
'get',
'PercentProcessorTime',
])
.toString()
const memOutput = proc
.execFileSync('wmic', ['process', 'where', `ProcessId=${pid}`, 'get', 'WorkingSetSize'])
.toString()

const cpuPercentage = parseFloat(cpuOutput.split('\n')[1])
const memoryBytes = parseInt(memOutput.split('\n')[1]) * 1024

return {
cpu: isNaN(cpuPercentage) ? 0 : cpuPercentage,
memory: memoryBytes,
}
}

function getUnixUsage() {
const cpuMemOutput = proc.execFileSync('ps', ['-p', pid.toString(), '-o', '%cpu,%mem']).toString()
const rssOutput = proc.execFileSync('ps', ['-p', pid.toString(), '-o', 'rss']).toString()

const cpuMemLines = cpuMemOutput.split('\n')[1].trim().split(/\s+/)
const cpuPercentage = parseFloat(cpuMemLines[0])
const memoryBytes = parseInt(rssOutput.split('\n')[1]) * 1024

return {
cpu: isNaN(cpuPercentage) ? 0 : cpuPercentage,
memory: memoryBytes,
}
}
}
}

/**
* Convenience class to manage a child process
* To use:
* - instantiate
* - call and await run to get the results (pass or fail)
*/
export class ChildProcess {
static #runningProcesses: Map<number, ChildProcess> = new Map()
static #runningProcesses = new ChildProcessTracker()
static stopTimeout = 3000
#childProcess: proc.ChildProcess | undefined
#processErrors: Error[] = []
#processResult: ChildProcessResult | undefined
Expand Down Expand Up @@ -285,7 +417,7 @@ export class ChildProcess {
child.kill(signal)

if (force === true) {
waitUntil(async () => this.stopped, { timeout: 3000, interval: 200, truthy: true })
waitUntil(async () => this.stopped, { timeout: ChildProcess.stopTimeout, interval: 200, truthy: true })
.then((stopped) => {
if (!stopped) {
child.kill('SIGKILL')
Expand All @@ -309,7 +441,7 @@ export class ChildProcess {
if (pid === undefined) {
return
}
ChildProcess.#runningProcesses.set(pid, this)
ChildProcess.#runningProcesses.add(this)

const timeoutListener = options?.timeout?.token.onCancellationRequested(({ agent }) => {
const message = agent === 'user' ? 'Cancelled: ' : 'Timed out: '
Expand All @@ -319,7 +451,7 @@ export class ChildProcess {

const dispose = () => {
timeoutListener?.dispose()
ChildProcess.#runningProcesses.delete(pid)
ChildProcess.#runningProcesses.delete(this.pid())
}

process.on('exit', dispose)
Expand Down
Loading

0 comments on commit d55b8e1

Please sign in to comment.