Skip to content

Commit f89b868

Browse files
committed
Respect Codex sandbox config in plugin threads
1 parent 6a5c2ba commit f89b868

8 files changed

Lines changed: 165 additions & 11 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ Your configuration will be picked up based on:
270270

271271
Check out the Codex docs for more [configuration options](https://developers.openai.com/codex/config-reference).
272272

273+
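By default, the plugin pins review runs to Codex's read-only sandbox and maps write-capable rescue tasks to `workspace-write`. If your local environment cannot initialize the Codex sandbox, set `CODEX_COMPANION_SANDBOX_MODE=inherit` before starting Claude Code to let Codex apply your configured `sandbox_mode` directly. You can also set it to `read-only`, `workspace-write`, or `danger-full-access` to force a specific sandbox mode for plugin-launched Codex threads. An explicit mode applies to every plugin-launched thread, including review runs that would otherwise be pinned to read-only, so only choose a write-capable mode when you intend reviews to run with it as well. Unrecognized values are rejected with an `Invalid CODEX_COMPANION_SANDBOX_MODE` error.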
274+
273275
### Moving The Work Over To Codex
274276

275277
Delegated tasks and any [stop gate](#what-does-the-review-gate-do) run can also be directly resumed inside Codex by running `codex resume` either with the specific session ID you received from running `/codex:result` or `/codex:status` or by selecting it from the list.

plugins/codex/agents/codex-rescue.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ Forwarding rules:
3232
- If the user asks for a concrete model name such as `gpt-5.4-mini`, pass it through with `--model`.
3333
- Treat `--effort <value>` and `--model <value>` as runtime controls and do not include them in the task text you pass through.
3434
- Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
35+
- If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing, keep the single Bash call but prefix it with `CODEX_COMPANION_SANDBOX_MODE=inherit`. This lets Codex apply the user's configured sandbox mode instead of forcing the plugin's default task sandbox.
36+
- If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it. Do not unset it or replace it unless the user explicitly asks for a different sandbox mode.
3537
- Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through.
3638
- `--resume` means add `--resume-last`.
3739
- `--fresh` means do not add `--resume-last`.

plugins/codex/scripts/codex-companion.mjs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json"
6767
const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000;
6868
const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000;
6969
const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
70+
const VALID_SANDBOX_MODES = new Set(["read-only", "workspace-write", "danger-full-access"]);
71+
const SANDBOX_MODE_ENV = "CODEX_COMPANION_SANDBOX_MODE";
7072
const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]);
7173
const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn.";
7274

@@ -153,6 +155,20 @@ function resolveCommandWorkspace(options = {}) {
153155
return resolveWorkspaceRoot(resolveCommandCwd(options));
154156
}
155157

158+
/**
 * Resolve the sandbox mode to request for a plugin-launched Codex thread.
 *
 * The environment variable named by `SANDBOX_MODE_ENV` overrides the caller's
 * default:
 *   - unset, empty, or whitespace-only: `defaultMode` is returned unchanged;
 *   - `inherit`: `null` is returned, signalling that no explicit sandbox
 *     should be requested;
 *   - any member of `VALID_SANDBOX_MODES`: that mode is returned.
 *
 * @param {string} defaultMode - Mode to use when no override is configured.
 * @returns {string | null} The sandbox mode to request, or `null` for `inherit`.
 * @throws {Error} When the override is set to an unrecognized value. The
 *   message lists every accepted value so the user can correct it.
 */
function resolveSandboxMode(defaultMode) {
  const configured = process.env[SANDBOX_MODE_ENV]?.trim();
  if (!configured) {
    return defaultMode;
  }
  if (configured === "inherit") {
    return null;
  }
  if (VALID_SANDBOX_MODES.has(configured)) {
    return configured;
  }
  // "inherit" is handled above and is not part of VALID_SANDBOX_MODES, so it
  // is added explicitly to keep the hint complete.
  const accepted = ["inherit", ...VALID_SANDBOX_MODES].join(", ");
  throw new Error(`Invalid ${SANDBOX_MODE_ENV}: ${configured} (expected one of: ${accepted})`);
}
171+
156172
function sleep(ms) {
157173
return new Promise((resolve) => setTimeout(resolve, ms));
158174
}
@@ -367,6 +383,7 @@ async function executeReviewRun(request) {
367383
const result = await runAppServerReview(request.cwd, {
368384
target: reviewTarget,
369385
model: request.model,
386+
sandbox: resolveSandboxMode("read-only"),
370387
onProgress: request.onProgress
371388
});
372389
const payload = {
@@ -408,7 +425,7 @@ async function executeReviewRun(request) {
408425
const result = await runAppServerTurn(context.repoRoot, {
409426
prompt,
410427
model: request.model,
411-
sandbox: "read-only",
428+
sandbox: resolveSandboxMode("read-only"),
412429
outputSchema: readOutputSchema(REVIEW_SCHEMA),
413430
onProgress: request.onProgress
414431
});
@@ -485,7 +502,7 @@ async function executeTaskRun(request) {
485502
defaultPrompt: resumeThreadId ? DEFAULT_CONTINUE_PROMPT : "",
486503
model: request.model,
487504
effort: request.effort,
488-
sandbox: request.write ? "workspace-write" : "read-only",
505+
sandbox: resolveSandboxMode(request.write ? "workspace-write" : "read-only"),
489506
onProgress: request.onProgress,
490507
persistThread: true,
491508
threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT)

plugins/codex/scripts/lib/codex.mjs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,26 +54,32 @@ function cleanCodexStderr(stderr) {
5454

5555
/** @returns {ThreadStartParams} */
5656
function buildThreadParams(cwd, options = {}) {
57-
return {
57+
const params = {
5858
cwd,
5959
model: options.model ?? null,
6060
approvalPolicy: options.approvalPolicy ?? "never",
61-
sandbox: options.sandbox ?? "read-only",
6261
serviceName: SERVICE_NAME,
6362
ephemeral: options.ephemeral ?? true,
6463
experimentalRawEvents: false
6564
};
65+
if (typeof options.sandbox === "string") {
66+
params.sandbox = options.sandbox;
67+
}
68+
return params;
6669
}
6770

6871
/** @returns {ThreadResumeParams} */
6972
function buildResumeParams(threadId, cwd, options = {}) {
70-
return {
73+
const params = {
7174
threadId,
7275
cwd,
7376
model: options.model ?? null,
74-
approvalPolicy: options.approvalPolicy ?? "never",
75-
sandbox: options.sandbox ?? "read-only"
77+
approvalPolicy: options.approvalPolicy ?? "never"
7678
};
79+
if (typeof options.sandbox === "string") {
80+
params.sandbox = options.sandbox;
81+
}
82+
return params;
7783
}
7884

7985
/** @returns {UserInput[]} */
@@ -915,7 +921,7 @@ export async function runAppServerReview(cwd, options = {}) {
915921
emitProgress(options.onProgress, "Starting Codex review thread.", "starting");
916922
const thread = await startThread(client, cwd, {
917923
model: options.model,
918-
sandbox: "read-only",
924+
sandbox: options.sandbox === undefined ? "read-only" : options.sandbox,
919925
ephemeral: true,
920926
threadName: options.threadName
921927
});

plugins/codex/skills/codex-cli-runtime/SKILL.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ Execution rules:
2222
- Leave model unset by default. Add `--model` only when the user explicitly asks for one.
2323
- Map `spark` to `--model gpt-5.3-codex-spark`.
2424
- Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
25+
- If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures, prefix the single `task` command with `CODEX_COMPANION_SANDBOX_MODE=inherit`. Example: `CODEX_COMPANION_SANDBOX_MODE=inherit node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --write "<raw arguments>"`.
26+
- If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged unless the user explicitly requests another sandbox mode.
2527

2628
Command selection:
2729
- Use exactly one `task` invocation per rescue handoff.
@@ -34,6 +36,7 @@ Command selection:
3436
- `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up.
3537
- `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
3638
- `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run.
39+
- Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field so Codex uses its configured `sandbox_mode`. The variable also accepts `read-only`, `workspace-write`, and `danger-full-access`, but only set those explicit modes when the user asks for that exact sandbox behavior.
3740

3841
Safety rules:
3942
- Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior.

tests/commands.test.mjs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ test("rescue command absorbs continue semantics", () => {
126126
assert.match(agent, /Leave model unset by default/i);
127127
assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i);
128128
assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i);
129+
assert.match(agent, /Default to a write-capable Codex run by adding `--write`/i);
130+
assert.match(agent, /If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing/i);
131+
assert.match(agent, /CODEX_COMPANION_SANDBOX_MODE=inherit/i);
132+
assert.match(agent, /If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it/i);
129133
assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i);
130134
assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
131135
assert.match(agent, /gpt-5-4-prompting/);
@@ -138,9 +142,14 @@ test("rescue command absorbs continue semantics", () => {
138142
assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i);
139143
assert.match(runtimeSkill, /Leave model unset by default/i);
140144
assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i);
145+
assert.match(runtimeSkill, /Default to a write-capable Codex run by adding `--write`/i);
146+
assert.match(runtimeSkill, /If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures/i);
147+
assert.match(runtimeSkill, /CODEX_COMPANION_SANDBOX_MODE=inherit node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" task --write "<raw arguments>"/i);
148+
assert.match(runtimeSkill, /If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged/i);
141149
assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i);
142150
assert.match(runtimeSkill, /Strip it before calling `task`/i);
143151
assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i);
152+
assert.match(runtimeSkill, /Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field/i);
144153
assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
145154
assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
146155
assert.match(readme, /`codex:codex-rescue` subagent/i);
@@ -165,9 +174,9 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint
165174
const resultHandling = read("skills/codex-result-handling/SKILL.md");
166175

167176
assert.match(result, /disable-model-invocation:\s*true/);
168-
assert.match(result, /codex-companion\.mjs" result \$ARGUMENTS/);
177+
assert.match(result, /codex-companion\.mjs" result "\$ARGUMENTS"/);
169178
assert.match(cancel, /disable-model-invocation:\s*true/);
170-
assert.match(cancel, /codex-companion\.mjs" cancel \$ARGUMENTS/);
179+
assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/);
171180
assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i);
172181
assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i);
173182
});

tests/fake-codex-fixture.mjs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ const readline = require("node:readline");
1818
1919
function loadState() {
2020
if (!fs.existsSync(STATE_PATH)) {
21-
return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastInterrupt: null };
21+
return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastThreadStart: null, lastThreadResume: null, lastInterrupt: null };
2222
}
2323
return JSON.parse(fs.readFileSync(STATE_PATH, "utf8"));
2424
}
@@ -297,6 +297,8 @@ rl.on("line", (line) => {
297297
throw new Error("thread/start.persistFullHistory requires experimentalApi capability");
298298
}
299299
const thread = nextThread(state, message.params.cwd, message.params.ephemeral);
300+
state.lastThreadStart = message.params;
301+
saveState(state);
300302
send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });
301303
send({ method: "thread/started", params: { thread: { id: thread.id } } });
302304
break;
@@ -330,6 +332,7 @@ rl.on("line", (line) => {
330332
}
331333
const thread = ensureThread(state, message.params.threadId);
332334
thread.updatedAt = now();
335+
state.lastThreadResume = message.params;
333336
saveState(state);
334337
send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });
335338
break;

tests/runtime.test.mjs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,50 @@ test("review renders a no-findings result from app-server review/start", () => {
157157
assert.match(result.stdout, /No material issues found/);
158158
});
159159

160+
test("review keeps the default read-only sandbox", () => {
161+
const repo = makeTempDir();
162+
const binDir = makeTempDir();
163+
installFakeCodex(binDir);
164+
initGitRepo(repo);
165+
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
166+
run("git", ["add", "README.md"], { cwd: repo });
167+
run("git", ["commit", "-m", "init"], { cwd: repo });
168+
fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
169+
170+
const result = run("node", [SCRIPT, "review"], {
171+
cwd: repo,
172+
env: buildEnv(binDir)
173+
});
174+
175+
assert.equal(result.status, 0, result.stderr);
176+
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
177+
assert.equal(state.lastThreadStart.approvalPolicy, "never");
178+
assert.equal(state.lastThreadStart.sandbox, "read-only");
179+
});
180+
181+
test("review can inherit the configured Codex sandbox when explicitly requested", () => {
182+
const repo = makeTempDir();
183+
const binDir = makeTempDir();
184+
installFakeCodex(binDir);
185+
initGitRepo(repo);
186+
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
187+
run("git", ["add", "README.md"], { cwd: repo });
188+
run("git", ["commit", "-m", "init"], { cwd: repo });
189+
fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
190+
191+
const result = run("node", [SCRIPT, "review"], {
192+
cwd: repo,
193+
env: {
194+
...buildEnv(binDir),
195+
CODEX_COMPANION_SANDBOX_MODE: "inherit"
196+
}
197+
});
198+
199+
assert.equal(result.status, 0, result.stderr);
200+
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
201+
assert.equal(state.lastThreadStart.sandbox, undefined);
202+
});
203+
160204
test("task runs when the active provider does not require OpenAI login", () => {
161205
const repo = makeTempDir();
162206
const binDir = makeTempDir();
@@ -175,6 +219,48 @@ test("task runs when the active provider does not require OpenAI login", () => {
175219
assert.match(result.stdout, /Handled the requested task/);
176220
});
177221

222+
test("task --write requests the default workspace-write sandbox", () => {
223+
const repo = makeTempDir();
224+
const binDir = makeTempDir();
225+
installFakeCodex(binDir);
226+
initGitRepo(repo);
227+
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
228+
run("git", ["add", "README.md"], { cwd: repo });
229+
run("git", ["commit", "-m", "init"], { cwd: repo });
230+
231+
const result = run("node", [SCRIPT, "task", "--write", "fix the bug"], {
232+
cwd: repo,
233+
env: buildEnv(binDir)
234+
});
235+
236+
assert.equal(result.status, 0, result.stderr);
237+
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
238+
assert.equal(state.lastThreadStart.approvalPolicy, "never");
239+
assert.equal(state.lastThreadStart.sandbox, "workspace-write");
240+
});
241+
242+
test("task --write can inherit the configured Codex sandbox when explicitly requested", () => {
243+
const repo = makeTempDir();
244+
const binDir = makeTempDir();
245+
installFakeCodex(binDir);
246+
initGitRepo(repo);
247+
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
248+
run("git", ["add", "README.md"], { cwd: repo });
249+
run("git", ["commit", "-m", "init"], { cwd: repo });
250+
251+
const result = run("node", [SCRIPT, "task", "--write", "fix the bug"], {
252+
cwd: repo,
253+
env: {
254+
...buildEnv(binDir),
255+
CODEX_COMPANION_SANDBOX_MODE: "inherit"
256+
}
257+
});
258+
259+
assert.equal(result.status, 0, result.stderr);
260+
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
261+
assert.equal(state.lastThreadStart.sandbox, undefined);
262+
});
263+
178264
test("task runs without auth preflight so Codex can refresh an expired session", () => {
179265
const repo = makeTempDir();
180266
const binDir = makeTempDir();
@@ -250,6 +336,32 @@ test("adversarial review renders structured findings over app-server turn/start"
250336

251337
assert.equal(result.status, 0);
252338
assert.match(result.stdout, /Missing empty-state guard/);
339+
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
340+
assert.equal(state.lastThreadStart.sandbox, "read-only");
341+
});
342+
343+
test("adversarial review can inherit the configured Codex sandbox when explicitly requested", () => {
344+
const repo = makeTempDir();
345+
const binDir = makeTempDir();
346+
installFakeCodex(binDir);
347+
initGitRepo(repo);
348+
fs.mkdirSync(path.join(repo, "src"));
349+
fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n");
350+
run("git", ["add", "src/app.js"], { cwd: repo });
351+
run("git", ["commit", "-m", "init"], { cwd: repo });
352+
fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n");
353+
354+
const result = run("node", [SCRIPT, "adversarial-review"], {
355+
cwd: repo,
356+
env: {
357+
...buildEnv(binDir),
358+
CODEX_COMPANION_SANDBOX_MODE: "inherit"
359+
}
360+
});
361+
362+
assert.equal(result.status, 0, result.stderr);
363+
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
364+
assert.equal(state.lastThreadStart.sandbox, undefined);
253365
});
254366

255367
test("adversarial review accepts the same base-branch targeting as review", () => {

0 commit comments

Comments
 (0)