Respect Codex sandbox config in plugin threads

msl2246 · msl2246 · commit f89b868c3b65 · 2026-04-18T16:14:16.000+09:00
diff --git a/README.md b/README.md
@@ -270,6 +270,8 @@ Your configuration will be picked up based on:
 
 Check out the Codex docs for more [configuration options](https://developers.openai.com/codex/config-reference).
 
+By default, the plugin pins review runs to Codex's read-only sandbox and maps write-capable rescue tasks to `workspace-write`. If your local environment cannot initialize the Codex sandbox, set `CODEX_COMPANION_SANDBOX_MODE=inherit` before starting Claude Code to let Codex apply your configured `sandbox_mode` directly. You can also set it to `read-only`, `workspace-write`, or `danger-full-access` to force a specific sandbox mode for plugin-launched Codex threads.
+
 ### Moving The Work Over To Codex
 
 Delegated tasks and any [stop gate](#what-does-the-review-gate-do) run can also be directly resumed inside Codex by running `codex resume` either with the specific session ID you received from running `/codex:result` or `/codex:status` or by selecting it from the list.
diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md
@@ -32,6 +32,8 @@ Forwarding rules:
 - If the user asks for a concrete model name such as `gpt-5.4-mini`, pass it through with `--model`.
 - Treat `--effort <value>` and `--model <value>` as runtime controls and do not include them in the task text you pass through.
 - Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
+- If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing, keep the single Bash call but prefix it with `CODEX_COMPANION_SANDBOX_MODE=inherit`. This lets Codex apply the user's configured sandbox mode instead of forcing the plugin's default task sandbox.
+- If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it. Do not unset it or replace it unless the user explicitly asks for a different sandbox mode.
 - Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through.
 - `--resume` means add `--resume-last`.
 - `--fresh` means do not add `--resume-last`.
diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
@@ -67,6 +67,8 @@ const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json"
 const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000;
 const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000;
 const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
+const VALID_SANDBOX_MODES = new Set(["read-only", "workspace-write", "danger-full-access"]);
+const SANDBOX_MODE_ENV = "CODEX_COMPANION_SANDBOX_MODE";
 const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]);
 const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn.";
 
@@ -153,6 +155,20 @@ function resolveCommandWorkspace(options = {}) {
   return resolveWorkspaceRoot(resolveCommandCwd(options));
 }
 
+function resolveSandboxMode(defaultMode) {
+  const configured = process.env[SANDBOX_MODE_ENV]?.trim();
+  if (!configured) {
+    return defaultMode;
+  }
+  if (configured === "inherit") {
+    return null;
+  }
+  if (VALID_SANDBOX_MODES.has(configured)) {
+    return configured;
+  }
+  throw new Error(`Invalid ${SANDBOX_MODE_ENV}: ${configured}`);
+}
+
 function sleep(ms) {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
@@ -367,6 +383,7 @@ async function executeReviewRun(request) {
     const result = await runAppServerReview(request.cwd, {
       target: reviewTarget,
       model: request.model,
+      sandbox: resolveSandboxMode("read-only"),
       onProgress: request.onProgress
     });
     const payload = {
@@ -408,7 +425,7 @@ async function executeReviewRun(request) {
   const result = await runAppServerTurn(context.repoRoot, {
     prompt,
     model: request.model,
-    sandbox: "read-only",
+    sandbox: resolveSandboxMode("read-only"),
     outputSchema: readOutputSchema(REVIEW_SCHEMA),
     onProgress: request.onProgress
   });
@@ -485,7 +502,7 @@ async function executeTaskRun(request) {
     defaultPrompt: resumeThreadId ? DEFAULT_CONTINUE_PROMPT : "",
     model: request.model,
     effort: request.effort,
-    sandbox: request.write ? "workspace-write" : "read-only",
+    sandbox: resolveSandboxMode(request.write ? "workspace-write" : "read-only"),
     onProgress: request.onProgress,
     persistThread: true,
     threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT)
diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
@@ -54,26 +54,32 @@ function cleanCodexStderr(stderr) {
 
 /** @returns {ThreadStartParams} */
 function buildThreadParams(cwd, options = {}) {
-  return {
+  const params = {
     cwd,
     model: options.model ?? null,
     approvalPolicy: options.approvalPolicy ?? "never",
-    sandbox: options.sandbox ?? "read-only",
     serviceName: SERVICE_NAME,
     ephemeral: options.ephemeral ?? true,
     experimentalRawEvents: false
   };
+  if (typeof options.sandbox === "string") {
+    params.sandbox = options.sandbox;
+  }
+  return params;
 }
 
 /** @returns {ThreadResumeParams} */
 function buildResumeParams(threadId, cwd, options = {}) {
-  return {
+  const params = {
     threadId,
     cwd,
     model: options.model ?? null,
-    approvalPolicy: options.approvalPolicy ?? "never",
-    sandbox: options.sandbox ?? "read-only"
+    approvalPolicy: options.approvalPolicy ?? "never"
   };
+  if (typeof options.sandbox === "string") {
+    params.sandbox = options.sandbox;
+  }
+  return params;
 }
 
 /** @returns {UserInput[]} */
@@ -915,7 +921,7 @@ export async function runAppServerReview(cwd, options = {}) {
     emitProgress(options.onProgress, "Starting Codex review thread.", "starting");
     const thread = await startThread(client, cwd, {
       model: options.model,
-      sandbox: "read-only",
+      sandbox: options.sandbox === undefined ? "read-only" : options.sandbox,
       ephemeral: true,
       threadName: options.threadName
     });
diff --git a/plugins/codex/skills/codex-cli-runtime/SKILL.md b/plugins/codex/skills/codex-cli-runtime/SKILL.md
@@ -22,6 +22,8 @@ Execution rules:
 - Leave model unset by default. Add `--model` only when the user explicitly asks for one.
 - Map `spark` to `--model gpt-5.3-codex-spark`.
 - Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
+- If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures, prefix the single `task` command with `CODEX_COMPANION_SANDBOX_MODE=inherit`. Example: `CODEX_COMPANION_SANDBOX_MODE=inherit node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --write "<raw arguments>"`.
+- If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged unless the user explicitly requests another sandbox mode.
 
 Command selection:
 - Use exactly one `task` invocation per rescue handoff.
@@ -34,6 +36,7 @@ Command selection:
 - `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up.
 - `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
 - `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run.
+- Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field so Codex uses its configured `sandbox_mode`. The variable also accepts `read-only`, `workspace-write`, and `danger-full-access`, but only set those explicit modes when the user asks for that exact sandbox behavior.
 
 Safety rules:
 - Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior.
diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs
@@ -126,6 +126,10 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(agent, /Leave model unset by default/i);
   assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i);
   assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i);
+  assert.match(agent, /Default to a write-capable Codex run by adding `--write`/i);
+  assert.match(agent, /If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing/i);
+  assert.match(agent, /CODEX_COMPANION_SANDBOX_MODE=inherit/i);
+  assert.match(agent, /If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it/i);
   assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i);
   assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
   assert.match(agent, /gpt-5-4-prompting/);
@@ -138,9 +142,14 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i);
   assert.match(runtimeSkill, /Leave model unset by default/i);
   assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i);
+  assert.match(runtimeSkill, /Default to a write-capable Codex run by adding `--write`/i);
+  assert.match(runtimeSkill, /If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures/i);
+  assert.match(runtimeSkill, /CODEX_COMPANION_SANDBOX_MODE=inherit node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" task --write "<raw arguments>"/i);
+  assert.match(runtimeSkill, /If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged/i);
   assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i);
   assert.match(runtimeSkill, /Strip it before calling `task`/i);
   assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i);
+  assert.match(runtimeSkill, /Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field/i);
   assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
   assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
   assert.match(readme, /`codex:codex-rescue` subagent/i);
@@ -165,9 +174,9 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint
   const resultHandling = read("skills/codex-result-handling/SKILL.md");
 
   assert.match(result, /disable-model-invocation:\s*true/);
-  assert.match(result, /codex-companion\.mjs" result \$ARGUMENTS/);
+  assert.match(result, /codex-companion\.mjs" result "\$ARGUMENTS"/);
   assert.match(cancel, /disable-model-invocation:\s*true/);
-  assert.match(cancel, /codex-companion\.mjs" cancel \$ARGUMENTS/);
+  assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/);
   assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i);
   assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i);
 });
diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs
@@ -18,7 +18,7 @@ const readline = require("node:readline");
 
 	function loadState() {
 	  if (!fs.existsSync(STATE_PATH)) {
-	    return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastInterrupt: null };
+	    return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastThreadStart: null, lastThreadResume: null, lastInterrupt: null };
 	  }
 	  return JSON.parse(fs.readFileSync(STATE_PATH, "utf8"));
 	}
@@ -297,6 +297,8 @@ rl.on("line", (line) => {
           throw new Error("thread/start.persistFullHistory requires experimentalApi capability");
         }
         const thread = nextThread(state, message.params.cwd, message.params.ephemeral);
+        state.lastThreadStart = message.params;
+        saveState(state);
         send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });
         send({ method: "thread/started", params: { thread: { id: thread.id } } });
         break;
@@ -330,6 +332,7 @@ rl.on("line", (line) => {
         }
         const thread = ensureThread(state, message.params.threadId);
         thread.updatedAt = now();
+        state.lastThreadResume = message.params;
         saveState(state);
         send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });
         break;
diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs
@@ -157,6 +157,50 @@ test("review renders a no-findings result from app-server review/start", () => {
   assert.match(result.stdout, /No material issues found/);
 });
 
+test("review keeps the default read-only sandbox", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  const result = run("node", [SCRIPT, "review"], {
+    cwd: repo,
+    env: buildEnv(binDir)
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.lastThreadStart.approvalPolicy, "never");
+  assert.equal(state.lastThreadStart.sandbox, "read-only");
+});
+
+test("review can inherit the configured Codex sandbox when explicitly requested", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "README.md"), "hello again\n");
+
+  const result = run("node", [SCRIPT, "review"], {
+    cwd: repo,
+    env: {
+      ...buildEnv(binDir),
+      CODEX_COMPANION_SANDBOX_MODE: "inherit"
+    }
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.lastThreadStart.sandbox, undefined);
+});
+
 test("task runs when the active provider does not require OpenAI login", () => {
   const repo = makeTempDir();
   const binDir = makeTempDir();
@@ -175,6 +219,48 @@ test("task runs when the active provider does not require OpenAI login", () => {
   assert.match(result.stdout, /Handled the requested task/);
 });
 
+test("task --write requests the default workspace-write sandbox", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+
+  const result = run("node", [SCRIPT, "task", "--write", "fix the bug"], {
+    cwd: repo,
+    env: buildEnv(binDir)
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.lastThreadStart.approvalPolicy, "never");
+  assert.equal(state.lastThreadStart.sandbox, "workspace-write");
+});
+
+test("task --write can inherit the configured Codex sandbox when explicitly requested", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
+  run("git", ["add", "README.md"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+
+  const result = run("node", [SCRIPT, "task", "--write", "fix the bug"], {
+    cwd: repo,
+    env: {
+      ...buildEnv(binDir),
+      CODEX_COMPANION_SANDBOX_MODE: "inherit"
+    }
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.lastThreadStart.sandbox, undefined);
+});
+
 test("task runs without auth preflight so Codex can refresh an expired session", () => {
   const repo = makeTempDir();
   const binDir = makeTempDir();
@@ -250,6 +336,32 @@ test("adversarial review renders structured findings over app-server turn/start"
 
   assert.equal(result.status, 0);
   assert.match(result.stdout, /Missing empty-state guard/);
+  const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.lastThreadStart.sandbox, "read-only");
+});
+
+test("adversarial review can inherit the configured Codex sandbox when explicitly requested", () => {
+  const repo = makeTempDir();
+  const binDir = makeTempDir();
+  installFakeCodex(binDir);
+  initGitRepo(repo);
+  fs.mkdirSync(path.join(repo, "src"));
+  fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n");
+  run("git", ["add", "src/app.js"], { cwd: repo });
+  run("git", ["commit", "-m", "init"], { cwd: repo });
+  fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n");
+
+  const result = run("node", [SCRIPT, "adversarial-review"], {
+    cwd: repo,
+    env: {
+      ...buildEnv(binDir),
+      CODEX_COMPANION_SANDBOX_MODE: "inherit"
+    }
+  });
+
+  assert.equal(result.status, 0, result.stderr);
+  const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
+  assert.equal(state.lastThreadStart.sandbox, undefined);
 });
 
 test("adversarial review accepts the same base-branch targeting as review", () => {

Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@ const readline = require("node:readline");`
`18`	`18`
`19`	`19`	`function loadState() {`
`20`	`20`	`if (!fs.existsSync(STATE_PATH)) {`
`21`		`- return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastInterrupt: null };`
	`21`	`+ return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastThreadStart: null, lastThreadResume: null, lastInterrupt: null };`
`22`	`22`	`}`
`23`	`23`	`return JSON.parse(fs.readFileSync(STATE_PATH, "utf8"));`
`24`	`24`	`}`
`@@ -297,6 +297,8 @@ rl.on("line", (line) => {`
`297`	`297`	`throw new Error("thread/start.persistFullHistory requires experimentalApi capability");`
`298`	`298`	`}`
`299`	`299`	`const thread = nextThread(state, message.params.cwd, message.params.ephemeral);`
	`300`	`+ state.lastThreadStart = message.params;`
	`301`	`+ saveState(state);`
`300`	`302`	`send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model \|\| "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });`
`301`	`303`	`send({ method: "thread/started", params: { thread: { id: thread.id } } });`
`302`	`304`	`break;`
`@@ -330,6 +332,7 @@ rl.on("line", (line) => {`
`330`	`332`	`}`
`331`	`333`	`const thread = ensureThread(state, message.params.threadId);`
`332`	`334`	`thread.updatedAt = now();`
	`335`	`+ state.lastThreadResume = message.params;`
`333`	`336`	`saveState(state);`
`334`	`337`	`send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model \|\| "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });`
`335`	`338`	`break;`