diff --git a/.claude/learnings.md b/.claude/learnings.md index 5b85315..f11adb3 100644 --- a/.claude/learnings.md +++ b/.claude/learnings.md @@ -162,3 +162,9 @@ The /cost + /evals collectors were first built against assumed endpoints that DO ## 2026-06-18 — `agent.ts` workflow has NO top-level adapter/logger imports — all deferred - `apps/worker/src/workflows/agent.ts` is a Vercel Workflow DevKit `"use workflow"` module. The WDK bundler cannot tolerate top-level adapter/logger imports, so EVERY use of `logger`/adapters inside a `"use step"` is a deferred `await import("../lib/logger.js")` / `await import("../lib/step-adapters.js")` in the step body (logger refs at lines 73,137,201,338,445,486,508 — none top-level). Do NOT assume "a warm module is already imported at the top" — there isn't one; the module is only cache-warm because earlier steps in the same run imported it. (Correction to a wrong assumption I made during the PR-link-comment work: I claimed logger was imported top-level; it is not — an opus reviewer caught it.) Consequence for best-effort steps that log inside a `catch`: that import is itself a deferred `await import` that could in theory fail cold, so a "never throws" step is only truly safe in concert with `maxRetries=0` + a failure mode that doesn't affect the run outcome. + +## 2026-06-18 — us11 capacity e2e flakiness = Jira JQL index is non-monotonic, not a product bug +- `apps/worker/e2e/tier2/us11-capacity-limit-respected.test.ts` intermittently failed with `expected 0 to be greater than or equal to 1` on `pollRes.body.discovered`. Root cause: the cron's discovery (`/rest/api/3/search/jql`, `project AND status=COLUMN_AI`) hit Jira's eventually-consistent search index during a window where it reported the AI column **empty** — even though the `isTicketVisibleInJql` barrier had just gotten one positive read. A single positive index read does NOT guarantee the cron's immediately-following search (a separate request) also sees the ticket. 
The cron is idempotent (next tick discovers it), so this is purely a test-reliability defect. Fix: poll the cron in a bounded `waitFor` (30s) until `discovered >= 1`, **re-saturating the dummy registry entries before each poll** (reconcile's `ORPHAN_GRACE_MS = 30s` would otherwise free a slot mid-loop and let a late poll actually start the ticket), asserting `started === 0` every tick. us08 already tolerates this race by asserting only on the *effect* (no registry entry over a 15s window), not on the `discovered` count — us11 was the only test making a single-shot `discovered` assertion. + +## 2026-06-18 — Dashboard ticket grid blowout = bare `1fr` honors content min-content +- `apps/dashboard/app/(cockpit)/ticket/[ticketKey]/page.tsx` desktop split-view used `gridTemplateColumns: "280px 1fr"`. A bare `1fr` is `minmax(auto, 1fr)`, so the detail (trace) column honored its content's min-content (~1082px, driven by the 5-up `grid-cols-5` KPI row + the `lg:grid-cols-[1.4fr_1fr]` panel), pushing the grid to 1410px inside a ~1060px area → horizontal scrollbar inside `cockpit-shell`'s `flex-1 overflow-auto` scroller (`html` itself didn't overflow because the root is `overflow-hidden`). Fix: `280px minmax(0, 1fr)` on the column + `min-w-0` on the `DetailArea` grid item (`ticket-selection.tsx`) so the track + item can shrink below content. Verified live with agent-browser (grid 1410→1060, scroller H-overflow gone). The FlameGraph's own `overflow-x-auto` (fixed 1040px timeline) is intended internal scroll, not a blowout. General rule here: any `1fr` grid track holding wide/min-content-heavy content should be `minmax(0, 1fr)` and its grid item `min-w-0`. 
diff --git a/apps/dashboard/app/(cockpit)/ticket/[ticketKey]/page.tsx b/apps/dashboard/app/(cockpit)/ticket/[ticketKey]/page.tsx index 07fc956..804192e 100644 --- a/apps/dashboard/app/(cockpit)/ticket/[ticketKey]/page.tsx +++ b/apps/dashboard/app/(cockpit)/ticket/[ticketKey]/page.tsx @@ -48,7 +48,7 @@ export default async function TicketPage({
+
{children}
{isPending && (
diff --git a/apps/worker/e2e/tier2/us11-capacity-limit-respected.test.ts b/apps/worker/e2e/tier2/us11-capacity-limit-respected.test.ts index 3821608..9a47f1d 100644 --- a/apps/worker/e2e/tier2/us11-capacity-limit-respected.test.ts +++ b/apps/worker/e2e/tier2/us11-capacity-limit-respected.test.ts @@ -35,6 +35,15 @@ describe("US-11: Capacity limit respected", () => { const dummyKeys: string[] = []; let ticketKey: string | null = null; + // Re-seed every dummy slot with a fresh `created_at`. Called before each + // cron poll so reconcile's 30s orphan grace never frees a slot mid-test — + // a freed slot could let a late poll actually start the ticket. + async function saturateCapacity(): Promise<void> { + await Promise.all( + dummyKeys.map((key) => setEntry(key, `run_e2e_dummy_${key}`)), + ); + } + beforeAll(async () => { const stale = await listAllRuns(); if (stale.length > 0) { @@ -50,10 +59,9 @@ describe("US-11: Capacity limit respected", () => { // slot. Fresh timestamps (default `ageMs: 0`) keep reconcile's 30s // orphan grace from wiping them mid-test. for (let i = 0; i < e2eEnv.MAX_CONCURRENT_AGENTS; i++) { - const key = `${DUMMY_PREFIX}${i}`; - dummyKeys.push(key); - await setEntry(key, `run_e2e_dummy_${i}`); + dummyKeys.push(`${DUMMY_PREFIX}${i}`); } + await saturateCapacity(); console.log( `[US-11] Seeded ${dummyKeys.length} dummy entries to saturate capacity`, ); @@ -93,14 +101,29 @@ describe("US-11: Capacity limit respected", () => { }, ); - // 3. Trigger cron. Dispatch's `isAtCapacity` precheck sees MAX dummies - // and rejects our ticket before it can claim. (The deployed - // scheduled cron may also have fired during the JQL wait — it hits - // the same at-capacity rejection, so either way no claim lands.) - const pollRes = await callCronPoll(); + // 3. Trigger cron until it discovers our ticket while every slot is full. + // Dispatch's `isAtCapacity` precheck sees MAX dummies and rejects the + // ticket before it can claim. 
Jira's JQL index is eventually + // consistent, so even after the visibility barrier above the cron's + // own search (a separate request) can momentarily report + // `discovered: 0`; retry until the ticket lands in the index. + // Re-saturate before each poll so a slot never frees mid-loop, and + // assert `started === 0` every tick — capacity must hold throughout. + const pollRes = await waitFor( + async () => { + await saturateCapacity(); + const res = await callCronPoll(); + expect(res.status).toBe(200); + expect(res.body?.started).toBe(0); + return (res.body?.discovered ?? 0) >= 1 ? res : null; + }, + { + description: `cron discovers ${ticketKey} while at capacity`, + timeoutMs: 30_000, + intervalMs: 3_000, + }, + ); console.log("[US-11] cron response:", JSON.stringify(pollRes.body)); - expect(pollRes.status).toBe(200); - expect(pollRes.body?.discovered).toBeGreaterThanOrEqual(1); expect(pollRes.body?.started).toBe(0); // 4. The ticket has no registry entry — capacity rejection confirmed.