footnote-ai · jbax1899 · Mar 26, 2026 · Mar 26, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/.env.example b/.env.example
@@ -134,7 +134,8 @@ FRAME_ANCESTORS='self',https://ai.jordanmakes.dev # Production CSP frame-ancesto
 # DEV example: FRAME_ANCESTORS='self',https://ai.jordanmakes.dev,http://localhost:8080,http://localhost:3000
 DEFAULT_MODEL=gpt-5-mini # Default model for reflect responses
 MODEL_PROFILE_CATALOG_PATH= # Optional YAML path for backend model profile catalog (defaults to bundled catalog)
-DEFAULT_PROFILE_ID=openai-text-medium # Default backend model profile ID used by catalog resolver
+DEFAULT_PROFILE_ID=openai-text-medium # Default response model profile ID used when planner does not select a valid profile
+PLANNER_PROFILE_ID=openai-text-fast # Planner model profile ID (can differ from DEFAULT_PROFILE_ID)
 REALTIME_DEFAULT_MODEL=gpt-realtime-mini # Default model for realtime voice sessions
 REALTIME_DEFAULT_VOICE=echo # Default voice for realtime voice sessions
 REALTIME_TURN_DETECTION=server_vad # server_vad or semantic_vad for realtime voice turns

diff --git a/AGENTS.md b/AGENTS.md
@@ -72,11 +72,16 @@ Example:
 
 ## Testing & Validation
 
-- Run ESLint by default after any file edit.
+- Run `pnpm lint:fix` by default after any file edit (robot/local workflow).
+- Use `pnpm lint` as non-mutating verification (CI and final gate checks).
+- `pnpm format:check` and `pnpm format:write` are changed-file aware by default.
+  Optional CI override: set `FORMAT_BASE_REF` to check/write against a base branch range.
+- If a file type is intentionally outside prettier/eslint globs (for example `.env.example`), keep formatting consistent manually and call that out in the summary.
 - Prefer linting only the touched files when the repo tooling supports it cleanly.
 - If the repo only exposes a broader lint command, run that broader command and note the wider validation scope.
 - Review: `pnpm review`
 - Packaging validation for deployable service changes or cleanup that can affect runtime packaging: `docker compose -f deploy/compose.yml build`
+- PR readiness gate for large or cross-cutting changes: run both `pnpm review` and `docker compose -f deploy/compose.yml build` before marking the change review-ready.
 - `@footnote-*` tags: `pnpm validate-footnote-tags`
 - OpenAPI linking: `pnpm validate-openapi-links`
 

diff --git a/cursor.rules b/cursor.rules
@@ -188,7 +188,10 @@ const <loggerName>Logger = logger.child({ module: '<loggerName>' });
 - Preserve provenance comments, cost tracking, and licensing headers.
 - Never remove risk annotations or audit metadata without explicit reason.
 - Maintain backward compatibility unless explicitly breaking for a versioned release.
-- After any file edit, run ESLint by default before wrapping up the change.
+- After any file edit, run `pnpm lint:fix` by default (robot/local workflow).
+- Use `pnpm lint` as the non-mutating CI/final verification gate.
+- `pnpm format:check` / `pnpm format:write` operate on changed files by default; set `FORMAT_BASE_REF` in CI to evaluate a base-ref range.
+- If a file is outside formatter/parser coverage (for example `.env.example`), preserve style manually and note the limitation in the change summary.
 - Prefer linting only the touched files when the repo tooling supports it cleanly.
 - If the repo only exposes a broader lint command, run that broader command and call out the wider scope in the summary.
 
@@ -257,13 +260,15 @@ const <loggerName>Logger = logger.child({ module: '<loggerName>' });
 
 ### Recommended Workflow
 1. **Complete implementation**
-2. **Run ESLint by default** on the touched files when possible; otherwise run the broader repo lint command and note the wider scope
-3. **Run automated validation**: `pnpm review` (validates `@footnote-*` tags, OpenAPI code links, types, linting)
-4. **Run packaging validation when the change can affect deployable services**: `docker compose -f deploy/compose.yml build`
-5. **Use Cursor's Bugbot (Review PR)** for automated code quality analysis
-6. **Use inline chat (`Ctrl+K`)** with project-specific prompts (see `.cursor/footnote-prompts.md`)
-7. **Accept suggested simplifications or comments** in-place
-8. **Open human PR review** for logic, ethics, and integration focus
+2. **Run `pnpm lint:fix` by default** (robot/local cleanup)
+3. **Run `pnpm lint`** for non-mutating verification
+4. **Run automated validation**: `pnpm review` (validates `@footnote-*` tags, OpenAPI code links, types, linting)
+5. **Run packaging validation when the change can affect deployable services**: `docker compose -f deploy/compose.yml build`
+6. **PR-readiness gate for large/cross-cutting changes**: run both `pnpm review` and `docker compose -f deploy/compose.yml build` before marking review-ready
+7. **Use Cursor's Bugbot (Review PR)** for automated code quality analysis
+8. **Use inline chat (`Ctrl+K`)** with project-specific prompts (see `.cursor/footnote-prompts.md`)
+9. **Accept suggested simplifications or comments** in-place
+10. **Open human PR review** for logic, ethics, and integration focus
 
 ### Integration with Existing Tools
 - **Review pipeline**: `pnpm review`

diff --git a/package.json b/package.json
@@ -34,6 +34,10 @@
         "review": "node scripts/review.cjs",
         "test:annotation-governance": "pnpm exec tsx --test scripts/annotation-schema.test.ts scripts/validate-footnote-tags.test.ts scripts/review.test.ts",
         "type-check": "pnpm exec tsc --noEmit",
+        "format:check": "node scripts/format-changed.cjs --check",
+        "format:write": "node scripts/format-changed.cjs --write",
+        "lint": "pnpm run format:check && pnpm run lint-check",
+        "lint:fix": "pnpm run format:write && pnpm exec eslint packages/ --fix",
         "lint-check": "pnpm exec eslint packages/",
         "backend:prepare": "pnpm --filter @footnote/config-spec run build:dev && pnpm --filter @footnote/prompts run build:dev"
     },

diff --git a/packages/agent-runtime/src/index.ts b/packages/agent-runtime/src/index.ts
@@ -138,6 +138,10 @@ export interface GenerationRequest {
      * Retrieval settings. Omit this field when search should stay disabled.
      */
     search?: GenerationSearchRequest;
+    /**
+     * Optional stable caller/user identifier reserved for future memory flows.
+     */
+    userId?: string;
     /**
      * Optional cancellation signal forwarded from backend orchestration.
      */
@@ -235,6 +239,11 @@ export interface GenerationResult {
      * Runtime-reported provenance classification, when available.
      */
     provenance?: GenerationProvenance;
+    /**
+     * Placeholder memory retrieval payload reserved for future memory features.
+     * Current flows should leave this undefined.
+     */
+    memoryRetrievals?: [];
 }
 
 /**

diff --git a/packages/backend/src/config/sections/modelProfiles.ts b/packages/backend/src/config/sections/modelProfiles.ts
@@ -116,6 +116,13 @@ export const buildModelProfilesSection = (
     const defaultProfileId =
         parseOptionalTrimmedString(env.DEFAULT_PROFILE_ID) ||
         envDefaultValues.DEFAULT_PROFILE_ID;
+    // defaultProfileId (resolved above) is the response generation fallback
+    // profile, used when callers provide no selector or an invalid/disabled one.
+    // plannerProfileId (resolved below) is the planner execution profile; it is
+    // kept separate so planner cost/latency can be tuned independently.
+    const plannerProfileId =
+        parseOptionalTrimmedString(env.PLANNER_PROFILE_ID) ||
+        envDefaultValues.PLANNER_PROFILE_ID;
 
     let effectiveCatalogPath = preferredCatalogPath;
     let entries: unknown[] | null = null;
@@ -167,6 +174,7 @@ export const buildModelProfilesSection = (
 
     return {
         defaultProfileId,
+        plannerProfileId,
         catalogPath: effectiveCatalogPath,
         catalog,
     };

diff --git a/packages/backend/src/config/types.ts b/packages/backend/src/config/types.ts
@@ -62,6 +62,7 @@ export type RuntimeConfig = {
     };
     modelProfiles: {
         defaultProfileId: string;
+        plannerProfileId: string;
         catalogPath: string;
         catalog: ModelProfile[];
     };

diff --git a/packages/backend/src/services/chatOrchestrator.ts b/packages/backend/src/services/chatOrchestrator.ts
@@ -16,6 +16,7 @@ import {
     type CreateChatServiceOptions,
 } from './chatService.js';
 import { createChatPlanner, type ChatPlan } from './chatPlanner.js';
+import type { ChatGenerationPlan } from './chatGenerationTypes.js';
 import { normalizeDiscordConversation } from './chatConversationNormalization.js';
 import {
     resolveActiveProfileOverlayPrompt,
@@ -41,6 +42,7 @@ const buildPlannerPayload = (
     JSON.stringify({
         action: plan.action,
         modality: plan.modality,
+        profileId: plan.profileId,
         reaction: plan.reaction,
         imageRequest: plan.imageRequest,
         riskTier: plan.riskTier,
@@ -64,31 +66,56 @@ export const createChatOrchestrator = ({
         typeof logger.child === 'function'
             ? logger.child({ module: 'chatOrchestrator' })
             : logger;
+    const catalogProfiles = runtimeConfig.modelProfiles.catalog;
+    const enabledProfiles = catalogProfiles.filter(
+        (profile) => profile.enabled
+    );
+    const enabledProfilesById = new Map(
+        enabledProfiles.map((profile) => [profile.id, profile])
+    );
 
-    // Resolve one startup default profile that drives both planner and response
-    // generation. This keeps routing deterministic unless a future planner
-    // branch chooses profile ids explicitly.
+    // Resolver remains authoritative for all profile-id/tier/raw selector
+    // resolution and fail-open behavior.
     const modelProfileResolver = createModelProfileResolver({
-        catalog: runtimeConfig.modelProfiles.catalog,
+        catalog: catalogProfiles,
         defaultProfileId: runtimeConfig.modelProfiles.defaultProfileId,
         legacyDefaultModel: runtimeConfig.openai.defaultModel,
         warn: chatOrchestratorLogger,
     });
-    const defaultGenerationProfile = modelProfileResolver.resolve(defaultModel);
-    // One resolved profile is reused for planner + generation so both paths
-    // target the same provider/model/capability defaults.
+    const plannerProfile = modelProfileResolver.resolve(
+        runtimeConfig.modelProfiles.plannerProfileId
+    );
+    // Startup fallback profile for end-user response generation.
+    // Planner may override this per-request with one catalog profile id.
+    const defaultResponseProfile = modelProfileResolver.resolve(defaultModel);
+
+    // Bounded profile payload sent to planner prompt context.
+    // Description is trimmed to keep planner context predictable.
+    const plannerProfileOptions = enabledProfiles.map((profile) => ({
+        id: profile.id,
+        description: profile.description.slice(0, 180),
+        costClass: profile.costClass,
+        latencyClass: profile.latencyClass,
+        capabilities: {
+            canUseSearch: profile.capabilities.canUseSearch,
+        },
+    }));
+    // TODO(phase-5-provider-tool-registry): Add deterministic fallback ranking
+    // metadata for planner/executor handoff (for example, preferred
+    // search-capable backup profile ids by policy).
 
     // ChatService handles final message generation and trace/cost wiring.
     const chatService = createChatService({
         generationRuntime,
         storeTrace,
         buildResponseMetadata,
-        defaultModel: defaultGenerationProfile.providerModel,
-        defaultProvider: defaultGenerationProfile.provider,
-        defaultCapabilities: defaultGenerationProfile.capabilities,
+        defaultModel: defaultResponseProfile.providerModel,
+        defaultProvider: defaultResponseProfile.provider,
+        defaultCapabilities: defaultResponseProfile.capabilities,
         recordUsage,
     });
     const chatPlanner = createChatPlanner({
+        availableProfiles: plannerProfileOptions,
         executePlanner: async ({
             messages,
             model,
@@ -101,8 +128,8 @@ export const createChatOrchestrator = ({
             const plannerResult = await generationRuntime.generate({
                 messages,
                 model,
-                provider: defaultGenerationProfile.provider,
-                capabilities: defaultGenerationProfile.capabilities,
+                provider: plannerProfile.provider,
+                capabilities: plannerProfile.capabilities,
                 maxOutputTokens,
                 reasoningEffort,
                 verbosity,
@@ -114,7 +141,7 @@ export const createChatOrchestrator = ({
                 usage: plannerResult.usage,
             };
         },
-        defaultModel: defaultGenerationProfile.providerModel,
+        defaultModel: plannerProfile.providerModel,
         recordUsage,
     });
 
@@ -150,32 +177,81 @@ export const createChatOrchestrator = ({
             planned,
             chatOrchestratorLogger
         );
+        // Planner-selected profile is advisory.
+        // Runtime resolution here is authoritative and fail-open.
+        let selectedResponseProfile = defaultResponseProfile;
+        if (plan.profileId) {
+            const selectedProfile = enabledProfilesById.get(plan.profileId);
+            if (selectedProfile) {
+                selectedResponseProfile = selectedProfile;
+            } else {
+                chatOrchestratorLogger.warn(
+                    'planner selected invalid or disabled profile id; falling back to default profile',
+                    {
+                        selectedProfileId: plan.profileId,
+                        defaultProfileId: defaultResponseProfile.id,
+                        surface: normalizedRequest.surface,
+                    }
+                );
+            }
+        }
+
+        // Keep selected profile, but drop search when profile capabilities do
+        // not allow it. This avoids silently forcing a different model.
+        let generationForExecution: ChatGenerationPlan = plan.generation;
+        if (
+            generationForExecution.search &&
+            !selectedResponseProfile.capabilities.canUseSearch
+        ) {
+            // TODO: Before dropping search, attempt rerouting to a
+            // search-capable profile, and emit structured fields for
+            // observability (for example: searchFallbackApplied,
+            // originalProfileId, effectiveProfileId).
+            generationForExecution = {
+                ...generationForExecution,
+                search: undefined,
+            };
+            chatOrchestratorLogger.warn(
+                'planner requested search but selected profile does not support search; running without search',
+                {
+                    selectedProfileId: selectedResponseProfile.id,
+                    surface: normalizedRequest.surface,
+                }
+            );
+        }
+        // Persist the effective profile id in planner payload/snapshot so traces
+        // reflect what was actually executed.
+        const executionPlan: ChatPlan = {
+            ...plan,
+            generation: generationForExecution,
+            profileId: selectedResponseProfile.id,
+        };
 
         // Non-message actions return early and skip model generation.
-        if (plan.action === 'ignore') {
+        if (executionPlan.action === 'ignore') {
             return {
                 action: 'ignore',
                 metadata: null,
             };
         }
 
-        if (plan.action === 'react') {
+        if (executionPlan.action === 'react') {
             return {
                 action: 'react',
-                reaction: plan.reaction ?? '👍',
+                reaction: executionPlan.reaction ?? '👍',
                 metadata: null,
             };
         }
 
-        if (plan.action === 'image' && plan.imageRequest) {
+        if (executionPlan.action === 'image' && executionPlan.imageRequest) {
             return {
                 action: 'image',
-                imageRequest: plan.imageRequest,
+                imageRequest: executionPlan.imageRequest,
                 metadata: null,
             };
         }
 
-        if (plan.action === 'image' && !plan.imageRequest) {
+        if (executionPlan.action === 'image' && !executionPlan.imageRequest) {
             // Invalid image action should not block response flow.
             chatOrchestratorLogger.warn(
                 `Chat planner returned image without imageRequest; falling back to ignore. surface=${normalizedRequest.surface} trigger=${normalizedRequest.trigger.kind} latestUserInputLength=${normalizedRequest.latestUserInput.length}`
@@ -226,7 +302,7 @@ export const createChatOrchestrator = ({
                     '// BEGIN Planner Output',
                     '// This planner decision was made by the backend and should be treated as authoritative for this response.',
                     '// ==========',
-                    buildPlannerPayload(plan, surfacePolicy),
+                    buildPlannerPayload(executionPlan, surfacePolicy),
                     '// ==========',
                     '// END Planner Output',
                     '// ==========',
@@ -241,26 +317,27 @@ export const createChatOrchestrator = ({
             conversationSnapshot: JSON.stringify({
                 request: normalizedRequest,
                 planner: {
-                    action: plan.action,
-                    modality: plan.modality,
-                    riskTier: plan.riskTier,
-                    generation: plan.generation,
+                    action: executionPlan.action,
+                    modality: executionPlan.modality,
+                    profileId: executionPlan.profileId,
+                    riskTier: executionPlan.riskTier,
+                    generation: executionPlan.generation,
                     ...(surfacePolicy && { surfacePolicy }),
                 },
             }),
-            plannerTemperament: plan.generation.temperament,
-            riskTier: plan.riskTier,
-            model: defaultGenerationProfile.providerModel,
-            provider: defaultGenerationProfile.provider,
-            capabilities: defaultGenerationProfile.capabilities,
-            generation: plan.generation,
+            plannerTemperament: executionPlan.generation.temperament,
+            riskTier: executionPlan.riskTier,
+            model: selectedResponseProfile.providerModel,
+            provider: selectedResponseProfile.provider,
+            capabilities: selectedResponseProfile.capabilities,
+            generation: executionPlan.generation,
         });
 
         // Message action is the only branch that returns provenance metadata.
         return {
             action: 'message',
             message: response.message,
-            modality: plan.modality,
+            modality: executionPlan.modality,
             metadata: response.metadata,
         };
     };