From 7354582d69898952b20b26c0a468e86bc4c45939 Mon Sep 17 00:00:00 2001
From: Lukas May <lukas.may@carealytix.com>
Date: Wed, 18 Feb 2026 16:43:19 +0900
Subject: [PATCH] refactor: Add context management to plan/detail prompts,
 update docs

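Add CONTEXT_MANAGEMENT shared block to plan and detail mode prompts so
architect agents also benefit from compaction awareness and parallel
execution hints. Also add guidance sections to both prompts: dependency
graph, file ownership, and specificity test for plan; specificity test,
file ownership constraints, task sizing, and checkpoint tasks for detail.
Update index.ts re-exports and agent docs.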
---
 docs/agent.md               | 33 ++++++++++++++++++++-
 src/agent/prompts/detail.ts | 59 +++++++++++++++++++++++++++++++++++--
 src/agent/prompts/index.ts  |  2 +-
 src/agent/prompts/plan.ts   | 42 ++++++++++++++++++++++++--
 4 files changed, 130 insertions(+), 6 deletions(-)
diff --git a/docs/agent.md b/docs/agent.md
index bea6a0c..9d6f9ff 100644
--- a/docs/agent.md
+++ b/docs/agent.md
@@ -24,7 +24,7 @@
 | `accounts/` | Account discovery, config dir setup, credential management, usage API |
 | `credentials/` | `AccountCredentialManager` — credential injection per account |
 | `lifecycle/` | `LifecycleController` — retry policy, signal recovery, missing signal instructions |
-| `prompts/` | Mode-specific prompt builders (execute, discuss, plan, detail, refine) + shared inter-agent communication instructions |
+| `prompts/` | Mode-specific prompt builders (execute, discuss, plan, detail, refine) + shared blocks (codebase verification, deviation rules, git workflow, context management) + inter-agent communication instructions |
 
 ## Key Flows
 
@@ -166,3 +166,34 @@ Agents can communicate with each other via the `conversations` table, coordinate
 
 **`cw answer <answer> --conversation-id <id>`**
 - Calls `answerConversation`, prints `{ conversationId, status: "answered" }`
+
+## Prompt Architecture
+
+Mode-specific prompts in `prompts/` are composed from shared blocks and mode-specific sections.
+
+### Shared Blocks (`prompts/shared.ts`)
+
+| Constant | Content |
+|----------|---------|
+| `SIGNAL_FORMAT` | Signal output format (done/questions/error via `.cw/output/signal.json`) |
+| `INPUT_FILES` | Input file structure (manifest, assignment files, context files) |
+| `ID_GENERATION` | `cw id` usage for generating entity IDs |
+| `CODEBASE_VERIFICATION` | "Training as hypothesis" pattern — read before writing, verify imports, follow patterns, check git log |
+| `DEVIATION_RULES` | Decision tree for handling unexpected situations (typo→fix, bug→fix if small, missing dep→coordinate, architectural mismatch→STOP) |
+| `GIT_WORKFLOW` | Worktree-aware git guidance with rationale (frequent commits as recovery checkpoints, descriptive messages, specific staging to avoid cross-agent conflicts, no force-push) |
+| `CONTEXT_MANAGEMENT` | Context compaction awareness (don't stop early, commit as checkpoints, parallel file reads) |
+| `buildInterAgentCommunication()` | Per-agent CLI instructions for `cw listen`, `cw ask`, `cw answer` (compact format with usage pattern summary) |
+
+### Mode Prompts
+
+| Mode | File | Key Sections |
+|------|------|-------------|
+| **execute** | `execute.ts` | Execution protocol (read→orient→test first→implement→verify→commit→signal), scope rules with rationale (7+ files = overscoping), codebase verification, deviation rules, git workflow, context management, anti-patterns (including anti-hardcoding) |
+| **plan** | `plan.ts` | Dependency graph with wave analysis, file ownership for parallelism, specificity test ("Could a detail agent break this down?"), context management |
+| **detail** | `detail.ts` | Specificity test with good/bad examples, file ownership constraints, task sizing (1-5 files good, 7+ split), checkpoint guidance, context management |
+| **discuss** | `discuss.ts` | Goal-backward analysis (outcome→artifacts→wiring→failure points), question quality examples, decision quality examples, read-before-asking |
+| **refine** | `refine.ts` | Improvement hierarchy (ambiguity > missing details > contradictions > unverifiable > missing edge cases), don't refine style, signal done if nothing to improve |
+
+### Execute Prompt Dispatch
+
+`buildExecutePrompt(taskDescription?)` accepts an optional task description that's inlined into the prompt. The dispatch manager (`src/dispatch/manager.ts`) wraps `task.description || task.name` in `buildExecutePrompt()` so execute agents receive full system context (execution protocol, scope rules, anti-patterns) alongside their task description. The workspace layout and inter-agent communication blocks are appended by the agent manager at spawn time.
diff --git a/src/agent/prompts/detail.ts b/src/agent/prompts/detail.ts
index 62713d1..d5865d2 100644
--- a/src/agent/prompts/detail.ts
+++ b/src/agent/prompts/detail.ts
@@ -2,7 +2,7 @@
  * Detail mode prompt — break a phase into executable tasks.
  */
 
-import { ID_GENERATION, INPUT_FILES, SIGNAL_FORMAT } from './shared.js';
+import { CONTEXT_MANAGEMENT, ID_GENERATION, INPUT_FILES, SIGNAL_FORMAT } from './shared.js';
 
 export function buildDetailPrompt(): string {
   return `You are an Architect agent in the Codewalk multi-agent system operating in DETAIL mode.
@@ -24,6 +24,61 @@ Write one file per task to \`.cw/output/tasks/{id}.md\`:
 
 ${ID_GENERATION}
 
+## Specificity Test
+
+Before finalizing each task, ask: **"Could a worker agent execute this without clarifying questions?"**
+
+Every task body MUST include:
+1. **What to create or modify** — if possible, specific file paths (e.g., \`src/db/schema.ts\`, \`src/api/routes/users.ts\`)
+2. **Expected behavior** — what the code should do, with concrete examples or edge cases
+3. **How to verify** — specific test to run, endpoint to hit, or behavior to check
+
+**Bad task:**
+\`\`\`
+Title: Add user validation
+Body: Add validation to the user model. Make sure all fields are validated properly.
+\`\`\`
+
+**Good task:**
+\`\`\`
+Title: Add Zod validation schema for user creation
+Body: Create src/api/validators/user.ts with a Zod schema for CreateUserInput:
+- email: valid email format, lowercase, max 255 chars
+- name: string, 1-100 chars, trimmed
+- password: min 8 chars, must contain uppercase + number
+Export the schema and inferred type. Add unit tests in src/api/validators/user.test.ts
+covering: valid input, missing fields, invalid email, short password.
+Verify: npm test -- src/api/validators/user.test.ts
+\`\`\`
+
+## File Ownership Constraints
+
+Tasks that can run in parallel MUST NOT modify the same files. Include a file list in each task body:
+
+\`\`\`
+Files modified:
+- src/db/schema/users.ts (create)
+- src/db/migrations/001_users.sql (create)
+\`\`\`
+
+If two tasks need to modify the same file or need the functionality another task created or modified, make one depend on the other.
+
+## Task Sizing
+
+- **1-5 files**: Good task size
+- **7+ files**: Too big — split into smaller tasks
+- **1 sentence description**: Too small — merge with related work or add more detail
+- **500+ words**: Probably overspecified — simplify or split
+
+## Checkpoint Tasks
+
+Use checkpoint types for work that requires human judgment:
+- \`checkpoint:human-verify\`: Visual changes, migration results, API contract changes
+- \`checkpoint:decision\`: Architecture choices that affect multiple phases
+- \`checkpoint:human-action\`: External setup (DNS, credentials, third-party config)
+
+~90% of tasks should be \`auto\`. Don't over-checkpoint.
+
 ## Task Design Rules
 - Each task: specific, actionable, completable by one agent
 - Ideally tasks shall be executable in parallel - if they depend on each other, use dependencies to indicate order
@@ -36,5 +91,5 @@ ${ID_GENERATION}
 - If a task in context/tasks/ already covers the same work (even under a different name), do NOT create a duplicate
 - Pages contain requirements — use them to create detailed task descriptions
 - DO NOT create tasks that overlap with existing tasks in other phases
-`;
+${CONTEXT_MANAGEMENT}`;
 }
diff --git a/src/agent/prompts/index.ts b/src/agent/prompts/index.ts
index fd18eef..daecd86 100644
--- a/src/agent/prompts/index.ts
+++ b/src/agent/prompts/index.ts
@@ -5,7 +5,7 @@
  * input files, ID generation) are in shared.ts.
  */
 
-export { SIGNAL_FORMAT, INPUT_FILES, ID_GENERATION, buildInterAgentCommunication } from './shared.js';
+export { SIGNAL_FORMAT, INPUT_FILES, ID_GENERATION, CODEBASE_VERIFICATION, CONTEXT_MANAGEMENT, DEVIATION_RULES, GIT_WORKFLOW, buildInterAgentCommunication } from './shared.js';
 export { buildExecutePrompt } from './execute.js';
 export { buildDiscussPrompt } from './discuss.js';
 export { buildPlanPrompt } from './plan.js';
diff --git a/src/agent/prompts/plan.ts b/src/agent/prompts/plan.ts
index 3e3907e..3b3c96f 100644
--- a/src/agent/prompts/plan.ts
+++ b/src/agent/prompts/plan.ts
@@ -2,7 +2,7 @@
  * Plan mode prompt — plan initiative into phases.
  */
 
-import { ID_GENERATION, INPUT_FILES, SIGNAL_FORMAT } from './shared.js';
+import { CONTEXT_MANAGEMENT, ID_GENERATION, INPUT_FILES, SIGNAL_FORMAT } from './shared.js';
 
 export function buildPlanPrompt(): string {
   return `You are an Architect agent in the Codewalk multi-agent system operating in PLAN mode.
@@ -26,6 +26,43 @@ ${ID_GENERATION}
 - Size: 2-5 tasks each (not too big, not too small) - if the work is independent enough and the tasks are very similar you can also create more tasks for the phase
 - Clear, action-oriented names (describe what gets built, not how)
 
+## Dependency Graph
+
+Every plan MUST include an explicit dependency graph in the frontmatter in the output. For each phase, list:
+- What it depends on (by phase ID)
+
+Think in waves: Wave 1 = no dependencies (foundation). Wave 2 = depends only on Wave 1. And so on.
+
+**Good example:**
+\`\`\`
+Wave 1 (parallel): "Database schema", "API skeleton"
+Wave 2 (parallel): "User endpoints" (depends: API skeleton, DB schema), "Auth middleware" (depends: API skeleton)
+Wave 3: "Integration tests" (depends: User endpoints, Auth middleware)
+\`\`\`
+
+**Bad example:**
+\`\`\`
+Phase 1 → Phase 2 → Phase 3 → Phase 4 (fully serial, no parallelism)
+\`\`\`
+
+If your plan is fully serial, reconsider. Most real work has independent tracks.
+
+## File Ownership for Parallelism
+
+Phases that run in parallel MUST NOT modify the same files. If two phases need to change the same file, they must be sequential (one depends on the other).
+
+**Bad**: Phase A "Add user model" and Phase B "Add product model" both modify \`schema.ts\` and \`index.ts\`
+**Good**: Phase A "Add user model" creates \`user-schema.ts\`, Phase B "Add product model" creates \`product-schema.ts\`, Phase C "Wire models into index" depends on both
+
+## Specificity Test
+
+Before finalizing each phase description, ask: **"Could a detail agent break this into tasks without clarifying questions?"**
+
+**Bad**: "Set up the backend" — what backend? What framework? What endpoints?
+**Good**: "Create Express API server with health check endpoint at /api/health, CORS configured for localhost:3000, and error handling middleware that returns JSON error responses"
+
+Reference specific files and directories from the codebase when possible.
+
 ## Existing Context
 - Read context files to see what phases and tasks already exist
 - If phases/tasks already exist, account for them — don't plan work that's already covered
@@ -36,5 +73,6 @@ ${ID_GENERATION}
 - Start with foundation/infrastructure phases
 - Group related work together
 - Make dependencies explicit using phase IDs
-- Each task should be completable in one session`;
+- Each task should be completable in one session
+${CONTEXT_MANAGEMENT}`;
 }