feat: add backfill-metrics script and cw backfill-metrics CLI command
Populates the agent_metrics table from existing agent_log_chunks data after the schema migration. Reads chunks in batches of 500, accumulates per-agent counts in memory, then upserts with an additive ON CONFLICT DO UPDATE to match the ongoing insertChunk write-path behavior.

- apps/server/scripts/backfill-metrics.ts: core backfillMetrics(db) + CLI wrapper backfillMetricsFromPath(dbPath)
- apps/server/scripts/backfill-metrics.test.ts: 8 tests covering all chunk types, malformed JSON, isolation, empty DB, and re-run double-count behavior
- apps/server/cli/index.ts: new top-level `cw backfill-metrics [--db <path>]` command
- docs/database-migrations.md: "Post-migration backfill scripts" section documenting when and how to run the script

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
131
apps/server/scripts/backfill-metrics.test.ts
Normal file
131
apps/server/scripts/backfill-metrics.test.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
/**
|
||||
* Tests for the backfill-metrics script.
|
||||
*
|
||||
* Uses an in-memory test database to verify that backfillMetrics correctly
|
||||
* accumulates counts from agent_log_chunks and upserts into agent_metrics.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { createTestDatabase } from '../db/repositories/drizzle/test-helpers.js';
|
||||
import type { DrizzleDatabase } from '../db/index.js';
|
||||
import { agentLogChunks, agentMetrics } from '../db/index.js';
|
||||
import { backfillMetrics } from './backfill-metrics.js';
|
||||
import { nanoid } from 'nanoid';
|
||||
import { eq } from 'drizzle-orm';
|
||||
|
||||
/**
 * Test helper: inserts one agent_log_chunks row for the given agent.
 *
 * Object content is serialized with JSON.stringify; string content is stored
 * verbatim, which lets a test inject a payload that is not valid JSON.
 */
async function insertChunk(db: DrizzleDatabase, agentId: string, content: object | string) {
  const serialized = typeof content === 'string' ? content : JSON.stringify(content);

  const row = {
    id: nanoid(),
    agentId,
    agentName: 'test-agent',
    sessionNumber: 1,
    content: serialized,
    createdAt: new Date(),
  };

  await db.insert(agentLogChunks).values(row);
}
|
||||
|
||||
describe('backfillMetrics', () => {
  let db: DrizzleDatabase;

  // Chunk payload builders; these serialize to the same JSON the tests rely on.
  const askUserQuestion = (questionTotal: number) => ({
    type: 'tool_use',
    name: 'AskUserQuestion',
    input: { questions: Array.from({ length: questionTotal }, () => ({})) },
  });
  const spawnAgent = () => ({ type: 'tool_use', name: 'Agent' });

  // Reads back the agent_metrics rows stored for a single agent.
  const metricsFor = (agentId: string) =>
    db.select().from(agentMetrics).where(eq(agentMetrics.agentId, agentId));

  // Every test starts from a fresh database.
  beforeEach(() => {
    db = createTestDatabase();
  });

  it('AskUserQuestion chunks — questionsCount correct', async () => {
    await insertChunk(db, 'agent-a', askUserQuestion(2));
    await insertChunk(db, 'agent-a', askUserQuestion(1));

    await backfillMetrics(db);

    const stored = await metricsFor('agent-a');
    expect(stored).toHaveLength(1);
    expect(stored[0].questionsCount).toBe(3);
    expect(stored[0].subagentsCount).toBe(0);
    expect(stored[0].compactionsCount).toBe(0);
  });

  it('Agent tool chunks — subagentsCount correct', async () => {
    await insertChunk(db, 'agent-b', spawnAgent());
    await insertChunk(db, 'agent-b', spawnAgent());

    await backfillMetrics(db);

    const stored = await metricsFor('agent-b');
    expect(stored).toHaveLength(1);
    expect(stored[0].questionsCount).toBe(0);
    expect(stored[0].subagentsCount).toBe(2);
    expect(stored[0].compactionsCount).toBe(0);
  });

  it('Compaction chunks — compactionsCount correct', async () => {
    await insertChunk(db, 'agent-c', { type: 'system', subtype: 'init', source: 'compact' });

    await backfillMetrics(db);

    const stored = await metricsFor('agent-c');
    expect(stored).toHaveLength(1);
    expect(stored[0].questionsCount).toBe(0);
    expect(stored[0].subagentsCount).toBe(0);
    expect(stored[0].compactionsCount).toBe(1);
  });

  it('Irrelevant chunk type — no metrics row created', async () => {
    await insertChunk(db, 'agent-d', { type: 'text', text: 'hello' });

    await backfillMetrics(db);

    const stored = await metricsFor('agent-d');
    expect(stored).toEqual([]);
  });

  it('Malformed JSON chunk — skipped, no crash', async () => {
    await insertChunk(db, 'agent-e', 'not-valid-json');
    await insertChunk(db, 'agent-e', spawnAgent());

    await expect(backfillMetrics(db)).resolves.not.toThrow();

    const stored = await metricsFor('agent-e');
    expect(stored).toHaveLength(1);
    expect(stored[0].subagentsCount).toBe(1);
  });

  it('Multiple agents — counts isolated per agent', async () => {
    await insertChunk(db, 'agent-f', askUserQuestion(3));
    await insertChunk(db, 'agent-f', askUserQuestion(3));
    await insertChunk(db, 'agent-g', spawnAgent());

    await backfillMetrics(db);

    const storedF = await metricsFor('agent-f');
    expect(storedF).toHaveLength(1);
    expect(storedF[0].questionsCount).toBe(6);
    expect(storedF[0].subagentsCount).toBe(0);
    expect(storedF[0].compactionsCount).toBe(0);

    const storedG = await metricsFor('agent-g');
    expect(storedG).toHaveLength(1);
    expect(storedG[0].questionsCount).toBe(0);
    expect(storedG[0].subagentsCount).toBe(1);
    expect(storedG[0].compactionsCount).toBe(0);
  });

  it('Empty database — completes without error', async () => {
    await expect(backfillMetrics(db)).resolves.not.toThrow();

    const everything = await db.select().from(agentMetrics);
    expect(everything).toEqual([]);
  });

  it('Re-run idempotency note — second run doubles counts', async () => {
    // Pins the documented behavior: the upsert is additive, so the script is
    // meant to be run only once against a fresh agent_metrics table.
    await insertChunk(db, 'agent-h', spawnAgent());

    await backfillMetrics(db);
    const afterFirstRun = await metricsFor('agent-h');
    expect(afterFirstRun[0].subagentsCount).toBe(1);

    await backfillMetrics(db);
    const afterSecondRun = await metricsFor('agent-h');
    expect(afterSecondRun[0].subagentsCount).toBe(2);
  });
});
|
||||
128
apps/server/scripts/backfill-metrics.ts
Normal file
128
apps/server/scripts/backfill-metrics.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
|
||||
* Backfill script for agent_metrics table.
|
||||
*
|
||||
* Reads all existing agent_log_chunks rows and populates agent_metrics with
|
||||
* accumulated counts of questions, subagent spawns, and compaction events.
|
||||
*
|
||||
* Intended to be run once per production database after applying the migration
|
||||
* that introduces the agent_metrics table.
|
||||
*
|
||||
* Idempotency note: Uses ON CONFLICT DO UPDATE with additive increments to match
|
||||
* the ongoing insertChunk write-path behavior. Running against an empty
|
||||
* agent_metrics table is fully safe. Running a second time will double-count —
|
||||
* only run this script once per database, immediately after applying the migration.
|
||||
*/
|
||||
|
||||
import { asc, sql } from 'drizzle-orm';
|
||||
import { createDatabase, DrizzleDatabase, agentLogChunks, agentMetrics } from '../db/index.js';
|
||||
|
||||
/** Number of agent_log_chunks rows fetched per query. */
const BATCH_SIZE = 500;

/** A progress line is logged each time this many chunks have been processed. */
const LOG_EVERY = 1000;

/** Running totals kept per agent while scanning the chunk log. */
type MetricCounts = { questionsCount: number; subagentsCount: number; compactionsCount: number };

/** Outcome of inspecting a single chunk payload. */
type ChunkTally =
  | { kind: 'malformed' }
  | { kind: 'ignored' }
  | { kind: 'counted'; field: keyof MetricCounts; amount: number };

/**
 * Decides what, if anything, one raw chunk payload contributes to the metrics.
 *
 * - content that is not valid JSON            -> 'malformed'
 * - tool_use / AskUserQuestion                -> questionsCount += input.questions.length
 *   (ignored when `questions` is missing, not an array, or empty)
 * - tool_use / Agent                          -> subagentsCount += 1
 * - system / subtype init / source compact    -> compactionsCount += 1
 * - anything else (including non-object JSON) -> 'ignored'
 */
function classifyChunk(content: string): ChunkTally {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    return { kind: 'malformed' };
  }

  if (typeof parsed !== 'object' || parsed === null) {
    return { kind: 'ignored' };
  }

  const obj = parsed as Record<string, unknown>;
  const type = obj['type'];
  const name = obj['name'];

  if (type === 'tool_use' && name === 'AskUserQuestion') {
    // Narrow `input` before indexing instead of asserting its shape.
    const input = obj['input'];
    const questions =
      typeof input === 'object' && input !== null
        ? (input as Record<string, unknown>)['questions']
        : undefined;
    const amount = Array.isArray(questions) ? questions.length : 0;
    return amount > 0 ? { kind: 'counted', field: 'questionsCount', amount } : { kind: 'ignored' };
  }

  if (type === 'tool_use' && name === 'Agent') {
    return { kind: 'counted', field: 'subagentsCount', amount: 1 };
  }

  if (type === 'system' && obj['subtype'] === 'init' && obj['source'] === 'compact') {
    return { kind: 'counted', field: 'compactionsCount', amount: 1 };
  }

  return { kind: 'ignored' };
}

/**
 * Core backfill function. Accepts a DrizzleDatabase for testability.
 *
 * Scans agent_log_chunks in pages of BATCH_SIZE rows, accumulates per-agent
 * counts in memory, then upserts one agent_metrics row per agent that had at
 * least one countable chunk. Agents with no countable chunks get no row.
 *
 * The upsert is additive (existing counts + backfilled counts), so running the
 * backfill twice against the same data doubles the stored counts.
 *
 * NOTE(review): the upserts are issued one by one, not inside a transaction, so
 * a failure part-way through leaves some agents written and others not —
 * confirm whether that is acceptable for the target deployment.
 *
 * @param db - Database handle to read chunks from and write metrics to.
 * @returns Resolves once every accumulated agent has been upserted.
 */
export async function backfillMetrics(db: DrizzleDatabase): Promise<void> {
  const accumulator = new Map<string, MetricCounts>();
  let offset = 0;
  let totalChunks = 0;
  let malformedCount = 0;

  while (true) {
    const batch = await db
      .select({ agentId: agentLogChunks.agentId, content: agentLogChunks.content })
      .from(agentLogChunks)
      // Tie-break on id: with createdAt alone, rows sharing a timestamp have no
      // guaranteed order between page queries, so LIMIT/OFFSET paging could
      // skip a chunk or read it twice.
      .orderBy(asc(agentLogChunks.createdAt), asc(agentLogChunks.id))
      .limit(BATCH_SIZE)
      .offset(offset);

    for (const chunk of batch) {
      const tally = classifyChunk(chunk.content);

      if (tally.kind === 'malformed') {
        malformedCount++;
      } else if (tally.kind === 'counted') {
        let entry = accumulator.get(chunk.agentId);
        if (entry === undefined) {
          entry = { questionsCount: 0, subagentsCount: 0, compactionsCount: 0 };
          accumulator.set(chunk.agentId, entry);
        }
        entry[tally.field] += tally.amount;
      }

      // Every chunk counts toward progress, whether it was counted, ignored or malformed.
      totalChunks++;
      if (totalChunks % LOG_EVERY === 0) {
        console.log(`Processed ${totalChunks} chunks...`);
      }
    }

    // A short (or empty) page means the table is exhausted; no further query needed.
    if (batch.length < BATCH_SIZE) break;
    offset += batch.length;
  }

  // Upsert accumulated counts into agent_metrics.
  // Uses additive ON CONFLICT DO UPDATE to match the ongoing insertChunk behavior.
  for (const [agentId, counts] of accumulator) {
    await db
      .insert(agentMetrics)
      .values({
        agentId,
        questionsCount: counts.questionsCount,
        subagentsCount: counts.subagentsCount,
        compactionsCount: counts.compactionsCount,
        updatedAt: new Date(),
      })
      .onConflictDoUpdate({
        target: agentMetrics.agentId,
        set: {
          questionsCount: sql`${agentMetrics.questionsCount} + ${counts.questionsCount}`,
          subagentsCount: sql`${agentMetrics.subagentsCount} + ${counts.subagentsCount}`,
          compactionsCount: sql`${agentMetrics.compactionsCount} + ${counts.compactionsCount}`,
          updatedAt: new Date(),
        },
      });
  }

  console.log(
    `Backfill complete: ${accumulator.size} agents updated, ${totalChunks} chunks processed, ${malformedCount} malformed chunks skipped`
  );
}
|
||||
|
||||
/**
 * CLI entry point: opens the database located at `dbPath` with createDatabase
 * and runs backfillMetrics against it.
 *
 * NOTE(review): the handle returned by createDatabase is not explicitly closed
 * here — confirm whether it needs to be.
 *
 * @param dbPath - Path handed to createDatabase.
 * @returns Resolves when the backfill has finished.
 */
export async function backfillMetricsFromPath(dbPath: string): Promise<void> {
  await backfillMetrics(createDatabase(dbPath));
}
|
||||
Reference in New Issue
Block a user