From cf8c6aefdcfde3e4cf45ecdd555a9b808ba79f18 Mon Sep 17 00:00:00 2001 From: chenhaoli Date: Fri, 10 Oct 2025 08:22:35 +0800 Subject: [PATCH 1/6] refactor(agent-snapshot): improve code quality and maintainability - Remove complex prototype chain manipulation from AgentSnapshot constructor - Simplify AgentSnapshot to focus on core snapshot generation and replay functionality - Add generic verification method to reduce code duplication in SnapshotManager - Consolidate repeated file path operations with helper methods - Improve README with comprehensive documentation and examples - Remove unnecessary type assertions and comments - Maintain API compatibility while improving internal structure Breaking Changes: None - all public APIs remain unchanged --- multimodal/tarko/agent-snapshot/README.md | 208 +++++++++- .../src/agent-replay-snapshot-hook.ts | 3 +- .../agent-snapshot/src/agent-snapshot.ts | 362 ++++-------------- .../agent-snapshot/src/snapshot-manager.ts | 228 ++++------- 4 files changed, 357 insertions(+), 444 deletions(-) diff --git a/multimodal/tarko/agent-snapshot/README.md b/multimodal/tarko/agent-snapshot/README.md index 5798fdc042..250312878e 100644 --- a/multimodal/tarko/agent-snapshot/README.md +++ b/multimodal/tarko/agent-snapshot/README.md @@ -1,6 +1,14 @@ # @tarko/agent-snapshot -A snapshot-based agent test framework for `@tarko/agent` based Agents", +A snapshot-based testing framework for `@tarko/agent` based Agents. This package provides deterministic testing capabilities by capturing and replaying agent interactions, including LLM requests/responses, tool calls, and event streams. + +## Features + +- **Snapshot Generation**: Capture real agent interactions for test fixtures +- **Deterministic Replay**: Mock LLM responses using captured snapshots +- **Comprehensive Verification**: Validate LLM requests, event streams, and tool calls +- **Flexible Configuration**: Customize normalization and verification settings +- **CLI Support**: Command-line tools for snapshot management ## Installation @@ -8,5 +16,201 @@ A snapshot-based agent test framework for `@tarko/agent` based Agents", npm install @tarko/agent-snapshot ``` -## Usage +## Quick Start + +### Basic Usage + +```typescript +import { Agent } from '@tarko/agent'; +import { AgentSnapshot } from '@tarko/agent-snapshot'; + +// Create your agent +const agent = new Agent(/* your config */); + +// Create snapshot instance +const snapshot = new AgentSnapshot(agent, { + snapshotPath: './fixtures/my-test-case', + snapshotName: 'example-test' +}); + +// Generate snapshot (runs with real LLM) +await snapshot.generate("Hello, how can you help me?"); + +// Replay test (uses mocked responses) +const result = await snapshot.replay("Hello, how can you help me?"); +``` + +### Advanced Configuration + +```typescript +const snapshot = new AgentSnapshot(agent, { + snapshotPath: './fixtures/complex-test', + updateSnapshots: false, + normalizerConfig: { + fieldsToNormalize: [ + { pattern: /timestamp/i, replacement: '<>' }, + { pattern: 'id', replacement: '<>' } + ], + fieldsToIgnore: ['debug_info'] + }, + verification: { + verifyLLMRequests: true, + verifyEventStreams: true, + verifyToolCalls: true + } +}); +``` + +## API Reference + +### AgentSnapshot + +The main class for managing agent snapshots. + +#### Constructor + +```typescript +new AgentSnapshot(agent: Agent, options: AgentSnapshotOptions) +``` + +#### Methods + +- `generate(runOptions: AgentRunOptions): Promise` +- `replay(runOptions: AgentRunOptions, config?: TestRunConfig): Promise` +- `getAgent(): Agent` +- `getCurrentLoop(): number` + +### AgentSnapshotRunner + +Utility class for managing multiple test cases. + +```typescript +const runner = new AgentSnapshotRunner([ + { + name: 'basic-chat', + path: './test-cases/basic-chat.ts', + snapshotPath: './fixtures/basic-chat' + } +]); + +// Generate all snapshots +await runner.generateAll(); + +// Run all tests +await runner.replayAll(); +``` + +## Configuration Options + +### AgentSnapshotOptions + +```typescript +interface AgentSnapshotOptions { + snapshotPath: string; // Directory for snapshots + snapshotName?: string; // Test case name + updateSnapshots?: boolean; // Update mode flag + normalizerConfig?: AgentNormalizerConfig; + verification?: { + verifyLLMRequests?: boolean; + verifyEventStreams?: boolean; + verifyToolCalls?: boolean; + }; +} +``` + +### Normalizer Configuration + +The normalizer helps create stable snapshots by replacing dynamic values: + +```typescript +interface AgentNormalizerConfig { + fieldsToNormalize?: Array<{ + pattern: string | RegExp; + replacement?: any; + deep?: boolean; + }>; + fieldsToIgnore?: (string | RegExp)[]; + customNormalizers?: Array<{ + pattern: string | RegExp; + normalizer: (value: any, path: string) => any; + }>; +} +``` + +## Snapshot Structure + +Generated snapshots follow this directory structure: + +``` +fixtures/ +└── test-case-name/ + ├── initial/ + │ └── event-stream.jsonl + ├── loop-1/ + │ ├── llm-request.jsonl + │ ├── llm-response.jsonl + │ ├── event-stream.jsonl + │ └── tool-calls.jsonl + ├── loop-2/ + │ └── ... + └── event-stream.jsonl +``` + +## CLI Usage + +```bash +# Generate snapshots +npx agent-snapshot generate my-test-case + +# Run tests +npx agent-snapshot replay my-test-case + +# Update snapshots +npx agent-snapshot replay my-test-case --updateSnapshot +``` + +## Best Practices + +1. **Stable Test Data**: Use the normalizer to handle dynamic values like timestamps and IDs +2. **Focused Tests**: Create separate snapshots for different scenarios +3. **Version Control**: Commit snapshots to ensure consistent test behavior +4. **Update Mode**: Use `--updateSnapshot` carefully and review changes +5. **Verification Settings**: Adjust verification options based on test requirements + +## Troubleshooting + +### Common Issues + +- **Snapshot Mismatch**: Check normalizer configuration for dynamic fields +- **Missing Snapshots**: Ensure snapshots are generated before running tests +- **Loop Count Errors**: Verify agent behavior consistency between runs + +### Debug Tips + +- Enable detailed logging by setting appropriate log levels +- Use `.actual.jsonl` files to compare expected vs actual data +- Review snapshot directory structure for completeness + +## Integration with Testing Frameworks + +### Vitest Example + +```typescript +import { describe, it, expect } from 'vitest'; +import { AgentSnapshot } from '@tarko/agent-snapshot'; + +describe('Agent Tests', () => { + it('should handle basic conversation', async () => { + const snapshot = new AgentSnapshot(agent, { + snapshotPath: './fixtures/basic-conversation' + }); + + const result = await snapshot.replay("Hello world"); + expect(result.meta.loopCount).toBe(1); + }); +}); +``` + +## License +Apache-2.0 \ No newline at end of file diff --git a/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts b/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts index 066984b238..41857a4241 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts @@ -327,8 +327,7 @@ export class AgentReplaySnapshotHook extends AgentHookBase { await this.snapshotManager.verifyRequestSnapshot( path.basename(this.snapshotPath), loopDir, - // @ts-expect-error - payload, + payload as unknown as Record, this.updateSnapshots, ); } catch (error) { diff --git a/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts b/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts index a45280fd27..cebcf0113c 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts @@ -1,5 +1,3 @@ -/* eslint-disable @typescript-eslint/ban-types */ -/* eslint-disable @typescript-eslint/no-explicit-any */ /* * Copyright (c) 2025 Bytedance, Inc. and its affiliates. * SPDX-License-Identifier: Apache-2.0 @@ -10,10 +8,7 @@ import fs from 'fs'; import { Agent } from '@tarko/agent'; import { AgentRunOptions, - AgentRunObjectOptions, AgentEventStream, - isStreamingOptions, - isAgentRunObjectOptions, } from '@tarko/agent-interface'; import { AgentSnapshotOptions, @@ -21,11 +16,11 @@ import { SnapshotRunResult, TestRunConfig, } from './types'; +import { AgentNormalizerConfig } from './utils/snapshot-normalizer'; import { SnapshotManager } from './snapshot-manager'; import { AgentGenerateSnapshotHook } from './agent-generate-snapshot-hook'; import { AgentReplaySnapshotHook } from './agent-replay-snapshot-hook'; import { logger } from './utils/logger'; -import { AgentNormalizerConfig } from './utils/snapshot-normalizer'; /** * Agent Snapshot - Core class for managing agent snapshots and test execution @@ -35,202 +30,86 @@ import { AgentNormalizerConfig } from './utils/snapshot-normalizer'; * 2. Running tests using previously captured snapshots */ export class AgentSnapshot { - private hostedAgent: Agent; + private agent: Agent; private options: AgentSnapshotOptions; private snapshotPath: string; private snapshotName: string; private snapshotManager: SnapshotManager; private replayHook: AgentReplaySnapshotHook; private generateHook: AgentGenerateSnapshotHook | null = null; - /** - * Create a new AgentSnapshot instance - * - * @param agent The agent instance to snapshot/test - * @param options Configuration options - */ + constructor(agent: Agent, options: AgentSnapshotOptions) { - this.hostedAgent = agent; + this.agent = agent; this.options = options; - this.snapshotPath = options.snapshotPath || path.join(process.cwd(), 'fixtures'); - this.snapshotName = options.snapshotName ?? path.basename(options.snapshotPath); + this.snapshotName = options.snapshotName ?? path.basename(this.snapshotPath); this.snapshotManager = new SnapshotManager(this.snapshotPath, options.normalizerConfig); this.replayHook = new AgentReplaySnapshotHook(agent, { - snapshotPath: this.options.snapshotPath || path.join(process.cwd(), 'fixtures'), + snapshotPath: this.snapshotPath, snapshotName: this.snapshotName, }); - // Create directory if it doesn't exist + this.ensureSnapshotDirectory(); + } + + private ensureSnapshotDirectory(): void { if (!fs.existsSync(this.snapshotPath)) { fs.mkdirSync(this.snapshotPath, { recursive: true }); } - - const agentSnapshotProto = Object.getPrototypeOf(this); - const methodsToPreserve: Record = {}; - - Object.getOwnPropertyNames(agentSnapshotProto).forEach((key) => { - const descriptor = Object.getOwnPropertyDescriptor(agentSnapshotProto, key); - if (typeof descriptor?.value === 'function' && key !== 'constructor') { - methodsToPreserve[key] = (this[key as keyof this] as Function).bind(this); - } - }); - - // Set prototype chain to inherit from the original agent - Object.setPrototypeOf(this, Object.getPrototypeOf(agent)); - - // Copy own properties from the original agent to this instance - Object.getOwnPropertyNames(agent).forEach((prop) => { - if (!(prop in this)) { - Object.defineProperty(this, prop, { - get: () => agent[prop as keyof Agent], - set: (value) => { - (agent as any)[prop] = value; - }, - configurable: true, - }); - } - }); - - Object.entries(methodsToPreserve).forEach(([key, method]) => { - (this[key as keyof this] as unknown) = method; - }); } - /** - * Run method with interface aligned with Agent.run - * - * This method serves as a transparent wrapper around the agent's run method - * while simultaneously generating a snapshot of the interaction. - * - * @param input - String input for a basic text message - * @returns The final response event from the agent (stream is false) - */ - async run(input: string): Promise; - - /** - * Run method with interface aligned with Agent.run - * - * @param options - Object with input and optional configuration - * @returns The final response event from the agent (when stream is false) - */ - async run( - options: AgentRunObjectOptions & { stream?: false }, - ): Promise; - - /** - * Run method with interface aligned with Agent.run - * - * @param options - Object with input and streaming enabled - * @returns An async iterable of streaming events - */ - async run( - options: AgentRunObjectOptions & { stream: true }, - ): Promise>; - - /** - * Implementation of the run method to handle all overload cases - * This is a facade that matches Agent.run's interface exactly while generating snapshots - * - * @param runOptions - Input options - */ - async run( - runOptions: AgentRunOptions, - ): Promise> { - logger.info( - `AgentSnapshot.run called with ${typeof runOptions === 'string' ? 'string' : 'options object'}`, - ); - - // Initialize the snapshot generation hook if needed - if (!this.generateHook) { - this.generateHook = new AgentGenerateSnapshotHook(this.hostedAgent, { - snapshotPath: this.options.snapshotPath, - snapshotName: this.snapshotName, - }); + private getLoopCount(): number { + if (!fs.existsSync(this.snapshotPath)) { + return 0; } - // Set current run options and hook into agent - this.generateHook.setCurrentRunOptions(runOptions); - this.generateHook.hookAgent(); + const loopDirs = fs + .readdirSync(this.snapshotPath) + .filter( + (dir) => dir.startsWith('loop-') && fs.statSync(path.join(this.snapshotPath, dir)).isDirectory(), + ) + .sort((a, b) => { + const numA = parseInt(a.split('-')[1], 10); + const numB = parseInt(b.split('-')[1], 10); + return numA - numB; + }); - try { - // Determine if this is a streaming request - const isStreaming = - typeof runOptions === 'object' && - isAgentRunObjectOptions(runOptions) && - isStreamingOptions(runOptions); - - // Run the agent with the provided options - logger.info(`Executing agent with ${isStreaming ? 'streaming' : 'non-streaming'} mode`); - // Call run on the original agent to ensure correct this binding - // @ts-expect-error FIXME: remove string type. - const response = await this.hostedAgent.run(runOptions); - - // Return the response directly to maintain the same interface as Agent.run - return response; - } catch (error) { - logger.error(`Error during AgentSnapshot.run: ${error}`); - throw error; - } finally { - // We don't unhook here as the response might be an AsyncIterable that's consumed later - // The hook will be cleaned up when the agent is done processing - if (this.generateHook) { - this.generateHook.clearError(); - } - } + return loopDirs.length; } /** * Generate a snapshot by executing the agent with real LLM calls - * - * @param runOptions Options to pass to the agent's run method - * @returns Snapshot generation result */ async generate(runOptions: AgentRunOptions): Promise { - // Create unique test name if not provided - const snapshotName = this.snapshotName || `agent-snapshot-${Date.now()}`; - - // Initialize hook manager - this.generateHook = new AgentGenerateSnapshotHook(this.hostedAgent, { - snapshotPath: this.options.snapshotPath || path.join(process.cwd(), 'fixtures'), - snapshotName: snapshotName, + const startTime = Date.now(); + + this.generateHook = new AgentGenerateSnapshotHook(this.agent, { + snapshotPath: this.snapshotPath, + snapshotName: this.snapshotName, }); - if (this.snapshotPath) { - if (!fs.existsSync(this.snapshotPath)) { - fs.mkdirSync(this.snapshotPath, { recursive: true }); - } - } - - logger.info(`Starting snapshot generation for '${snapshotName}'`); - const startTime = Date.now(); + this.ensureSnapshotDirectory(); + logger.info(`Starting snapshot generation for '${this.snapshotName}'`); - // Set current run options and hook into agent this.generateHook.setCurrentRunOptions(runOptions); this.generateHook.hookAgent(); try { - // Run the agent with real LLM - // @ts-expect-error FIXME: remove string type. - const response = await this.hostedAgent.run(runOptions); + const response = await this.agent.run(runOptions as any); - // Check if there was an error in any hook if (this.generateHook.hasError()) { const error = this.generateHook.getLastError(); logger.error(`Error occurred during snapshot generation: ${error?.message}`); throw error; } - // Get all events from event stream - const events = this.hostedAgent.getEventStream().getEvents(); - - // Count the number of loops by checking directories created - const snapshotPath = path.join(this.options.snapshotPath); - const loopCount = this.countLoops(snapshotPath); + const events = this.agent.getEventStream().getEvents(); + const loopCount = this.getLoopCount(); logger.success(`Successfully generated snapshot with ${loopCount} loops`); return { - snapshotPath, + snapshotPath: this.snapshotPath, loopCount, response, events, @@ -240,152 +119,73 @@ export class AgentSnapshot { }, }; } catch (error) { - // Capture any errors from the agent or hooks logger.error(`Snapshot generation failed: ${error}`); throw error; } finally { - // Since the asynchronous iterator will be consumed in the outer layer, we don't unhook here - // But we should clear any errors to prepare for the next run - if (this.generateHook) { - this.generateHook.clearError(); - } + this.generateHook?.clearError(); } } /** * Run the agent using previously captured snapshots - * - * @param runOptions Options to pass to the agent's run method - * @param config Optional test run configuration - * @returns Test execution result */ async replay(runOptions: AgentRunOptions, config?: TestRunConfig): Promise { - // Get test configuration - const snapshotName = this.options.snapshotName || path.basename(this.options.snapshotPath); const updateSnapshots = config?.updateSnapshots || this.options.updateSnapshots || false; + const startTime = Date.now(); - // If a normalizer config was provided for this run, update the snapshot manager if (config?.normalizerConfig) { this.snapshotManager.updateAgentNormalizerConfig(config.normalizerConfig); } - // Merge verification settings from options and run config - const verification = { - verifyLLMRequests: - config?.verification?.verifyLLMRequests !== undefined - ? config.verification.verifyLLMRequests - : this.options.verification?.verifyLLMRequests !== false, - verifyEventStreams: - config?.verification?.verifyEventStreams !== undefined - ? config.verification.verifyEventStreams - : this.options.verification?.verifyEventStreams !== false, - verifyToolCalls: - config?.verification?.verifyToolCalls !== undefined - ? config.verification.verifyToolCalls - : this.options.verification?.verifyToolCalls !== false, - }; + const verification = this.buildVerificationConfig(config); - // Verify snapshot exists if (!fs.existsSync(this.snapshotPath)) { throw new Error( `Snapshot directory not found: ${this.snapshotPath}. Generate snapshots first using .generate()`, ); } + const loopCount = this.getLoopCount(); logger.info( - `Running test against snapshot '${snapshotName}'${updateSnapshots ? ' (update mode)' : ''}`, + `Running test against snapshot '${this.snapshotName}'${updateSnapshots ? ' (update mode)' : ''}`, ); logger.info( - `Verification settings: - LLM requests: ${verification.verifyLLMRequests ? 'enabled' : 'disabled'}, - Event streams: ${verification.verifyEventStreams ? 'enabled' : 'disabled'}, - Tool calls: ${verification.verifyToolCalls ? 'enabled' : 'disabled'}`, + `Verification settings: LLM requests: ${verification.verifyLLMRequests ? 'enabled' : 'disabled'}, ` + + `Event streams: ${verification.verifyEventStreams ? 'enabled' : 'disabled'}, ` + + `Tool calls: ${verification.verifyToolCalls ? 'enabled' : 'disabled'}`, ); - - // Count loop directories to know how many iterations to expect - const loopCount = this.countLoops(this.snapshotPath); logger.info(`Found ${loopCount} loops in test case`); - const startTime = Date.now(); - try { - // Set up mocking with a reference to this instance for loop tracking - await this.replayHook.setup(this.hostedAgent, this.snapshotPath, loopCount, { + await this.replayHook.setup(this.agent, this.snapshotPath, loopCount, { updateSnapshots, - // Pass the normalizer config to the mocker normalizerConfig: config?.normalizerConfig || this.options.normalizerConfig, - // Pass verification settings verification, }); - // Check for errors during setup if (this.replayHook.hasError()) { const error = this.replayHook.getLastError(); logger.error(`Error occurred during test setup: ${error?.message}`); throw error; } - // Get the mock LLM client const mockLLMClient = this.replayHook.getMockLLMClient(); + this.agent.setCustomLLMClient(mockLLMClient!); + this.agent._setIsReplay(); - this.hostedAgent.setCustomLLMClient(mockLLMClient!); - // Create a new agent instance with the mock LLM client - - // Run the agent using mocked LLM - const isStreaming = - typeof runOptions === 'object' && isStreamingOptions(runOptions as AgentRunObjectOptions); - let response; - let events: AgentEventStream.Event[] = []; - - // Set the `isReplay` flag to tell the agent that is replay mode. - this.hostedAgent._setIsReplay(); - - if (isStreaming) { - // Handle streaming mode - // @ts-expect-error FIXME: remove string type. - const asyncIterable = await this.hostedAgent.run(runOptions); - const streamEvents = []; - - // Consume all events from the stream - logger.info(`Processing streaming response...`); - for await (const event of asyncIterable as AsyncIterable) { - // Check for errors between stream events - if (this.replayHook.hasError()) { - const error = this.replayHook.getLastError(); - logger.error(`Error occurred during streaming: ${error?.message}`); - throw error; - } - streamEvents.push(event); - } - - response = asyncIterable; - // Get final events from event stream - events = this.hostedAgent.getEventStream().getEvents(); - - logger.success(`Streaming execution completed with ${streamEvents.length} events`); - } else { - // Handle non-streaming mode - // @ts-expect-error FIXME: remove string type. - response = await this.hostedAgent.run(runOptions); - - // Check for errors after run - if (this.replayHook.hasError()) { - const error = this.replayHook.getLastError(); - logger.error(`Error occurred during execution: ${error?.message}`); - throw error; - } - - // Get final events from event stream - events = this.hostedAgent.getEventStream().getEvents(); - - logger.success(`Execution completed successfully`); + const response = await this.agent.run(runOptions as any); + + if (this.replayHook.hasError()) { + const error = this.replayHook.getLastError(); + logger.error(`Error occurred during execution: ${error?.message}`); + throw error; } - // Verify execution metrics - const executedLoops = this.hostedAgent.getCurrentLoopIteration(); - logger.info( - `Executed ${executedLoops} agent loops out of ${loopCount} expected loops: ${JSON.stringify(this.options)}`, - ); + const events = this.agent.getEventStream().getEvents(); + const executedLoops = this.agent.getCurrentLoopIteration(); + + logger.success(`Execution completed successfully`); + logger.info(`Executed ${executedLoops} agent loops out of ${loopCount} expected loops`); if (executedLoops !== loopCount) { throw new Error( @@ -393,7 +193,6 @@ export class AgentSnapshot { ); } - // Final cleanup of any leftover actual files - call the unified method if (this.snapshotManager) { await this.snapshotManager.cleanupAllActualFiles(this.snapshotName); } @@ -402,63 +201,54 @@ export class AgentSnapshot { response, events, meta: { - snapshotName, + snapshotName: this.snapshotName, executionTime: Date.now() - startTime, loopCount: executedLoops, }, }; } catch (error) { - // Propagate any errors from the run or hooks logger.error(`Test execution failed: ${error}`); throw error; } finally { - // Clear any errors to prepare for the next run this.replayHook.clearError(); } } - /** - * Count the number of loop directories in the snapshot - */ - private countLoops(casePath: string): number { - if (!fs.existsSync(casePath)) { - return 0; - } - - const loopDirs = fs - .readdirSync(casePath) - .filter( - (dir) => dir.startsWith('loop-') && fs.statSync(path.join(casePath, dir)).isDirectory(), - ) - .sort((a, b) => { - const numA = parseInt(a.split('-')[1], 10); - const numB = parseInt(b.split('-')[1], 10); - return numA - numB; - }); - - return loopDirs.length; + private buildVerificationConfig(config?: TestRunConfig) { + return { + verifyLLMRequests: + config?.verification?.verifyLLMRequests !== undefined + ? config.verification.verifyLLMRequests + : this.options.verification?.verifyLLMRequests !== false, + verifyEventStreams: + config?.verification?.verifyEventStreams !== undefined + ? config.verification.verifyEventStreams + : this.options.verification?.verifyEventStreams !== false, + verifyToolCalls: + config?.verification?.verifyToolCalls !== undefined + ? config.verification.verifyToolCalls + : this.options.verification?.verifyToolCalls !== false, + }; } /** * Get the underlying agent instance */ getAgent(): Agent { - return this.hostedAgent; + return this.agent; } /** * Get the current loop number directly from Agent */ getCurrentLoop(): number { - return this.hostedAgent.getCurrentLoopIteration(); + return this.agent.getCurrentLoopIteration(); } /** * Update the normalizer configuration - * - * @param config New normalizer configuration */ updateAgentNormalizerConfig(config: AgentNormalizerConfig): void { this.snapshotManager.updateAgentNormalizerConfig(config); } -} +} \ No newline at end of file diff --git a/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts b/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts index 9abeabf8e7..7416caa46f 100644 --- a/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts +++ b/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts @@ -43,7 +43,6 @@ export class SnapshotManager { */ private getSnapshotPath(caseName: string, loopDir: string, filename: string): string { if (loopDir === '') { - // Root level files are stored directly in the case directory return path.join(this.fixturesRoot, caseName, filename); } return path.join(this.fixturesRoot, caseName, loopDir, filename); @@ -62,18 +61,13 @@ export class SnapshotManager { try { const content = fs.readFileSync(filePath, 'utf-8'); - // Special handling for llm-response.jsonl files if (filename === 'llm-response.jsonl') { try { - // First try to parse as a single response object return JSON.parse(content) as T; } catch (parseError) { - // If that fails, try to parse as a streaming response (array of chunks) - // Split by newlines, filter out empty lines, and parse each line const lines = content.split('\n').filter((line) => line.trim()); if (lines.length > 0) { try { - // Try parsing each line and combine into an array const chunks = lines.map((line) => JSON.parse(line)); return chunks as unknown as T; } catch (lineParseError) { @@ -85,7 +79,6 @@ export class SnapshotManager { } } - // Standard parsing for other file types return JSON.parse(content) as T; } catch (error) { logger.error(`Error reading snapshot from ${filePath}: ${error}`); @@ -105,7 +98,6 @@ export class SnapshotManager { const filePath = this.getSnapshotPath(caseName, loopDir, filename); const dirPath = path.dirname(filePath); - // Ensure directory exists if (!fs.existsSync(dirPath)) { await fs.promises.mkdir(dirPath, { recursive: true }); } @@ -119,6 +111,10 @@ export class SnapshotManager { } } + private getActualFilename(filename: string): string { + return filename.replace(/(\.[^.]+)$/, '.actual$1'); + } + /** * Write actual data to a separate file when verification fails */ @@ -128,12 +124,10 @@ export class SnapshotManager { filename: string, data: T, ): Promise { - // Generate actual filename by inserting .actual before the extension - const actualFilename = filename.replace(/(\.[^.]+)$/, '.actual$1'); + const actualFilename = this.getActualFilename(filename); const actualFilePath = this.getSnapshotPath(caseName, loopDir, actualFilename); await this.writeSnapshot(caseName, loopDir, actualFilename, data); - logger.info(`Actual data written to ${actualFilePath}`); return actualFilePath; @@ -147,7 +141,7 @@ export class SnapshotManager { loopDir: string, filename: string, ): Promise { - const actualFilename = filename.replace(/(\.[^.]+)$/, '.actual$1'); + const actualFilename = this.getActualFilename(filename); const actualFilePath = this.getSnapshotPath(caseName, loopDir, actualFilename); if (fs.existsSync(actualFilePath)) { @@ -160,11 +154,55 @@ export class SnapshotManager { } } + /** + * Generic verification method to reduce code duplication + */ + private async verifySnapshot( + caseName: string, + loopDir: string, + filename: string, + actualData: T, + updateSnapshots: boolean, + dataType: string, + ): Promise { + const expectedData = await this.readSnapshot(caseName, loopDir, filename); + + if (!expectedData) { + if (updateSnapshots) { + await this.writeSnapshot(caseName, loopDir, filename, actualData); + logger.success(`✅ Created new ${dataType} snapshot for ${caseName}/${loopDir}`); + return true; + } + throw new Error(`No ${dataType} snapshot found for ${caseName}/${loopDir}`); + } + + if (updateSnapshots) { + await this.writeSnapshot(caseName, loopDir, filename, actualData); + logger.warn( + `⚠️ Skipping ${dataType} verification for ${caseName}/${loopDir}, updating snapshot directly`, + ); + return true; + } + + const result = this.normalizer.compare(expectedData, actualData); + + if (!result.equal) { + await this.writeActualData(caseName, loopDir, filename, actualData); + logger.error(`❌ ${dataType} comparison failed for ${caseName}/${loopDir}:\n${result.diff}`); + + const actualPath = loopDir ? `${loopDir}/${this.getActualFilename(filename)}` : this.getActualFilename(filename); + throw new Error( + `${dataType} doesn't match for ${caseName}/${loopDir}. Actual data saved to ${actualPath}`, + ); + } + + await this.deleteActualDataIfExists(caseName, loopDir, filename); + logger.success(`✅ ${dataType} comparison passed for ${caseName}/${loopDir}`); + return true; + } + /** * Clean up all .actual.jsonl files in a given snapshot directory and its subdirectories - * - * @param caseName The name of the test case - * @returns Number of files cleaned up */ async cleanupAllActualFiles(caseName: string): Promise { const casePath = path.join(this.fixturesRoot, caseName); @@ -174,7 +212,6 @@ export class SnapshotManager { } try { - // Find all .actual.jsonl files in the snapshot directory and subdirectories const findActualFiles = (dir: string): string[] => { const results: string[] = []; const files = fs.readdirSync(dir); @@ -193,7 +230,6 @@ export class SnapshotManager { const actualFiles = findActualFiles(casePath); - // Delete each actual file for (const file of actualFiles) { try { await fs.promises.unlink(file); @@ -225,50 +261,14 @@ export class SnapshotManager { actualEventStream: AgentEventStream.Event[], updateSnapshots = false, ): Promise { - const filename = 'event-stream.jsonl'; - const expectedEventStream = await this.readSnapshot( + return this.verifySnapshot( caseName, loopDir, - filename, + 'event-stream.jsonl', + actualEventStream, + updateSnapshots, + 'Event stream', ); - - if (!expectedEventStream) { - if (updateSnapshots) { - await this.writeSnapshot(caseName, loopDir, filename, actualEventStream); - logger.success(`✅ Created new event stream snapshot for ${caseName}/${loopDir}`); - return true; - } - throw new Error(`No event stream snapshot found for ${caseName}/${loopDir}`); - } - - // Skip verification and directly update if updateSnapshots is true - if (updateSnapshots) { - await this.writeSnapshot(caseName, loopDir, filename, actualEventStream); - logger.warn( - `⚠️ Skipping event stream verification for ${caseName}/${loopDir}, updating snapshot directly`, - ); - return true; - } - - // Use the new normalizer to compare event streams - const result = this.normalizer.compare(expectedEventStream, actualEventStream); - - if (!result.equal) { - // Always write actual data for diagnostics - await this.writeActualData(caseName, loopDir, filename, actualEventStream); - - logger.error(`❌ Event stream comparison failed for ${caseName}/${loopDir}:\n${result.diff}`); - - throw new Error( - `Event stream doesn't match for ${caseName}/${loopDir}. ` + - `Actual data saved to ${loopDir ? `${loopDir}/` : ''}event-stream.actual.jsonl`, - ); - } - - // Verification passed, clean up any actual data files - await this.deleteActualDataIfExists(caseName, loopDir, filename); - logger.success(`✅ Event stream comparison passed for ${caseName}/${loopDir}`); - return true; } /** @@ -280,53 +280,15 @@ export class SnapshotManager { actualRequest: Record, updateSnapshots = false, ): Promise { - // Clone the request to prevent modifications - actualRequest = JSON.parse(JSON.stringify(actualRequest)); - const filename = 'llm-request.jsonl'; - - const expectedRequest = await this.readSnapshot>( + const clonedRequest = JSON.parse(JSON.stringify(actualRequest)); + return this.verifySnapshot( caseName, loopDir, - filename, + 'llm-request.jsonl', + clonedRequest, + updateSnapshots, + 'Request', ); - - if (!expectedRequest) { - if (updateSnapshots) { - await this.writeSnapshot(caseName, loopDir, filename, actualRequest); - logger.success(`✅ Created new request snapshot for ${caseName}/${loopDir}`); - return true; - } - throw new Error(`No request snapshot found for ${caseName}/${loopDir}`); - } - - // Skip verification and directly update if updateSnapshots is true - if (updateSnapshots) { - await this.writeSnapshot(caseName, loopDir, filename, actualRequest); - logger.warn( - `⚠️ Skipping request verification for ${caseName}/${loopDir}, updating snapshot directly`, - ); - return true; - } - - // Use the new normalizer for comparison - const result = this.normalizer.compare(expectedRequest, actualRequest); - - if (!result.equal) { - // Always write actual data for diagnostics - await this.writeActualData(caseName, loopDir, filename, actualRequest); - - logger.error(`❌ Request comparison failed for ${caseName}/${loopDir}:\n${result.diff}`); - - throw new Error( - `Request doesn't match for ${caseName}/${loopDir}. ` + - `Actual data saved to ${loopDir}/llm-request.actual.jsonl`, - ); - } - - // Verification passed, clean up any actual data files - await this.deleteActualDataIfExists(caseName, loopDir, filename); - logger.success(`✅ LLM request comparison passed for ${caseName}/${loopDir}`); - return true; } /** @@ -338,49 +300,15 @@ export class SnapshotManager { actualToolCalls: ToolCallData[], updateSnapshots = false, ): Promise { - // Clone the tool calls to prevent modifications - actualToolCalls = JSON.parse(JSON.stringify(actualToolCalls)); - const filename = 'tool-calls.jsonl'; - - const expectedToolCalls = await this.readSnapshot(caseName, loopDir, filename); - - if (!expectedToolCalls) { - if (updateSnapshots) { - await this.writeSnapshot(caseName, loopDir, filename, actualToolCalls); - logger.success(`✅ Created new tool calls snapshot for ${caseName}/${loopDir}`); - return true; - } - throw new Error(`No tool calls snapshot found for ${caseName}/${loopDir}`); - } - - // Skip verification and directly update if updateSnapshots is true - if (updateSnapshots) { - await this.writeSnapshot(caseName, loopDir, filename, actualToolCalls); - logger.warn( - `⚠️ Skipping tool calls verification for ${caseName}/${loopDir}, updating snapshot directly`, - ); - return true; - } - - // Use the normalizer for comparison - const result = this.normalizer.compare(expectedToolCalls, actualToolCalls); - - if (!result.equal) { - // Always write actual data for diagnostics - await this.writeActualData(caseName, loopDir, filename, actualToolCalls); - - logger.error(`❌ Tool calls comparison failed for ${caseName}/${loopDir}:\n${result.diff}`); - - throw new Error( - `Tool calls don't match for ${caseName}/${loopDir}. ` + - `Actual data saved to ${loopDir}/tool-calls.actual.jsonl`, - ); - } - - // Verification passed, clean up any actual data files - await this.deleteActualDataIfExists(caseName, loopDir, filename); - logger.success(`✅ Tool calls comparison passed for ${caseName}/${loopDir}`); - return true; + const clonedToolCalls = JSON.parse(JSON.stringify(actualToolCalls)); + return this.verifySnapshot( + caseName, + loopDir, + 'tool-calls.jsonl', + clonedToolCalls, + updateSnapshots, + 'Tool calls', + ); } /** @@ -389,12 +317,10 @@ export class SnapshotManager { async createTestCaseStructure(caseName: string, numLoops: number): Promise { const caseDir = path.join(this.fixturesRoot, caseName); - // Create case directory if (!fs.existsSync(caseDir)) { await fs.promises.mkdir(caseDir, { recursive: true }); } - // Create loop directories for (let i = 1; i <= numLoops; i++) { const loopDir = path.join(caseDir, `loop-${i}`); if (!fs.existsSync(loopDir)) { @@ -402,7 +328,6 @@ export class SnapshotManager { } } - // Create initial directory for pre-loop state const initialDir = path.join(caseDir, 'initial'); if (!fs.existsSync(initialDir)) { await fs.promises.mkdir(initialDir, { recursive: true }); @@ -424,19 +349,16 @@ export class SnapshotManager { const filePath = this.getSnapshotPath(caseName, loopDir, filename); const dirPath = path.dirname(filePath); - // Ensure directory exists if (!fs.existsSync(dirPath)) { await fs.promises.mkdir(dirPath, { recursive: true }); } - // Check if file already exists and shouldn't be updated if (fs.existsSync(filePath) && !updateIfExists) { logger.info(`Skipping write to existing file: ${filePath}`); return; } try { - // Serialize each chunk as a separate JSON line const chunksAsJsonLines = chunks.map((chunk) => JSON.stringify(chunk)).join('\n'); await fs.promises.writeFile(filePath, chunksAsJsonLines, 'utf-8'); logger.info(`Stream chunks written to ${filePath} (${chunks.length} chunks)`); @@ -458,14 +380,12 @@ export class SnapshotManager { try { const content = await fs.promises.readFile(filePath, 'utf-8'); - // Split by lines, filter empty lines, parse each line const lines = content.split('\n').filter((line) => line.trim()); if (lines.length === 0) { return []; } try { - // Parse each line as an object return lines.map((line) => JSON.parse(line)) as T[]; } catch (lineParseError) { logger.error(`Error parsing streaming chunks: ${lineParseError}`); @@ -483,4 +403,4 @@ export class SnapshotManager { updateAgentNormalizerConfig(config: AgentNormalizerConfig): void { this.normalizer = new AgentSnapshotNormalizer(config); } -} +} \ No newline at end of file From 8d8e43962261bb129df4d0946310702be1aa9bab Mon Sep 17 00:00:00 2001 From: chenhaoli Date: Fri, 10 Oct 2025 08:29:55 +0800 Subject: [PATCH 2/6] refactor(agent-snapshot): optimize code architecture and reduce duplication - Remove custom diff implementation, use existing snapshot-diff library - Extract ToolCallTracker utility to eliminate code duplication between hooks - Improve type safety by replacing 'any' with 'unknown' types - Simplify hook management with helper methods in AgentHookBase - Remove redundant comments and improve code clarity - Optimize CLI command handling in AgentSnapshotRunner - Maintain all existing Public APIs without breaking changes --- multimodal/omni-tars/omni-agent/src/index.ts | 2 +- .../src/agent-generate-snapshot-hook.ts | 138 +--------- .../agent-snapshot/src/agent-hook-base.ts | 92 +++---- .../src/agent-replay-snapshot-hook.ts | 87 +----- .../src/agent-snapshot-runner.ts | 154 ++++------- multimodal/tarko/agent-snapshot/src/index.ts | 1 + .../agent-snapshot/src/snapshot-manager.ts | 25 +- .../src/utils/snapshot-normalizer.ts | 254 ++---------------- .../src/utils/tool-call-tracker.ts | 68 +++++ 9 files changed, 201 insertions(+), 620 deletions(-) create mode 100644 multimodal/tarko/agent-snapshot/src/utils/tool-call-tracker.ts diff --git a/multimodal/omni-tars/omni-agent/src/index.ts b/multimodal/omni-tars/omni-agent/src/index.ts index a16934fc62..ffdceef47a 100644 --- a/multimodal/omni-tars/omni-agent/src/index.ts +++ b/multimodal/omni-tars/omni-agent/src/index.ts @@ -199,7 +199,7 @@ export default class OmniTARSAgent extends ComposableAgent { 'https://images.unsplash.com/photo-1493225457124-a3eb161ffa5f?w=400&h=300&fit=crop&crop=center', }, ], - workspace: { + z``: { navItems: [ { title: 'Code Server', diff --git a/multimodal/tarko/agent-snapshot/src/agent-generate-snapshot-hook.ts b/multimodal/tarko/agent-snapshot/src/agent-generate-snapshot-hook.ts index 0b2a614178..9741972e58 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-generate-snapshot-hook.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-generate-snapshot-hook.ts @@ -15,18 +15,9 @@ import { } from '@tarko/agent-interface'; import { logger } from './utils/logger'; import { AgentHookBase } from './agent-hook-base'; +import { ToolCallTracker, ToolCallData } from './utils/tool-call-tracker'; + -/** - * Structure to store tool call data for snapshot - */ -interface ToolCallData { - toolCallId: string; - name: string; - args: unknown; - result?: unknown; - error?: unknown; - executionTime?: number; -} /** * Agent Generate Snapshot Hook - Manages hooks into agent for test snapshot generation @@ -34,8 +25,7 @@ interface ToolCallData { export class AgentGenerateSnapshotHook extends AgentHookBase { private llmRequests: Record = {}; private llmResponses: Record = {}; - private toolCallsByLoop: Record = {}; - private startTimeByToolCall: Record = {}; + private toolCallTracker = new ToolCallTracker(); constructor( agent: Agent, @@ -47,48 +37,31 @@ export class AgentGenerateSnapshotHook extends AgentHookBase { super(agent, options); } - /** - * Hook called at the beginning of each agent loop - */ protected onEachAgentLoopStart(id: string): void | Promise { - logger.info(`Starting agent loop ${this.agent.getCurrentLoopIteration()}`); - - // Initialize tool calls array for this loop const currentLoop = this.agent.getCurrentLoopIteration(); - if (!this.toolCallsByLoop[currentLoop]) { - this.toolCallsByLoop[currentLoop] = []; - } + logger.info(`Starting agent loop ${currentLoop}`); + this.toolCallTracker.initializeLoop(currentLoop); - // Call original hook if it exists if (this.originalEachLoopStartHook) { return this.originalEachLoopStartHook.call(this.agent, id); } } - /** - * Hook called before sending a request to the LLM - */ protected onLLMRequest(id: string, payload: LLMRequestHookPayload): void | Promise { - // Get current loop from the Agent directly const currentLoop = this.agent.getCurrentLoopIteration(); - - // Store the request for current loop this.llmRequests[currentLoop] = payload; - // Create loop directory const loopDir = path.join(this.snapshotPath, `loop-${currentLoop}`); if (!fs.existsSync(loopDir)) { fs.mkdirSync(loopDir, { recursive: true }); } - // Write request to file fs.writeFileSync( path.join(loopDir, 'llm-request.jsonl'), JSON.stringify(payload, null, 2), 'utf-8', ); - // Dump current event stream state const events = this.agent.getEventStream().getEvents(); fs.writeFileSync( path.join(loopDir, 'event-stream.jsonl'), @@ -96,172 +69,97 @@ export class AgentGenerateSnapshotHook extends AgentHookBase { 'utf-8', ); - // Call original hook if it exists if (this.originalRequestHook) { return this.originalRequestHook.call(this.agent, id, payload); } } - /** - * Hook called after receiving a response from the LLM - */ protected onLLMResponse(id: string, payload: LLMResponseHookPayload): void | Promise { - // Store the response for the current loop using Agent's loop count const currentLoop = this.agent.getCurrentLoopIteration(); this.llmResponses[currentLoop] = payload; - // Call original hook if it exists if (this.originalResponseHook) { return this.originalResponseHook.call(this.agent, id, payload); } } - /** - * Hook called for streaming responses from the LLM - */ protected onLLMStreamingResponse(id: string, payload: LLMStreamingResponseHookPayload): void { const currentLoop = this.agent.getCurrentLoopIteration(); - const loopDir = `loop-${currentLoop}`; + const responsePath = path.join(this.snapshotPath, `loop-${currentLoop}`, 'llm-response.jsonl'); try { - // Get path to save response - const responsePath = path.join(this.snapshotPath, loopDir, 'llm-response.jsonl'); - - // Write streaming chunks to file this.writeStreamingChunks(responsePath, payload.chunks); - - logger.info(`Saved ${payload.chunks.length} streaming chunks for ${loopDir}`); + logger.info(`Saved ${payload.chunks.length} streaming chunks for loop-${currentLoop}`); } catch (error) { logger.error(`Failed to save streaming chunks: ${error}`); } - // Call original hook if it exists if (this.originalStreamingResponseHook) { this.originalStreamingResponseHook.call(this.agent, id, payload); } } - /** - * Hook called before a tool is executed - */ protected onBeforeToolCall( id: string, toolCall: { toolCallId: string; name: string }, args: unknown, ): unknown { const currentLoop = this.agent.getCurrentLoopIteration(); - - // Record starting time to calculate execution time later - this.startTimeByToolCall[toolCall.toolCallId] = Date.now(); - - // Store tool call information - if (!this.toolCallsByLoop[currentLoop]) { - this.toolCallsByLoop[currentLoop] = []; - } - - this.toolCallsByLoop[currentLoop].push({ - toolCallId: toolCall.toolCallId, - name: toolCall.name, - args, - }); + this.toolCallTracker.startToolCall(currentLoop, toolCall, args); logger.debug( `Tool call captured for ${toolCall.name} (${toolCall.toolCallId}) in loop ${currentLoop}`, ); - // Call original hook if it exists if (this.originalBeforeToolCallHook) { return this.originalBeforeToolCallHook.call(this.agent, id, toolCall, args); } - return args; } - /** - * Hook called after a tool is executed - */ protected onAfterToolCall( id: string, toolCall: { toolCallId: string; name: string }, result: unknown, ): unknown { const currentLoop = this.agent.getCurrentLoopIteration(); - const executionTime = - Date.now() - (this.startTimeByToolCall[toolCall.toolCallId] || Date.now()); - - // Find the tool call in our records and update with result - if (this.toolCallsByLoop[currentLoop]) { - const toolCallData = this.toolCallsByLoop[currentLoop].find( - (tc) => tc.toolCallId === toolCall.toolCallId, - ); - - if (toolCallData) { - toolCallData.result = result; - toolCallData.executionTime = executionTime; - } - } + this.toolCallTracker.finishToolCall(currentLoop, toolCall.toolCallId, result); logger.debug( `Tool call result captured for ${toolCall.name} (${toolCall.toolCallId}) in loop ${currentLoop}`, ); - // Write tool calls to file for current loop this.saveToolCalls(currentLoop); - // Call original hook if it exists if (this.originalAfterToolCallHook) { return this.originalAfterToolCallHook.call(this.agent, id, toolCall, result); } - return result; } - /** - * Hook called when a tool execution results in an error - */ protected onToolCallError( id: string, toolCall: { toolCallId: string; name: string }, error: unknown, ): unknown { const currentLoop = this.agent.getCurrentLoopIteration(); - const executionTime = - Date.now() - (this.startTimeByToolCall[toolCall.toolCallId] || Date.now()); - - // Find the tool call in our records and update with error - if (this.toolCallsByLoop[currentLoop]) { - const toolCallData = this.toolCallsByLoop[currentLoop].find( - (tc) => tc.toolCallId === toolCall.toolCallId, - ); - - if (toolCallData) { - toolCallData.error = error; - toolCallData.executionTime = executionTime; - } - } + this.toolCallTracker.finishToolCall(currentLoop, toolCall.toolCallId, undefined, error); logger.debug( `Tool call error captured for ${toolCall.name} (${toolCall.toolCallId}) in loop ${currentLoop}`, ); - // Write tool calls to file for current loop this.saveToolCalls(currentLoop); - // Call original hook if it exists if (this.originalToolCallErrorHook) { return this.originalToolCallErrorHook.call(this.agent, id, toolCall, error); } - return `Error: ${error}`; } - /** - * Save tool calls data to file for current loop - */ private saveToolCalls(loopNumber: number): void { - if (!this.toolCallsByLoop[loopNumber] || this.toolCallsByLoop[loopNumber].length === 0) { - return; - } + const toolCalls = this.toolCallTracker.getToolCallsForLoop(loopNumber); + if (toolCalls.length === 0) return; try { const loopDir = path.join(this.snapshotPath, `loop-${loopNumber}`); @@ -269,26 +167,19 @@ export class AgentGenerateSnapshotHook extends AgentHookBase { fs.mkdirSync(loopDir, { recursive: true }); } - // Write tool calls to file fs.writeFileSync( path.join(loopDir, 'tool-calls.jsonl'), - JSON.stringify(this.toolCallsByLoop[loopNumber], null, 2), + JSON.stringify(toolCalls, null, 2), 'utf-8', ); - logger.info( - `Saved ${this.toolCallsByLoop[loopNumber].length} tool calls for loop ${loopNumber}`, - ); + logger.info(`Saved ${toolCalls.length} tool calls for loop ${loopNumber}`); } catch (error) { logger.error(`Failed to save tool calls for loop ${loopNumber}: ${error}`); } } - /** - * Hook called at the end of the agent's execution loop - */ protected onAgentLoopEnd(id: string): void | Promise { - // Export final event stream state to the root directory const finalEvents = this.agent.getEventStream().getEvents(); fs.writeFileSync( path.join(this.snapshotPath, 'event-stream.jsonl'), @@ -298,7 +189,6 @@ export class AgentGenerateSnapshotHook extends AgentHookBase { logger.info(`Snapshot generation completed: ${this.snapshotPath}`); - // Call original hook if it exists if (this.originalLoopEndHook) { return this.originalLoopEndHook.call(this.agent, id); } diff --git a/multimodal/tarko/agent-snapshot/src/agent-hook-base.ts b/multimodal/tarko/agent-snapshot/src/agent-hook-base.ts index 09c9420a3e..a24d77279f 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-hook-base.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-hook-base.ts @@ -70,7 +70,15 @@ export abstract class AgentHookBase { hookAgent(): boolean { if (this.isHooked) return false; - // Store original hooks + this.storeOriginalHooks(); + this.installNewHooks(); + + this.isHooked = true; + logger.info(`Hooked into agent: ${this.snapshotName}`); + return true; + } + + private storeOriginalHooks(): void { this.originalRequestHook = this.agent.onLLMRequest; this.originalResponseHook = this.agent.onLLMResponse; this.originalStreamingResponseHook = this.agent.onLLMStreamingResponse; @@ -80,8 +88,9 @@ export abstract class AgentHookBase { this.originalAfterToolCallHook = this.agent.onAfterToolCall; this.originalToolCallErrorHook = this.agent.onToolCallError; this.originalProcessToolCallsHook = this.agent.onProcessToolCalls; + } - // Replace with our hooks + private installNewHooks(): void { this.agent.onLLMRequest = (id, payload) => this.safeExecuteHook(() => this.onLLMRequest(id, payload)); this.agent.onLLMResponse = (id, payload) => @@ -99,82 +108,49 @@ export abstract class AgentHookBase { this.safeExecuteHook(() => this.onToolCallError(id, toolCall, error)); this.agent.onProcessToolCalls = (id, toolCalls) => this.safeExecuteHook(() => this.onProcessToolCalls(id, toolCalls)); - - this.isHooked = true; - logger.info(`Hooked into agent: ${this.snapshotName}`); - return true; } /** * Unhook from the agent, restoring original hooks - * @param force If true, force unhooking even if isHooked is false */ unhookAgent(force = false): boolean { if (!this.isHooked && !force) return false; - // Restore original hooks - if (this.originalRequestHook) { - this.agent.onLLMRequest = this.originalRequestHook; - } - - if (this.originalResponseHook) { - this.agent.onLLMResponse = this.originalResponseHook; - } - - if (this.originalStreamingResponseHook) { - this.agent.onLLMStreamingResponse = this.originalStreamingResponseHook; - } - - if (this.originalLoopEndHook) { - this.agent.onAgentLoopEnd = this.originalLoopEndHook; - } - - if (this.originalEachLoopStartHook) { - this.agent.onEachAgentLoopStart = this.originalEachLoopStartHook; - } - - if (this.originalBeforeToolCallHook) { - this.agent.onBeforeToolCall = this.originalBeforeToolCallHook; - } - - if (this.originalAfterToolCallHook) { - this.agent.onAfterToolCall = this.originalAfterToolCallHook; - } - - if (this.originalToolCallErrorHook) { - this.agent.onToolCallError = this.originalToolCallErrorHook; - } - - if (this.originalProcessToolCallsHook) { - this.agent.onProcessToolCalls = this.originalProcessToolCallsHook; - } - + this.restoreOriginalHooks(); this.isHooked = false; logger.info(`Unhooked from agent: ${this.snapshotName}`); return true; } + private restoreOriginalHooks(): void { + const hooks = [ + { original: this.originalRequestHook, target: 'onLLMRequest' }, + { original: this.originalResponseHook, target: 'onLLMResponse' }, + { original: this.originalStreamingResponseHook, target: 'onLLMStreamingResponse' }, + { original: this.originalLoopEndHook, target: 'onAgentLoopEnd' }, + { original: this.originalEachLoopStartHook, target: 'onEachAgentLoopStart' }, + { original: this.originalBeforeToolCallHook, target: 'onBeforeToolCall' }, + { original: this.originalAfterToolCallHook, target: 'onAfterToolCall' }, + { original: this.originalToolCallErrorHook, target: 'onToolCallError' }, + { original: this.originalProcessToolCallsHook, target: 'onProcessToolCalls' }, + ] as const; + + hooks.forEach(({ original, target }) => { + if (original) { + (this.agent as any)[target] = original; + } + }); + } + /** * Safely execute a hook function, capturing any errors */ protected async safeExecuteHook(hookFn: () => T | Promise) { try { - const result = await hookFn(); - - // Handle both synchronous and asynchronous results - if (result instanceof Promise) { - return result.catch((error) => { - this.lastError = error; - logger.error(`Hook execution error: ${error.message}`); - throw error; // Re-throw to propagate - }); - } - - return result; + return await hookFn(); } catch (error) { this.lastError = error as Error; logger.error(`Hook execution error: ${(error as Error).message}`); - // do not throw it. } } @@ -203,13 +179,11 @@ export abstract class AgentHookBase { * Write streaming chunks to a file */ protected writeStreamingChunks(filePath: string, chunks: ChatCompletionChunk[]): void { - // Skip if no chunks if (!chunks || chunks.length === 0) { return; } try { - // Format each chunk as a JSON line const chunksAsJsonLines = chunks.map((chunk) => JSON.stringify(chunk)).join('\n'); fs.writeFileSync(filePath, chunksAsJsonLines, 'utf-8'); logger.debug(`${chunks.length} chunks written to ${filePath}`); diff --git a/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts b/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts index 41857a4241..796a9abe08 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts @@ -5,7 +5,7 @@ import path from 'path'; import { Agent } from '@tarko/agent'; -import { SnapshotManager, ToolCallData } from './snapshot-manager'; +import { SnapshotManager } from './snapshot-manager'; import { logger } from './utils/logger'; import { AgentEventStream, @@ -20,6 +20,7 @@ import { } from '@tarko/agent-interface'; import { AgentHookBase } from './agent-hook-base'; import { AgentNormalizerConfig } from './utils/snapshot-normalizer'; +import { ToolCallTracker, ToolCallData } from './utils/tool-call-tracker'; interface LLMMockerSetupOptions { updateSnapshots?: boolean; @@ -46,8 +47,7 @@ export class AgentReplaySnapshotHook extends AgentHookBase { private verifyLLMRequests = true; private verifyEventStreams = true; private verifyToolCalls = true; - private toolCallsByLoop: Record = {}; - private startTimeByToolCall: Record = {}; + private toolCallTracker = new ToolCallTracker(); /** * Set up the LLM mocker with an agent and test case @@ -257,18 +257,10 @@ export class AgentReplaySnapshotHook extends AgentHookBase { }; } - /** - * Hook implementation for agent loop start - */ protected onEachAgentLoopStart(id: string): void | Promise { const currentLoop = this.agent.getCurrentLoopIteration(); + this.toolCallTracker.initializeLoop(currentLoop); - // Initialize tool calls array for this loop - if (!this.toolCallsByLoop[currentLoop]) { - this.toolCallsByLoop[currentLoop] = []; - } - - // Pass through to original hook if present if (this.originalEachLoopStartHook) { return this.originalEachLoopStartHook.call(this.agent, id); } @@ -373,20 +365,14 @@ export class AgentReplaySnapshotHook extends AgentHookBase { } } - /** - * Hook implementation for before tool call - */ protected onBeforeToolCall( id: string, toolCall: { toolCallId: string; name: string }, args: unknown, ): unknown { const currentLoop = this.agent.getCurrentLoopIteration(); + this.toolCallTracker.startToolCall(currentLoop, toolCall, args); - // Record starting time to calculate execution time later - this.startTimeByToolCall[toolCall.toolCallId] = Date.now(); - - // Load expected tool calls from snapshot if (this.verifyToolCalls) { this.loadToolCallsFromSnapshot(currentLoop).catch((error) => { logger.error(`Error loading tool calls from snapshot: ${error}`); @@ -396,58 +382,28 @@ export class AgentReplaySnapshotHook extends AgentHookBase { }); } - // Add tool call to the current loop's collection - if (!this.toolCallsByLoop[currentLoop]) { - this.toolCallsByLoop[currentLoop] = []; - } - - this.toolCallsByLoop[currentLoop].push({ - toolCallId: toolCall.toolCallId, - name: toolCall.name, - args, - }); - logger.debug( `Tool call intercepted for ${toolCall.name} (${toolCall.toolCallId}) in loop ${currentLoop}`, ); - // Call original hook if present if (this.originalBeforeToolCallHook) { return this.originalBeforeToolCallHook.call(this.agent, id, toolCall, args); } - return args; } - /** - * Hook implementation for after tool call - */ protected onAfterToolCall( id: string, toolCall: { toolCallId: string; name: string }, result: unknown, ): unknown { const currentLoop = this.agent.getCurrentLoopIteration(); - const executionTime = - Date.now() - (this.startTimeByToolCall[toolCall.toolCallId] || Date.now()); - - // Find and update the corresponding tool call record - if (this.toolCallsByLoop[currentLoop]) { - const toolCallData = this.toolCallsByLoop[currentLoop].find( - (tc) => tc.toolCallId === toolCall.toolCallId, - ); - - if (toolCallData) { - toolCallData.result = result; - toolCallData.executionTime = executionTime; - } - } + this.toolCallTracker.finishToolCall(currentLoop, toolCall.toolCallId, result); logger.debug( `Tool call result intercepted for ${toolCall.name} (${toolCall.toolCallId}) in loop ${currentLoop}`, ); - // Verify tool calls if enabled if (this.verifyToolCalls) { this.verifyToolCallsForLoop(currentLoop).catch((error) => { logger.error(`Error verifying tool calls: ${error}`); @@ -457,43 +413,24 @@ export class AgentReplaySnapshotHook extends AgentHookBase { }); } - // Call original hook if present if (this.originalAfterToolCallHook) { return this.originalAfterToolCallHook.call(this.agent, id, toolCall, result); } - return result; } - /** - * Hook implementation for tool call error - */ protected onToolCallError( id: string, toolCall: { toolCallId: string; name: string }, error: unknown, ): unknown { const currentLoop = this.agent.getCurrentLoopIteration(); - const executionTime = - Date.now() - (this.startTimeByToolCall[toolCall.toolCallId] || Date.now()); - - // Find and update the corresponding tool call record - if (this.toolCallsByLoop[currentLoop]) { - const toolCallData = this.toolCallsByLoop[currentLoop].find( - (tc) => tc.toolCallId === toolCall.toolCallId, - ); - - if (toolCallData) { - toolCallData.error = error; - toolCallData.executionTime = executionTime; - } - } + this.toolCallTracker.finishToolCall(currentLoop, toolCall.toolCallId, undefined, error); logger.debug( `Tool call error intercepted for ${toolCall.name} (${toolCall.toolCallId}) in loop ${currentLoop}`, ); - // Verify tool calls if enabled if (this.verifyToolCalls) { this.verifyToolCallsForLoop(currentLoop).catch((error) => { logger.error(`Error verifying tool calls: ${error}`); @@ -503,11 +440,9 @@ export class AgentReplaySnapshotHook extends AgentHookBase { }); } - // Call original hook if present if (this.originalToolCallErrorHook) { return this.originalToolCallErrorHook.call(this.agent, id, toolCall, error); } - return `Error: ${error}`; } @@ -604,19 +539,17 @@ export class AgentReplaySnapshotHook extends AgentHookBase { logger.debug(`Loaded ${toolCalls.length} tool calls from snapshot for ${loopDir}`); } - /** - * Verify tool calls against snapshot for a specific loop - */ private async verifyToolCallsForLoop(loopNumber: number): Promise { - if (!this.snapshotManager || !this.toolCallsByLoop[loopNumber]) return; + if (!this.snapshotManager) return; + const toolCalls = this.toolCallTracker.getToolCallsForLoop(loopNumber); const loopDir = `loop-${loopNumber}`; try { await this.snapshotManager.verifyToolCallsSnapshot( path.basename(this.snapshotPath), loopDir, - this.toolCallsByLoop[loopNumber], + toolCalls, this.updateSnapshots, ); logger.success(`✅ Tool calls verification succeeded for ${loopDir}`); diff --git a/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts b/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts index 4593602571..d91da483de 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts @@ -7,21 +7,9 @@ import { Agent, AgentRunOptions } from '@tarko/agent'; import { AgentSnapshot } from './agent-snapshot'; import { SnapshotRunResult } from './types'; -/** - * Define case configurations for snapshot generation and testing - */ export interface CaseConfig { - /** - * Case name. - */ name: string; - /** - * Case module path, export {@type SnapshotCase} - */ path: string; - /** - * Generated Snapshot path. - */ snapshotPath: string; vitestSnapshotPath: string; } @@ -35,93 +23,80 @@ export class AgentSnapshotRunner { public readonly examples: CaseConfig[]; constructor(examples: CaseConfig[]) { - console.log(JSON.stringify(examples, null, 2)); - this.examples = examples; } - /** - * Check if the update snapshot flag is present in command line arguments - */ private shouldUpdateSnapshots(): boolean { return process.argv.includes('-u') || process.argv.includes('--updateSnapshot'); } - /** - * A simple cli to run agent snapshot - */ async cli() { - { - const args = process.argv.slice(2); - const command = args[0]; - const exampleName = args[1]; - console.log(args, command, exampleName); - - // Check for update flag - const updateSnapshots = this.shouldUpdateSnapshots(); - if (updateSnapshots) { - console.log('Update snapshots mode enabled (-u flag detected)'); + const args = process.argv.slice(2); + const command = args[0]; + const exampleName = args[1]; + const updateSnapshots = this.shouldUpdateSnapshots(); + + if (updateSnapshots) { + console.log('Update snapshots mode enabled (-u flag detected)'); + } + + if (command === 'generate') { + await this.handleGenerateCommand(exampleName); + } else if (command === 'replay') { + await this.handleReplayCommand(exampleName, updateSnapshots); + } else { + this.printUsage(); + } + } + + private async handleGenerateCommand(exampleName?: string): Promise { + if (!exampleName) { + await this.generateAll(); + } else if (exampleName === 'all') { + await this.generateAll(); + } else { + const example = this.getCaseByName(exampleName); + if (example) { + await this.generateSnapshot(example); + } else { + console.error(`Example "${exampleName}" not found.`); + process.exit(1); } + } + } - if (command === 'generate') { - if (exampleName) { - if (exampleName === 'all') { - // Generate snapshots for all examples using wildcard - await this.generateAll(); - } else { - const example = this.getCaseByName(exampleName); - if (example) { - await this.generateSnapshot(example); - } else { - console.error(`Example "${exampleName}" not found.`); - process.exit(1); - } - } - } else { - await this.generateAll(); - } - } else if (command === 'replay') { - if (exampleName) { - if (exampleName === 'all') { - // Test snapshots for all examples using wildcard - await this.replayAll(updateSnapshots); - } else { - const example = this.getCaseByName(exampleName); - if (example) { - await this.replaySnapshot(example, updateSnapshots); - } else { - console.error(`Example "${exampleName}" not found.`); - process.exit(1); - } - } - } else { - await this.replayAll(updateSnapshots); - } + private async handleReplayCommand(exampleName?: string, updateSnapshots = false): Promise { + if (!exampleName) { + await this.replayAll(updateSnapshots); + } else if (exampleName === 'all') { + await this.replayAll(updateSnapshots); + } else { + const example = this.getCaseByName(exampleName); + if (example) { + await this.replaySnapshot(example, updateSnapshots); } else { - console.log('Usage: cli.ts [generate|replay] [example-name] [-u|--updateSnapshot]'); - console.log('Options:'); - console.log( - ' -u, --updateSnapshot Update snapshots when replaying (skips verification and updates files directly)', - ); - console.log('Available examples:'); - this.examples.forEach((e) => console.log(`- ${e.name}`)); - console.log('- all (all examples)'); + console.error(`Example "${exampleName}" not found.`); + process.exit(1); } } } - /** - * Get example config by name - */ + private printUsage(): void { + console.log('Usage: cli.ts [generate|replay] [example-name] [-u|--updateSnapshot]'); + console.log('Options:'); + console.log( + ' -u, --updateSnapshot Update snapshots when replaying (skips verification and updates files directly)', + ); + console.log('Available examples:'); + this.examples.forEach((e) => console.log(`- ${e.name}`)); + console.log('- all (all examples)'); + } + getCaseByName(name: string): CaseConfig | undefined { return this.examples.find((e) => e.name === name); } - /** - * Load case - */ async loadSnapshotCase(exampleConfig: CaseConfig): Promise { - // const importPromise = new Function(`return import('${exampleConfig.path}')`)(); const importedModule = await import(exampleConfig.path); if (importedModule.agent && importedModule.runOptions) { @@ -137,13 +112,10 @@ export class AgentSnapshotRunner { } throw new Error( - `Invalid agent case module: ${exampleConfig.path}, required an "agent" instance and "runOptiond" exported`, + `Invalid agent case module: ${exampleConfig.path}, required an "agent" instance and "runOptions" exported`, ); } - /** - * Generate snapshot for a specific example - */ async generateSnapshot(exampleConfig: CaseConfig): Promise { console.log(`Generating snapshot for ${exampleConfig.name}...`); @@ -157,9 +129,6 @@ export class AgentSnapshotRunner { console.log(`Snapshot generated at ${exampleConfig.snapshotPath}`); } - /** - * Replay snapshot for a specific example - */ async replaySnapshot( exampleConfig: CaseConfig, updateSnapshots = false, @@ -171,18 +140,15 @@ export class AgentSnapshotRunner { const { agent, runOptions } = await this.loadSnapshotCase(exampleConfig); - console.log(`Testing agent instance`, agent); - console.log(`Testing agent run options`, runOptions); - if (!agent || !runOptions) { throw new Error( - `Invalid agent case module: ${exampleConfig.path}, required an "agent" instance and "runOptiond" exported`, + `Invalid agent case module: ${exampleConfig.path}, required an "agent" instance and "runOptions" exported`, ); } const agentSnapshot = new AgentSnapshot(agent, { snapshotPath: exampleConfig.snapshotPath, - updateSnapshots, // Pass the update flag to AgentSnapshot + updateSnapshots, }); const response = await agentSnapshot.replay(runOptions); @@ -190,18 +156,12 @@ export class AgentSnapshotRunner { return response; } - /** - * Generate snapshots for all examples - */ async generateAll(): Promise { for (const example of this.examples) { await this.generateSnapshot(example); } } - /** - * Test snapshots for all examples - */ async replayAll(updateSnapshots = false): Promise> { const results: Record = {}; for (const example of this.examples) { diff --git a/multimodal/tarko/agent-snapshot/src/index.ts b/multimodal/tarko/agent-snapshot/src/index.ts index fbb6337c92..59b67022c8 100644 --- a/multimodal/tarko/agent-snapshot/src/index.ts +++ b/multimodal/tarko/agent-snapshot/src/index.ts @@ -11,3 +11,4 @@ export * from './agent-replay-snapshot-hook'; export * from './agent-hook-base'; export * from './types'; export * from './utils/snapshot-normalizer'; +export * from './utils/tool-call-tracker'; diff --git a/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts b/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts index 7416caa46f..ab15925ace 100644 --- a/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts +++ b/multimodal/tarko/agent-snapshot/src/snapshot-manager.ts @@ -9,18 +9,9 @@ import path from 'path'; import { AgentEventStream } from '@tarko/agent-interface'; import { logger } from './utils/logger'; import { AgentNormalizerConfig, AgentSnapshotNormalizer } from './utils/snapshot-normalizer'; +import { ToolCallData } from './utils/tool-call-tracker'; + -/** - * Interface for tool call data - */ -export interface ToolCallData { - toolCallId: string; - name: string; - args: unknown; - result?: unknown; - error?: unknown; - executionTime?: number; -} /** * SnapshotManager - Manages test snapshots for agent testing @@ -154,9 +145,6 @@ export class SnapshotManager { } } - /** - * Generic verification method to reduce code duplication - */ private async verifySnapshot( caseName: string, loopDir: string, @@ -252,9 +240,6 @@ export class SnapshotManager { } } - /** - * Verify that an event stream state matches the expected snapshot - */ async verifyEventStreamSnapshot( caseName: string, loopDir: string, @@ -271,9 +256,6 @@ export class SnapshotManager { ); } - /** - * Verify that a request matches the expected snapshot - */ async verifyRequestSnapshot( caseName: string, loopDir: string, @@ -291,9 +273,6 @@ export class SnapshotManager { ); } - /** - * Verify that tool calls match the expected snapshot - */ async verifyToolCallsSnapshot( caseName: string, loopDir: string, diff --git a/multimodal/tarko/agent-snapshot/src/utils/snapshot-normalizer.ts b/multimodal/tarko/agent-snapshot/src/utils/snapshot-normalizer.ts index f6a72f4a9e..35db97b6ae 100644 --- a/multimodal/tarko/agent-snapshot/src/utils/snapshot-normalizer.ts +++ b/multimodal/tarko/agent-snapshot/src/utils/snapshot-normalizer.ts @@ -1,9 +1,9 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ /* * Copyright (c) 2025 Bytedance, Inc. and its affiliates. * SPDX-License-Identifier: Apache-2.0 */ import stringify from 'fast-json-stable-stringify'; +import snapshotDiff from 'snapshot-diff'; /** * Configuration object that defines how to normalize snapshots @@ -24,10 +24,8 @@ export interface AgentNormalizerConfig { // Custom normalization functions customNormalizers?: Array<{ - // Apply function when field name matches this pattern pattern: string | RegExp; - // Function to apply when field name matches - normalizer: (value: any, path: string) => any; + normalizer: (value: unknown, path: string) => unknown; }>; } // Default configuration @@ -49,232 +47,12 @@ const DEFAULT_CONFIG: AgentNormalizerConfig = { fieldsToIgnore: [], }; -/** - * Simple diff implementation to replace snapshot-diff - */ -class SimpleDiffer { - private contextLines: number; - - constructor(contextLines = 3) { - this.contextLines = contextLines; - } - - /** - * Generate a unified diff between two strings - */ - diff( - expected: string, - actual: string, - expectedLabel = 'Expected', - actualLabel = 'Actual', - ): string { - const expectedLines = expected.split('\n'); - const actualLines = actual.split('\n'); - - const diffLines: string[] = []; - diffLines.push(`--- ${expectedLabel}`); - diffLines.push(`+++ ${actualLabel}`); - - const lcs = this.longestCommonSubsequence(expectedLines, actualLines); - const changes = this.generateChanges(expectedLines, actualLines, lcs); - - // Group changes into hunks - const hunks = this.groupChangesIntoHunks(changes, expectedLines.length, actualLines.length); - - for (const hunk of hunks) { - diffLines.push(`@@ -${hunk.oldStart},${hunk.oldCount} +${hunk.newStart},${hunk.newCount} @@`); - diffLines.push(...hunk.lines); - } - - return diffLines.join('\n'); - } - - /** - * Longest Common Subsequence algorithm for diff generation - */ - private longestCommonSubsequence(a: string[], b: string[]): number[][] { - const m = a.length; - const n = b.length; - const dp: number[][] = Array(m + 1) - .fill(null) - .map(() => Array(n + 1).fill(0)); - - for (let i = 1; i <= m; i++) { - for (let j = 1; j <= n; j++) { - if (a[i - 1] === b[j - 1]) { - dp[i][j] = dp[i - 1][j - 1] + 1; - } else { - dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]); - } - } - } - - return dp; - } - - /** - * Generate change operations based on LCS - */ - private generateChanges( - expected: string[], - actual: string[], - lcs: number[][], - ): Array<{ - type: 'add' | 'remove' | 'equal'; - expectedIndex: number; - actualIndex: number; - line: string; - }> { - const changes: Array<{ - type: 'add' | 'remove' | 'equal'; - expectedIndex: number; - actualIndex: number; - line: string; - }> = []; - - let i = expected.length; - let j = actual.length; - - while (i > 0 || j > 0) { - if (i > 0 && j > 0 && expected[i - 1] === actual[j - 1]) { - changes.unshift({ - type: 'equal', - expectedIndex: i - 1, - actualIndex: j - 1, - line: expected[i - 1], - }); - i--; - j--; - } else if (j > 0 && (i === 0 || lcs[i][j - 1] >= lcs[i - 1][j])) { - changes.unshift({ - type: 'add', - expectedIndex: -1, - actualIndex: j - 1, - line: actual[j - 1], - }); - j--; - } else if (i > 0) { - changes.unshift({ - type: 'remove', - expectedIndex: i - 1, - actualIndex: -1, - line: expected[i - 1], - }); - i--; - } - } - - return changes; - } - - /** - * Group changes into hunks with context lines - */ - private groupChangesIntoHunks( - changes: Array<{ - type: 'add' | 'remove' | 'equal'; - expectedIndex: number; - actualIndex: number; - line: string; - }>, - expectedLength: number, - actualLength: number, - ): Array<{ - oldStart: number; - oldCount: number; - newStart: number; - newCount: number; - lines: string[]; - }> { - const hunks: Array<{ - oldStart: number; - oldCount: number; - newStart: number; - newCount: number; - lines: string[]; - }> = []; - - let currentHunk: { - oldStart: number; - oldCount: number; - newStart: number; - newCount: number; - lines: string[]; - } | null = null; - - for (let i = 0; i < changes.length; i++) { - const change = changes[i]; - - if (change.type !== 'equal') { - // Start a new hunk if needed - if (!currentHunk) { - const contextStart = Math.max(0, i - this.contextLines); - currentHunk = { - oldStart: changes[contextStart]?.expectedIndex + 1 || 1, - oldCount: 0, - newStart: changes[contextStart]?.actualIndex + 1 || 1, - newCount: 0, - lines: [], - }; - - // Add context lines before the change - for (let j = contextStart; j < i; j++) { - if (changes[j].type === 'equal') { - currentHunk.lines.push(` ${changes[j].line}`); - currentHunk.oldCount++; - currentHunk.newCount++; - } - } - } - - // Add the change - if (change.type === 'remove') { - currentHunk.lines.push(`-${change.line}`); - currentHunk.oldCount++; - } else if (change.type === 'add') { - currentHunk.lines.push(`+${change.line}`); - currentHunk.newCount++; - } - } else { - // Equal line - add as context if we're in a hunk - if (currentHunk) { - currentHunk.lines.push(` ${change.line}`); - currentHunk.oldCount++; - currentHunk.newCount++; - - // Check if we should end the hunk - const nextChanges = changes.slice(i + 1, i + 1 + this.contextLines * 2); - const hasMoreChanges = nextChanges.some((c) => c.type !== 'equal'); - - if (!hasMoreChanges || i === changes.length - 1) { - // Add remaining context lines - const contextEnd = Math.min(i + this.contextLines, changes.length - 1); - for (let j = i + 1; j <= contextEnd; j++) { - if (changes[j]?.type === 'equal') { - currentHunk.lines.push(` ${changes[j].line}`); - currentHunk.oldCount++; - currentHunk.newCount++; - } - } - - hunks.push(currentHunk); - currentHunk = null; - } - } - } - } - - return hunks; - } -} - /** * Normalizes objects to ignore dynamic values when comparing snapshots */ export class AgentSnapshotNormalizer { private config: AgentNormalizerConfig; - private seenObjects = new WeakMap(); - private differ = new SimpleDiffer(3); + private seenObjects = new WeakMap(); constructor(config?: AgentNormalizerConfig) { this.config = { @@ -295,7 +73,7 @@ export class AgentSnapshotNormalizer { /** * Normalizes objects for comparison */ - normalize(obj: any, path = ''): any { + normalize(obj: unknown, path = ''): unknown { // Reset seen objects on top-level call if (path === '') { this.seenObjects = new WeakMap(); @@ -320,7 +98,7 @@ export class AgentSnapshotNormalizer { // Handle objects if (typeof obj === 'object') { - const result: Record = {}; + const result: Record = {}; for (const [key, value] of Object.entries(obj)) { const currentPath = path ? `${path}.${key}` : key; @@ -371,7 +149,7 @@ export class AgentSnapshotNormalizer { /** * Check if a field should be normalized and return the normalized value */ - private normalizeField(key: string, value: any, path: string): any { + private normalizeField(key: string, value: unknown, path: string): unknown { // First check custom normalizers if (this.config.customNormalizers) { for (const { pattern, normalizer } of this.config.customNormalizers) { @@ -404,7 +182,7 @@ export class AgentSnapshotNormalizer { /** * Compare two objects and generate a difference report */ - compare(expected: any, actual: any): { equal: boolean; diff: string | null } { + compare(expected: unknown, actual: unknown): { equal: boolean; diff: string | null } { const normalizedExpected = this.normalize(expected); const normalizedActual = this.normalize(actual); @@ -416,13 +194,12 @@ export class AgentSnapshotNormalizer { return { equal: true, diff: null }; } - // Generate difference report using our simple differ - const diff = this.differ.diff( - JSON.stringify(normalizedExpected, null, 2), - JSON.stringify(normalizedActual, null, 2), - 'Created Agent Snapshot', - 'Runtime Agent State', - ); + // Generate difference report using snapshot-diff + const diff = snapshotDiff(normalizedExpected, normalizedActual, { + aAnnotation: 'Created Agent Snapshot', + bAnnotation: 'Runtime Agent State', + contextLines: 3, + }); return { equal: false, diff }; } @@ -432,11 +209,10 @@ export class AgentSnapshotNormalizer { */ createSnapshotSerializer() { return { - test(val: any) { + test(val: unknown) { return typeof val === 'object' && val !== null; }, - serialize: (val: any) => { - // Directly return stringified normalized value to avoid printer recursion + serialize: (val: unknown) => { return JSON.stringify(this.normalize(val), null, 2); }, }; diff --git a/multimodal/tarko/agent-snapshot/src/utils/tool-call-tracker.ts b/multimodal/tarko/agent-snapshot/src/utils/tool-call-tracker.ts new file mode 100644 index 0000000000..bab4d98493 --- /dev/null +++ b/multimodal/tarko/agent-snapshot/src/utils/tool-call-tracker.ts @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2025 Bytedance, Inc. and its affiliates. + * SPDX-License-Identifier: Apache-2.0 + */ + +export interface ToolCallData { + toolCallId: string; + name: string; + args: unknown; + result?: unknown; + error?: unknown; + executionTime?: number; +} + +/** + * Shared utility for tracking tool calls across different hook implementations + */ +export class ToolCallTracker { + private toolCallsByLoop: Record = {}; + private startTimeByToolCall: Record = {}; + + initializeLoop(loopNumber: number): void { + if (!this.toolCallsByLoop[loopNumber]) { + this.toolCallsByLoop[loopNumber] = []; + } + } + + startToolCall( + loopNumber: number, + toolCall: { toolCallId: string; name: string }, + args: unknown, + ): void { + this.startTimeByToolCall[toolCall.toolCallId] = Date.now(); + this.initializeLoop(loopNumber); + + this.toolCallsByLoop[loopNumber].push({ + toolCallId: toolCall.toolCallId, + name: toolCall.name, + args, + }); + } + + finishToolCall(loopNumber: number, toolCallId: string, result?: unknown, error?: unknown): void { + const executionTime = Date.now() - (this.startTimeByToolCall[toolCallId] || Date.now()); + const toolCallData = this.findToolCall(loopNumber, toolCallId); + + if (toolCallData) { + toolCallData.result = result; + toolCallData.error = error; + toolCallData.executionTime = executionTime; + } + + delete this.startTimeByToolCall[toolCallId]; + } + + getToolCallsForLoop(loopNumber: number): ToolCallData[] { + return this.toolCallsByLoop[loopNumber] || []; + } + + private findToolCall(loopNumber: number, toolCallId: string): ToolCallData | undefined { + return this.toolCallsByLoop[loopNumber]?.find((tc) => tc.toolCallId === toolCallId); + } + + clear(): void { + this.toolCallsByLoop = {}; + this.startTimeByToolCall = {}; + } +} From e6e4fe0baa96c40fcba1fe34b488a2fdb51ca07e Mon Sep 17 00:00:00 2001 From: chenhaoli Date: Sat, 11 Oct 2025 14:26:03 +0800 Subject: [PATCH 3/6] chore: remove some frequent log --- .../tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts | 2 +- multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts b/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts index 796a9abe08..16b6f54e76 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-replay-snapshot-hook.ts @@ -231,7 +231,7 @@ export class AgentReplaySnapshotHook extends AgentHookBase { if (index < chunks.length) { const chunk = chunks[index]; - logger.info(`Yielding chunk ${index + 1}/${chunks.length}`); + // logger.info(`Yielding chunk ${index + 1}/${chunks.length}`); index++; return { done: false, value: chunk }; } else { diff --git a/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts b/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts index d91da483de..f50f14a6db 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-snapshot-runner.ts @@ -152,7 +152,7 @@ export class AgentSnapshotRunner { }); const response = await agentSnapshot.replay(runOptions); - console.log(`Snapshot test result for ${exampleConfig.name}:`, response); + // console.log(`Snapshot test result for ${exampleConfig.name}:`, response); return response; } From 9f9970c138577e07af6e8be981144fa09eb95a1e Mon Sep 17 00:00:00 2001 From: chenhaoli Date: Sat, 11 Oct 2025 16:31:28 +0800 Subject: [PATCH 4/6] chore(agent-snapshot): make logger silent --- multimodal/tarko/agent-snapshot/src/utils/logger.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/multimodal/tarko/agent-snapshot/src/utils/logger.ts b/multimodal/tarko/agent-snapshot/src/utils/logger.ts index 1a995c1a2f..8b7294362f 100644 --- a/multimodal/tarko/agent-snapshot/src/utils/logger.ts +++ b/multimodal/tarko/agent-snapshot/src/utils/logger.ts @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { ConsoleLogger } from '@agent-infra/logger'; +import { ConsoleLogger, LogLevel } from '@agent-infra/logger'; export const logger = new ConsoleLogger('[AgentSnapshot]'); +logger.setLevel(LogLevel.SILENT); From debf493c772e648603c4bf37291fa46b6e3d036e Mon Sep 17 00:00:00 2001 From: chenhaoli Date: Mon, 13 Oct 2025 20:42:01 +0800 Subject: [PATCH 5/6] fix(agent-snapshot): ensure proper stream consumption in replay method - Fix loop count mismatch in streaming mode by fully consuming streams - Track agent_run_end events to ensure complete execution - Add wait time for background processing and cleanup handlers - Improve error handling and logging for streaming responses Resolves issue where AgentSnapshot.replay() didn't wait for complete agent execution in streaming mode --- .../agent-snapshot/src/agent-snapshot.ts | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts b/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts index cebcf0113c..89ff07030f 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts @@ -6,10 +6,7 @@ import path from 'path'; import fs from 'fs'; import { Agent } from '@tarko/agent'; -import { - AgentRunOptions, - AgentEventStream, -} from '@tarko/agent-interface'; +import { AgentRunOptions, AgentEventStream } from '@tarko/agent-interface'; import { AgentSnapshotOptions, SnapshotGenerationResult, @@ -66,7 +63,8 @@ export class AgentSnapshot { const loopDirs = fs .readdirSync(this.snapshotPath) .filter( - (dir) => dir.startsWith('loop-') && fs.statSync(path.join(this.snapshotPath, dir)).isDirectory(), + (dir) => + dir.startsWith('loop-') && fs.statSync(path.join(this.snapshotPath, dir)).isDirectory(), ) .sort((a, b) => { const numA = parseInt(a.split('-')[1], 10); @@ -82,7 +80,7 @@ export class AgentSnapshot { */ async generate(runOptions: AgentRunOptions): Promise { const startTime = Date.now(); - + this.generateHook = new AgentGenerateSnapshotHook(this.agent, { snapshotPath: this.snapshotPath, snapshotName: this.snapshotName, @@ -151,8 +149,8 @@ export class AgentSnapshot { ); logger.info( `Verification settings: LLM requests: ${verification.verifyLLMRequests ? 'enabled' : 'disabled'}, ` + - `Event streams: ${verification.verifyEventStreams ? 'enabled' : 'disabled'}, ` + - `Tool calls: ${verification.verifyToolCalls ? 'enabled' : 'disabled'}`, + `Event streams: ${verification.verifyEventStreams ? 'enabled' : 'disabled'}, ` + + `Tool calls: ${verification.verifyToolCalls ? 'enabled' : 'disabled'}`, ); logger.info(`Found ${loopCount} loops in test case`); @@ -175,6 +173,34 @@ export class AgentSnapshot { const response = await this.agent.run(runOptions as any); + // For streaming responses, consume the entire stream to ensure execution completes + if (response && typeof response[Symbol.asyncIterator] === 'function') { + // This is a streaming response, consume it fully + try { + let agentRunEndReceived = false; + + for await (const chunk of response) { + // Track when we receive the agent_run_end event + if (chunk.type === 'agent_run_end') { + agentRunEndReceived = true; + } + // Just consume the chunks, the actual execution happens in the background + } + + // Ensure we received the agent_run_end event + if (!agentRunEndReceived) { + logger.warn('Stream completed without receiving agent_run_end event'); + } + + // Additional wait to ensure all background processing is complete + // This ensures any final cleanup handlers are executed + await new Promise((resolve) => setTimeout(resolve, 50)); + } catch (streamError) { + logger.error(`Error consuming stream: ${streamError}`); + throw streamError; + } + } + if (this.replayHook.hasError()) { const error = this.replayHook.getLastError(); logger.error(`Error occurred during execution: ${error?.message}`); @@ -251,4 +277,4 @@ export class AgentSnapshot { updateAgentNormalizerConfig(config: AgentNormalizerConfig): void { this.snapshotManager.updateAgentNormalizerConfig(config); } -} \ No newline at end of file +} From d542b75bddcc5c35ed4102e8b86cd2b4e9ec5ba8 Mon Sep 17 00:00:00 2001 From: chenhaoli Date: Mon, 13 Oct 2025 20:48:31 +0800 Subject: [PATCH 6/6] fix(agent-snapshot): resolve TypeScript compilation errors - Use proper type casting for async iterator checks - Add unknown type conversion to satisfy TypeScript compiler - Improve type safety in stream consumption logic Fixes build errors in agent-snapshot package --- multimodal/tarko/agent-snapshot/src/agent-snapshot.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts b/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts index 89ff07030f..26c8676f60 100644 --- a/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts +++ b/multimodal/tarko/agent-snapshot/src/agent-snapshot.ts @@ -174,14 +174,14 @@ export class AgentSnapshot { const response = await this.agent.run(runOptions as any); // For streaming responses, consume the entire stream to ensure execution completes - if (response && typeof response[Symbol.asyncIterator] === 'function') { + if (response && typeof (response as any)[Symbol.asyncIterator] === 'function') { // This is a streaming response, consume it fully try { let agentRunEndReceived = false; - for await (const chunk of response) { + for await (const chunk of response as unknown as AsyncIterable) { // Track when we receive the agent_run_end event - if (chunk.type === 'agent_run_end') { + if (chunk && typeof chunk === 'object' && chunk.type === 'agent_run_end') { agentRunEndReceived = true; } // Just consume the chunks, the actual execution happens in the background