From fc44837917880033137a751b7f834c5d4d01b1f3 Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 14:29:49 +0200 Subject: [PATCH 01/13] VLM integration via Ollama added --- .env | 1 + docker-compose.yml | 5 + prisma/migrations/migration_lock.toml | 4 +- prisma/schema.prisma | 2 + src/_data_/index.ts | 1 + src/compare/compare.module.ts | 6 +- src/compare/compare.service.ts | 5 + .../libs/image-comparator.interface.ts | 3 +- src/compare/libs/vlm/README.md | 92 ++++++++++ src/compare/libs/vlm/ollama.controller.ts | 50 ++++++ src/compare/libs/vlm/ollama.service.spec.ts | 98 ++++++++++ src/compare/libs/vlm/ollama.service.ts | 63 +++++++ src/compare/libs/vlm/ollama.types.ts | 41 +++++ src/compare/libs/vlm/vlm.service.spec.ts | 154 ++++++++++++++++ src/compare/libs/vlm/vlm.service.ts | 167 ++++++++++++++++++ src/compare/libs/vlm/vlm.types.ts | 19 ++ src/compare/utils/index.ts | 5 + src/test-runs/diffResult.ts | 6 + src/test-runs/dto/testRunResult.dto.ts | 3 + src/test-runs/test-runs.service.ts | 1 + 20 files changed, 722 insertions(+), 4 deletions(-) create mode 100644 src/compare/libs/vlm/README.md create mode 100644 src/compare/libs/vlm/ollama.controller.ts create mode 100644 src/compare/libs/vlm/ollama.service.spec.ts create mode 100644 src/compare/libs/vlm/ollama.service.ts create mode 100644 src/compare/libs/vlm/ollama.types.ts create mode 100644 src/compare/libs/vlm/vlm.service.spec.ts create mode 100644 src/compare/libs/vlm/vlm.service.ts create mode 100644 src/compare/libs/vlm/vlm.types.ts diff --git a/.env b/.env index 7674f0f4..40eab26a 100644 --- a/.env +++ b/.env @@ -37,3 +37,4 @@ AWS_S3_BUCKET_NAME= #SERVER_TIMEOUT=120000 #SERVER_HEADERS_TIMEOUT=60000 #SERVER_KEEP_ALIVE_TIMEOUT=5000 +OLLAMA_BASE_URL=http://localhost:11434 diff --git a/docker-compose.yml b/docker-compose.yml index b99aa76b..7b610a2e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,10 +16,15 @@ services: APP_FRONTEND_URL: ${APP_FRONTEND_URL} BODY_PARSER_JSON_LIMIT: ${BODY_PARSER_JSON_LIMIT} ELASTIC_URL: ${ELASTIC_URL} + # VLM: Uncomment to use Ollama running on host machine + OLLAMA_BASE_URL: http://host.docker.internal:11434 ports: - "${APP_PORT}:3000" expose: - "${APP_PORT}" + # VLM: Uncomment to use Ollama running on host machine + extra_hosts: + - host.docker.internal:host-gateway depends_on: postgres: condition: service_healthy diff --git a/prisma/migrations/migration_lock.toml b/prisma/migrations/migration_lock.toml index fbffa92c..044d57cd 100644 --- a/prisma/migrations/migration_lock.toml +++ b/prisma/migrations/migration_lock.toml @@ -1,3 +1,3 @@ # Please do not edit this file manually -# It should be added in your version-control system (i.e. Git) -provider = "postgresql" \ No newline at end of file +# It should be added in your version-control system (e.g., Git) +provider = "postgresql" diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 014ea95a..040e758f 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -69,6 +69,7 @@ model TestRun { baselineBranchName String? ignoreAreas String @default("[]") tempIgnoreAreas String @default("[]") + vlmDescription String? baseline Baseline? build Build @relation(fields: [buildId], references: [id]) project Project? 
@relation(fields: [projectId], references: [id]) @@ -138,6 +139,7 @@ enum ImageComparison { pixelmatch lookSame odiff + vlm } enum Role { diff --git a/src/_data_/index.ts b/src/_data_/index.ts index 3c45ad77..ba6ce65a 100644 --- a/src/_data_/index.ts +++ b/src/_data_/index.ts @@ -96,6 +96,7 @@ export const generateTestRun = (testRun?: Partial<TestRun>): TestRun => { baselineBranchName: 'master', branchName: 'develop', merge: false, + vlmDescription: null, ...testRun, }; }; diff --git a/src/compare/compare.module.ts b/src/compare/compare.module.ts index c4c622c8..f7e9a9b8 100644 --- a/src/compare/compare.module.ts +++ b/src/compare/compare.module.ts @@ -3,10 +3,14 @@ import { CompareService } from './compare.service'; import { LookSameService } from './libs/looks-same/looks-same.service'; import { OdiffService } from './libs/odiff/odiff.service'; import { PixelmatchService } from './libs/pixelmatch/pixelmatch.service'; +import { VlmService } from './libs/vlm/vlm.service'; +import { OllamaController } from './libs/vlm/ollama.controller'; +import { OllamaService } from './libs/vlm/ollama.service'; import { StaticModule } from '../static/static.module'; @Module({ - providers: [CompareService, PixelmatchService, LookSameService, OdiffService], + controllers: [OllamaController], + providers: [CompareService, PixelmatchService, LookSameService, OdiffService, VlmService, OllamaService], imports: [StaticModule], exports: [CompareService], }) diff --git a/src/compare/compare.service.ts b/src/compare/compare.service.ts index 6bb144a0..f7895e9f 100644 --- a/src/compare/compare.service.ts +++ b/src/compare/compare.service.ts @@ -7,6 +7,7 @@ import { PrismaService } from '../prisma/prisma.service'; import { DiffResult } from '../test-runs/diffResult'; import { LookSameService } from './libs/looks-same/looks-same.service'; import { OdiffService } from './libs/odiff/odiff.service'; +import { VlmService } from './libs/vlm/vlm.service'; import { isHddStaticServiceConfigured } from '../static/utils'; @Injectable() export class CompareService { private readonly pixelmatchService: PixelmatchService, private readonly lookSameService: LookSameService, private readonly odiffService: OdiffService, + private readonly vlmService: VlmService, private readonly prismaService: PrismaService ) {} @@ -44,6 +46,9 @@ export class CompareService { return this.odiffService; } + case ImageComparison.vlm: { + return this.vlmService; + } default: { this.logger.warn(`Unknown ImageComparison value: ${imageComparison}.
Falling back to pixelmatch.`); return this.pixelmatchService; diff --git a/src/compare/libs/image-comparator.interface.ts b/src/compare/libs/image-comparator.interface.ts index a186199b..96e213f6 100644 --- a/src/compare/libs/image-comparator.interface.ts +++ b/src/compare/libs/image-comparator.interface.ts @@ -3,8 +3,9 @@ import { ImageCompareInput } from './ImageCompareInput'; import { LooksSameConfig } from './looks-same/looks-same.types'; import { OdiffConfig } from './odiff/odiff.types'; import { PixelmatchConfig } from './pixelmatch/pixelmatch.types'; +import { VlmConfig } from './vlm/vlm.types'; -export type ImageCompareConfig = PixelmatchConfig | LooksSameConfig | OdiffConfig; +export type ImageCompareConfig = PixelmatchConfig | LooksSameConfig | OdiffConfig | VlmConfig; export interface ImageComparator { getDiff(data: ImageCompareInput, config: ImageCompareConfig): Promise<DiffResult>; diff --git a/src/compare/libs/vlm/README.md b/src/compare/libs/vlm/README.md new file mode 100644 index 00000000..a8095d25 --- /dev/null +++ b/src/compare/libs/vlm/README.md @@ -0,0 +1,92 @@ +# VLM (Vision Language Model) Image Comparison + +AI-powered semantic image comparison using Vision Language Models via Ollama. + +## Quick Start + +### 1. Install & Start Ollama + +```bash +# Install (macOS) +brew install ollama + +# Start Ollama +ollama serve +``` + +### 2. Download a Model + +```bash +# Recommended for accuracy +ollama pull llava:7b + +# Or for speed (smaller, less accurate) +ollama pull moondream +``` + +### 3. Configure Backend + +Add to `.env`: +```bash +OLLAMA_BASE_URL=http://localhost:11434 +``` + +### 4. Use VLM in Project + +Set project's image comparison to `vlm` with config: +```json +{ + "model": "llava:7b", + "temperature": 0.1 +} +``` + +Optional custom prompt: +```json +{ + "model": "llava:7b", + "prompt": "Focus on button colors and text changes", + "temperature": 0.1 +} +``` + +## Recommended Models + +| Model | Size | Speed | Accuracy | Best For | +|-------|------|-------|----------|----------| +| `llava:7b` | 4.7GB | ⚡⚡ | ⭐⭐⭐ | **Recommended** - best balance | +| `llava:13b` | 8GB | ⚡ | ⭐⭐⭐⭐ | Best accuracy | +| `moondream` | 1.7GB | ⚡⚡⚡ | ⭐⭐ | Fast, may hallucinate | +| `minicpm-v` | 5.5GB | ⚡⚡ | ⭐⭐⭐ | Good alternative | + +## Configuration + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `model` | string | `moondream` | Ollama vision model name | +| `prompt` | string | `""` | Custom context prepended to system prompt | +| `temperature` | number | `0.1` | Lower = more consistent results | + +## How It Works + +1. VLM analyzes both images semantically +2. Returns `YES` (pass) or `NO` (fail) based on meaningful differences +3. Ignores technical differences (anti-aliasing, sub-pixel, minor spacing) +4.
Provides description of differences found + +## API Endpoints + +```bash +# List available models +GET /ollama/models + +# Compare two images (for testing) +POST /ollama/compare?model=llava:7b&prompt=&temperature=0.1 +``` + +**Example:** +```bash +curl -X POST "http://localhost:3000/ollama/compare?model=llava:7b&prompt=Are%20these%20images%20the%20same&temperature=0.1" \ + -F "images=@baseline.png" \ + -F "images=@comparison.png" +``` diff --git a/src/compare/libs/vlm/ollama.controller.ts b/src/compare/libs/vlm/ollama.controller.ts new file mode 100644 index 00000000..3d9a98dc --- /dev/null +++ b/src/compare/libs/vlm/ollama.controller.ts @@ -0,0 +1,50 @@ +import { Controller, Get, Post, Query, HttpException, HttpStatus, UseInterceptors, UploadedFiles } from '@nestjs/common'; +import { FilesInterceptor } from '@nestjs/platform-express'; +import { ApiTags, ApiConsumes, ApiBody } from '@nestjs/swagger'; +import { OllamaService } from './ollama.service'; + +@ApiTags('Ollama') +@Controller('ollama') +export class OllamaController { + constructor(private readonly ollamaService: OllamaService) {} + + @Get('models') + async listModels() { + return { models: await this.ollamaService.listModels() }; + } + + @Post('compare') + @ApiConsumes('multipart/form-data') + @ApiBody({ + schema: { + type: 'object', + required: ['images'], + properties: { + images: { + type: 'array', + items: { type: 'string', format: 'binary' }, + description: 'Two images to compare (baseline and comparison)', + }, + }, + }, + }) + @UseInterceptors(FilesInterceptor('images', 2)) + async compareImages( + @UploadedFiles() files: Express.Multer.File[], + @Query('model') model: string, + @Query('prompt') prompt: string, + @Query('temperature') temperature: string + ) { + if (files?.length !== 2) { + throw new HttpException('Two images required', HttpStatus.BAD_REQUEST); + } + + return this.ollamaService.generate({ + model, + prompt, + format: 'json', + images: files.map((f) => f.buffer.toString('base64')), + options: { temperature: Number(temperature) }, + }); + } +} diff --git a/src/compare/libs/vlm/ollama.service.spec.ts b/src/compare/libs/vlm/ollama.service.spec.ts new file mode 100644 index 00000000..48abb0e8 --- /dev/null +++ b/src/compare/libs/vlm/ollama.service.spec.ts @@ -0,0 +1,98 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ConfigService } from '@nestjs/config'; +import { OllamaService } from './ollama.service'; + +describe('OllamaService', () => { + let service: OllamaService; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + providers: [ + OllamaService, + { + provide: ConfigService, + useValue: { + getOrThrow: jest.fn().mockReturnValue('http://localhost:11434'), + }, + }, + ], + }).compile(); + + service = module.get(OllamaService); + }); + + describe('generate', () => { + it('should call Ollama API with correct parameters', async () => { + const mockResponse = { response: 'YES', done: true }; + globalThis.fetch = jest.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve(mockResponse), + }); + + const result = await service.generate({ + model: 'llava', + prompt: 'Test prompt', + images: ['base64img'], + }); + + expect(fetch).toHaveBeenCalledWith( + 'http://localhost:11434/api/generate', + expect.objectContaining({ + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }) + ); + expect(result).toEqual(mockResponse); + }); + + it('should throw error when API returns non-ok status', async () => { + globalThis.fetch = 
jest.fn().mockResolvedValue({ ok: false, status: 500, text: () => Promise.resolve('Internal Server Error'), }); await expect( service.generate({ model: 'llava', prompt: 'Test' }) ).rejects.toThrow('Ollama API returned status 500'); }); it('should throw error when OLLAMA_BASE_URL is not configured', async () => { const mockConfigService = { getOrThrow: jest.fn().mockImplementation(() => { throw new Error('Configuration key "OLLAMA_BASE_URL" does not exist'); }), } as any; const newService = new OllamaService(mockConfigService); await expect( newService.generate({ model: 'llava', prompt: 'Test' }) ).rejects.toThrow('OLLAMA_BASE_URL'); }); }); describe('listModels', () => { it('should return list of models', async () => { const mockModels = { models: [{ name: 'llava:7b' }, { name: 'moondream' }] }; globalThis.fetch = jest.fn().mockResolvedValue({ ok: true, json: () => Promise.resolve(mockModels), }); const result = await service.listModels(); expect(fetch).toHaveBeenCalledWith('http://localhost:11434/api/tags'); expect(result).toEqual(mockModels.models); }); it('should throw error when API fails', async () => { globalThis.fetch = jest.fn().mockResolvedValue({ ok: false, status: 503, text: () => Promise.resolve('Service Unavailable'), }); await expect(service.listModels()).rejects.toThrow('Failed to list models'); }); }); }); diff --git a/src/compare/libs/vlm/ollama.service.ts b/src/compare/libs/vlm/ollama.service.ts new file mode 100644 index 00000000..c585ee7b --- /dev/null +++ b/src/compare/libs/vlm/ollama.service.ts @@ -0,0 +1,63 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { ConfigService } from '@nestjs/config'; +import { + OllamaGenerateRequest, + OllamaGenerateResponse, + OllamaModel, + OllamaModelsResponse, +} from './ollama.types'; + +@Injectable() +export class OllamaService { + private readonly logger: Logger = new Logger(OllamaService.name); + private baseUrl: string | null = null; + + constructor(private readonly configService: ConfigService) {} + + private getBaseUrl(): string { + if (!this.baseUrl) { + this.baseUrl = this.configService.getOrThrow('OLLAMA_BASE_URL'); + } + return this.baseUrl; + } + + async generate(request: OllamaGenerateRequest): Promise<OllamaGenerateResponse> { + const baseUrl = this.getBaseUrl(); + try { + const response = await fetch(`${baseUrl}/api/generate`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ ...request, stream: request.stream ??
false }), }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Ollama API returned status ${response.status}: ${errorText}`); + } + + return await response.json(); + } catch (error) { + this.logger.error(`Ollama generate request failed: ${error.message}`); + throw error; + } + } + + async listModels(): Promise<OllamaModel[]> { + const baseUrl = this.getBaseUrl(); + try { + const response = await fetch(`${baseUrl}/api/tags`); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Failed to list models: ${response.status} ${errorText}`); + } + + const data: OllamaModelsResponse = await response.json(); + return data.models; + } catch (error) { + this.logger.error(`Failed to list models: ${error.message}`); + throw error; + } + } +} + diff --git a/src/compare/libs/vlm/ollama.types.ts b/src/compare/libs/vlm/ollama.types.ts new file mode 100644 index 00000000..505bab2e --- /dev/null +++ b/src/compare/libs/vlm/ollama.types.ts @@ -0,0 +1,41 @@ +export interface OllamaGenerateRequest { + model: string; + prompt: string; + images?: string[]; + stream?: boolean; + format?: 'json'; + options?: { + temperature?: number; + top_k?: number; + top_p?: number; + }; +} + +export interface VlmComparisonResult { + identical: boolean; + description: string; +} + +export interface OllamaGenerateResponse { + model: string; + created_at: string; + response: string; + thinking?: string; + done: boolean; + context?: number[]; + total_duration?: number; + load_duration?: number; + prompt_eval_count?: number; +} + +export interface OllamaModel { + name: string; + size?: number; + digest?: string; + modified_at?: string; +} + +export interface OllamaModelsResponse { + models: OllamaModel[]; +} + diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts new file mode 100644 index 00000000..ddb5b8ea --- /dev/null +++ b/src/compare/libs/vlm/vlm.service.spec.ts @@ -0,0 +1,154 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { TestStatus } from '@prisma/client'; +import { PNG } from 'pngjs'; +import { StaticService } from '../../../static/static.service'; +import { NO_BASELINE_RESULT } from '../consts'; +import { DEFAULT_CONFIG, VlmService } from './vlm.service'; +import { OllamaService } from './ollama.service'; + +const initService = async ({ + getImageMock = jest.fn(), + saveImageMock = jest.fn(), + ollamaGenerateMock = jest.fn(), +}) => { + const module: TestingModule = await Test.createTestingModule({ + providers: [ + VlmService, + { + provide: StaticService, + useValue: { + getImage: getImageMock, + saveImage: saveImageMock, + }, + }, + { + provide: OllamaService, + useValue: { + generate: ollamaGenerateMock, + }, + }, + ], + }).compile(); + + return module.get(VlmService); +}; + +describe('VlmService', () => { + const image = new PNG({ width: 20, height: 20 }); + + it('should return NO_BASELINE_RESULT when baseline is missing', async () => { + const getImageMock = jest.fn().mockReturnValueOnce(undefined).mockReturnValueOnce(image); + const service = await initService({ getImageMock }); + + const result = await service.getDiff( + { baseline: null, image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: true }, + DEFAULT_CONFIG + ); + + expect(result).toStrictEqual(NO_BASELINE_RESULT); + }); + + it('should return ok status when VLM returns identical=true in JSON', async () => { + const getImageMock = jest.fn().mockReturnValue(image); + const ollamaGenerateMock =
jest.fn().mockResolvedValue({ + response: '{"identical": true, "description": "Screenshots are visually identical."}', + done: true, + }); + const service = await initService({ getImageMock, ollamaGenerateMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + DEFAULT_CONFIG + ); + + expect(result.status).toBe(TestStatus.ok); + expect(result.vlmDescription).toBe('Screenshots are visually identical.'); + expect(result.pixelMisMatchCount).toBe(0); + expect(result.diffPercent).toBe(0); + }); + + it('should return unresolved when VLM returns identical=false in JSON', async () => { + const getImageMock = jest.fn().mockReturnValue(image); + const saveImageMock = jest.fn().mockResolvedValue('diff.png'); + const ollamaGenerateMock = jest.fn().mockResolvedValue({ + response: '{"identical": false, "description": "Button text changed from Submit to Send."}', + done: true, + }); + const service = await initService({ getImageMock, saveImageMock, ollamaGenerateMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: true }, + DEFAULT_CONFIG + ); + + expect(result.status).toBe(TestStatus.unresolved); + expect(result.vlmDescription).toBe('Button text changed from Submit to Send.'); + expect(result.diffName).toBe('diff.png'); + expect(result.pixelMisMatchCount).toBeDefined(); + expect(result.diffPercent).toBeDefined(); + }); + + it('should handle invalid JSON response as error', async () => { + const getImageMock = jest.fn().mockReturnValue(image); + const ollamaGenerateMock = jest.fn().mockResolvedValue({ + response: 'Invalid JSON response from model', + done: true, + }); + const service = await initService({ getImageMock, ollamaGenerateMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + DEFAULT_CONFIG + ); + + expect(result.status).toBe(TestStatus.unresolved); + expect(result.vlmDescription).toContain('VLM analysis failed'); + }); + + it('should use custom model and temperature from config', async () => { + const getImageMock = jest.fn().mockReturnValue(image); + const ollamaGenerateMock = jest.fn().mockResolvedValue({ + response: '{"identical": true, "description": "No differences."}', + done: true, + }); + const service = await initService({ getImageMock, ollamaGenerateMock }); + + await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + { model: 'llava:13b', prompt: 'Custom context', temperature: 0.2 } + ); + + expect(ollamaGenerateMock).toHaveBeenCalledWith({ + model: 'llava:13b', + prompt: expect.stringContaining('Custom context'), + images: expect.any(Array), + format: 'json', + options: { temperature: 0.2 }, + }); + }); + + it('should handle API errors gracefully', async () => { + const getImageMock = jest.fn().mockReturnValue(image); + const ollamaGenerateMock = jest.fn().mockRejectedValue(new Error('Connection refused')); + const service = await initService({ getImageMock, ollamaGenerateMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + DEFAULT_CONFIG + ); + + expect(result.status).toBe(TestStatus.unresolved); + expect(result.vlmDescription).toContain('VLM analysis failed'); + 
expect(result.pixelMisMatchCount).toBe(0); + expect(result.diffName).toBeNull(); + }); + + it('should parse config with defaults for invalid input', async () => { + const service = await initService({}); + + expect(service.parseConfig('')).toStrictEqual(DEFAULT_CONFIG); + expect(service.parseConfig('invalid')).toStrictEqual(DEFAULT_CONFIG); + expect(service.parseConfig('{"model":"llava:7b"}').model).toBe('llava:7b'); + expect(service.parseConfig('{"model":"llava:7b"}').prompt).toBe(DEFAULT_CONFIG.prompt); + }); +}); diff --git a/src/compare/libs/vlm/vlm.service.ts b/src/compare/libs/vlm/vlm.service.ts new file mode 100644 index 00000000..b999aae0 --- /dev/null +++ b/src/compare/libs/vlm/vlm.service.ts @@ -0,0 +1,167 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { TestStatus } from '@prisma/client'; +import Pixelmatch from 'pixelmatch'; +import { PNG } from 'pngjs'; +import { StaticService } from '../../../static/static.service'; +import { DiffResult } from '../../../test-runs/diffResult'; +import { parseConfig, pngToBase64, scaleImageToSize } from '../../utils'; +import { NO_BASELINE_RESULT } from '../consts'; +import { ImageComparator } from '../image-comparator.interface'; +import { ImageCompareInput } from '../ImageCompareInput'; +import { VlmConfig } from './vlm.types'; +import { OllamaService } from './ollama.service'; +import { VlmComparisonResult } from './ollama.types'; + +export const SYSTEM_PROMPT = `Compare two UI screenshots for visual regression testing. + +CHECK for differences: +- Data: text, numbers, counts, values +- Elements: missing, added, or moved components +- State: selected, disabled, expanded, checked +- Structure: row/column count, list items, tabs + +IGNORE rendering artifacts: anti-aliasing, shadows, 1-2px shifts.`; + +// Internal constant - not exposed to user config to ensure consistent JSON output +const JSON_FORMAT_INSTRUCTION = ` +Respond with JSON: {"identical": true/false, "description": "explanation"} +- Set "identical": true if screenshots match or have only ignorable artifacts +- Set "identical": false if meaningful differences exist +- Always provide a brief description`; + +export const DEFAULT_CONFIG: VlmConfig = { + model: 'llava:7b', + prompt: SYSTEM_PROMPT, + temperature: 0.1, +}; + +@Injectable() +export class VlmService implements ImageComparator { + private readonly logger: Logger = new Logger(VlmService.name); + + constructor( + private readonly staticService: StaticService, + private readonly ollamaService: OllamaService + ) {} + + parseConfig(configJson: string): VlmConfig { + return parseConfig(configJson, DEFAULT_CONFIG, this.logger); + } + + async getDiff(data: ImageCompareInput, config: VlmConfig): Promise<DiffResult> { + const result: DiffResult = { + ...NO_BASELINE_RESULT, + }; + + const baseline = await this.staticService.getImage(data.baseline); + const image = await this.staticService.getImage(data.image); + + if (!baseline || !image) { + return NO_BASELINE_RESULT; + } + + result.isSameDimension = baseline.width === image.width && baseline.height === image.height; + + try { + const baselineBase64 = pngToBase64(baseline); + const imageBase64 = pngToBase64(image); + const { pass, description } = await this.compareImagesWithVLM(baselineBase64, imageBase64, config); + result.vlmDescription = description; + + if (pass) { + result.status = TestStatus.ok; + result.pixelMisMatchCount = 0; + result.diffPercent = 0; + result.diffName = null; + } else { + result.status = TestStatus.unresolved; + const pixelDiff =
this.calculatePixelDiff(baseline, image); + result.pixelMisMatchCount = pixelDiff.pixelMisMatchCount; + result.diffPercent = pixelDiff.diffPercent; + result.diffName = data.saveDiffAsFile ? await this.saveDiffImage(baseline, image) : null; + } + } catch (error) { + this.logger.error(`VLM comparison failed: ${error.message}`, error.stack); + result.status = TestStatus.unresolved; + result.pixelMisMatchCount = 0; + result.diffPercent = 0; + result.diffName = null; + result.vlmDescription = `VLM analysis failed: ${error.message}`; + } + + return result; + } + + private async compareImagesWithVLM( + baselineBase64: string, + imageBase64: string, + config: VlmConfig + ): Promise<{ pass: boolean; description: string }> { + const data = await this.ollamaService.generate({ + model: config.model, + prompt: `${config.prompt}\n${JSON_FORMAT_INSTRUCTION}`, + images: [baselineBase64, imageBase64], + format: 'json', + options: { + temperature: config.temperature, + }, + }); + + // Some models return result in thinking field instead of response + const content = data.response || data.thinking; + this.logger.debug(`VLM Response: ${content}`); + + if (!content) { + throw new Error('Empty response from model'); + } + + return this.parseVlmResponse(content); + } + + private parseVlmResponse(response: string): { pass: boolean; description: string } { + const parsed = JSON.parse(response) as VlmComparisonResult; + + if (typeof parsed.identical !== 'boolean') { + throw new TypeError('Missing or invalid "identical" field'); + } + + return { + pass: parsed.identical, + description: parsed.description || 'No description provided', + }; + } + + private calculatePixelDiff(baseline: PNG, image: PNG): { pixelMisMatchCount: number; diffPercent: number } { + const maxWidth = Math.max(baseline.width, image.width); + const maxHeight = Math.max(baseline.height, image.height); + const scaledBaseline = scaleImageToSize(baseline, maxWidth, maxHeight); + const scaledImage = scaleImageToSize(image, maxWidth, maxHeight); + + const diff = new PNG({ width: maxWidth, height: maxHeight }); + const pixelMisMatchCount = Pixelmatch(scaledBaseline.data, scaledImage.data, diff.data, maxWidth, maxHeight, { + threshold: 0.1, + includeAA: true, + }); + + const diffPercent = Number(((pixelMisMatchCount * 100) / (maxWidth * maxHeight)).toFixed(2)); + this.logger.debug(`Pixelmatch: ${pixelMisMatchCount} pixels (${diffPercent}%)`); + + return { pixelMisMatchCount, diffPercent }; + } + + private async saveDiffImage(baseline: PNG, image: PNG): Promise<string> { + const maxWidth = Math.max(baseline.width, image.width); + const maxHeight = Math.max(baseline.height, image.height); + const scaledBaseline = scaleImageToSize(baseline, maxWidth, maxHeight); + const scaledImage = scaleImageToSize(image, maxWidth, maxHeight); + + const diff = new PNG({ width: maxWidth, height: maxHeight }); + Pixelmatch(scaledBaseline.data, scaledImage.data, diff.data, maxWidth, maxHeight, { + threshold: 0.1, + includeAA: true, + }); + + const diffBuffer = PNG.sync.write(diff); + return this.staticService.saveImage('diff', diffBuffer); + } +} diff --git a/src/compare/libs/vlm/vlm.types.ts b/src/compare/libs/vlm/vlm.types.ts new file mode 100644 index 00000000..318cf43b --- /dev/null +++ b/src/compare/libs/vlm/vlm.types.ts @@ -0,0 +1,19 @@ +export interface VlmConfig { + /** + * Ollama vision model to use for image comparison. + * @default "llava:7b" + */ + model: string; + + /** + * Custom prompt for image comparison.
+ */ + prompt: string; + + /** + * Temperature parameter controlling response randomness (0.0-1.0). + * Lower values = more consistent results. + * @default 0.1 + */ + temperature: number; +} diff --git a/src/compare/utils/index.ts b/src/compare/utils/index.ts index 79e593b1..ad0c3666 100644 --- a/src/compare/utils/index.ts +++ b/src/compare/utils/index.ts @@ -2,6 +2,11 @@ import { Logger } from '@nestjs/common'; import { PNG } from 'pngjs'; import { IgnoreAreaDto } from 'src/test-runs/dto/ignore-area.dto'; +export function pngToBase64(png: PNG): string { + const buffer = PNG.sync.write(png); + return buffer.toString('base64'); +} + export function scaleImageToSize(image: PNG, width: number, height: number): PNG { if (width > image.width || height > image.height) { const preparedImage = new PNG({ width, height, fill: true }); diff --git a/src/test-runs/diffResult.ts b/src/test-runs/diffResult.ts index 66b7462c..b4219595 100644 --- a/src/test-runs/diffResult.ts +++ b/src/test-runs/diffResult.ts @@ -6,4 +6,10 @@ export interface DiffResult { pixelMisMatchCount: number; diffPercent: number; isSameDimension: boolean; + /** + * Optional human-readable description of the differences found (e.g., by a VLM or other AI analysis). + * Summarizes what changed between baseline and comparison image. + * Can be displayed in the UI alongside pixel-based results. + */ + vlmDescription?: string; } diff --git a/src/test-runs/dto/testRunResult.dto.ts b/src/test-runs/dto/testRunResult.dto.ts index a4364424..bde5cd04 100644 --- a/src/test-runs/dto/testRunResult.dto.ts +++ b/src/test-runs/dto/testRunResult.dto.ts @@ -5,6 +5,8 @@ import { TestRunDto } from './testRun.dto'; export class TestRunResultDto extends TestRunDto { @ApiPropertyOptional() pixelMisMatchCount?: number; + @ApiPropertyOptional() + vlmDescription?: string; @ApiProperty() url: string; @ApiProperty() @@ -14,6 +16,7 @@ export class TestRunResultDto extends TestRunDto { super(testRun); this.baselineName = testVariation.baselineName; this.pixelMisMatchCount = testRun.pixelMisMatchCount; + this.vlmDescription = testRun.vlmDescription; this.url = `${process.env.APP_FRONTEND_URL}/${testVariation.projectId}?buildId=${testRun.buildId}&testId=${testRun.id}`; } } diff --git a/src/test-runs/test-runs.service.ts b/src/test-runs/test-runs.service.ts index 40e7a254..ce3dddc6 100644 --- a/src/test-runs/test-runs.service.ts +++ b/src/test-runs/test-runs.service.ts @@ -173,6 +173,7 @@ export class TestRunsService { pixelMisMatchCount: diffResult && diffResult.pixelMisMatchCount, diffPercent: diffResult && diffResult.diffPercent, status: diffResult ?
diffResult.status : TestStatus.new, + vlmDescription: diffResult?.vlmDescription, }, }) .then((testRun) => { From a9d6da696ce7f6bb62a36639f2a7004be67f8a7c Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 14:36:23 +0200 Subject: [PATCH 02/13] test --- docker-compose.yml | 2 -- src/compare/libs/vlm/README.md | 34 ++++++++++++--------- src/compare/libs/vlm/ollama.controller.ts | 11 ++++++- src/compare/libs/vlm/ollama.service.spec.ts | 10 +++--- src/compare/libs/vlm/ollama.service.ts | 8 +---- src/compare/libs/vlm/ollama.types.ts | 1 - src/compare/libs/vlm/vlm.service.spec.ts | 6 +--- 7 files changed, 35 insertions(+), 37 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 7b610a2e..65bb9bf7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,13 +16,11 @@ services: APP_FRONTEND_URL: ${APP_FRONTEND_URL} BODY_PARSER_JSON_LIMIT: ${BODY_PARSER_JSON_LIMIT} ELASTIC_URL: ${ELASTIC_URL} - # VLM: Uncomment to use Ollama running on host machine OLLAMA_BASE_URL: http://host.docker.internal:11434 ports: - "${APP_PORT}:3000" expose: - "${APP_PORT}" - # VLM: Uncomment to use Ollama running on host machine extra_hosts: - host.docker.internal:host-gateway depends_on: diff --git a/src/compare/libs/vlm/README.md b/src/compare/libs/vlm/README.md index a8095d25..fa72431b 100644 --- a/src/compare/libs/vlm/README.md +++ b/src/compare/libs/vlm/README.md @@ -41,7 +41,7 @@ Set project's image comparison to `vlm` with config: } ``` -Optional custom prompt: +Optional custom prompt (replaces default system prompt): ```json { "model": "llava:7b", @@ -50,11 +50,15 @@ Optional custom prompt: } ``` +**Note:** The `prompt` field replaces the entire system prompt. If omitted, a default system prompt is used that focuses on semantic differences while ignoring rendering artifacts. + ## Recommended Models | Model | Size | Speed | Accuracy | Best For | |-------|------|-------|----------|----------| -| `llava:7b` | 4.7GB | ⚡⚡ | ⭐⭐⭐ | **Recommended** - best balance | +| `llava:7b` | 4.7GB | ⚡⚡ | ⭐⭐⭐ | **Recommended** - best balance (minimal) | +| `qwen3-vl:8b` | ~8GB | ⚡⚡ | ⭐⭐⭐ | Minimal model option | +| `gemma3:latest` | ~ | ⚡⚡ | ⭐⭐⭐ | Minimal model option | | `llava:13b` | 8GB | ⚡ | ⭐⭐⭐⭐ | Best accuracy | | `moondream` | 1.7GB | ⚡⚡⚡ | ⭐⭐ | Fast, may hallucinate | | `minicpm-v` | 5.5GB | ⚡⚡ | ⭐⭐⭐ | Good alternative | @@ -63,16 +67,23 @@ Optional custom prompt: | Option | Type | Default | Description | |--------|------|---------|-------------| -| `model` | string | `moondream` | Ollama vision model name | -| `prompt` | string | `""` | Custom context prepended to system prompt | -| `temperature` | number | `0.1` | Lower = more consistent results | +| `model` | string | `llava:7b` | Ollama vision model name | +| `prompt` | string | System prompt (see below) | Custom prompt for image comparison | +| `temperature` | number | `0.1` | Lower = more consistent results (0.0-1.0) | ## How It Works 1. VLM analyzes both images semantically -2. Returns `YES` (pass) or `NO` (fail) based on meaningful differences -3. Ignores technical differences (anti-aliasing, sub-pixel, minor spacing) -4. Provides description of differences found +2. Returns JSON with `{"identical": true/false, "description": "..."}` +3. `identical: true` = images match (pass), `identical: false` = differences found (fail) +4. Ignores technical differences (anti-aliasing, shadows, 1-2px shifts) +5. 
Provides description of differences found + +### Default System Prompt + +The default prompt instructs the model to: +- **CHECK** for: data changes, missing/added elements, state changes, structural differences +- **IGNORE**: rendering artifacts, anti-aliasing, shadows, minor pixel shifts ## API Endpoints @@ -83,10 +94,3 @@ GET /ollama/models # Compare two images (for testing) POST /ollama/compare?model=llava:7b&prompt=&temperature=0.1 ``` - -**Example:** -```bash -curl -X POST "http://localhost:3000/ollama/compare?model=llava:7b&prompt=Are%20these%20images%20the%20same&temperature=0.1" \ - -F "images=@baseline.png" \ - -F "images=@comparison.png" -``` diff --git a/src/compare/libs/vlm/ollama.controller.ts b/src/compare/libs/vlm/ollama.controller.ts index 3d9a98dc..3ede8190 100644 --- a/src/compare/libs/vlm/ollama.controller.ts +++ b/src/compare/libs/vlm/ollama.controller.ts @@ -1,4 +1,13 @@ -import { Controller, Get, Post, Query, HttpException, HttpStatus, UseInterceptors, UploadedFiles } from '@nestjs/common'; +import { + Controller, + Get, + Post, + Query, + HttpException, + HttpStatus, + UseInterceptors, + UploadedFiles, +} from '@nestjs/common'; import { FilesInterceptor } from '@nestjs/platform-express'; import { ApiTags, ApiConsumes, ApiBody } from '@nestjs/swagger'; import { OllamaService } from './ollama.service'; diff --git a/src/compare/libs/vlm/ollama.service.spec.ts b/src/compare/libs/vlm/ollama.service.spec.ts index 48abb0e8..aed8e63d 100644 --- a/src/compare/libs/vlm/ollama.service.spec.ts +++ b/src/compare/libs/vlm/ollama.service.spec.ts @@ -52,9 +52,9 @@ describe('OllamaService', () => { text: () => Promise.resolve('Internal Server Error'), }); - await expect( - service.generate({ model: 'llava', prompt: 'Test' }) - ).rejects.toThrow('Ollama API returned status 500'); + await expect(service.generate({ model: 'llava', prompt: 'Test' })).rejects.toThrow( + 'Ollama API returned status 500' + ); }); it('should throw error when OLLAMA_BASE_URL is not configured', async () => { @@ -65,9 +65,7 @@ describe('OllamaService', () => { } as any; const newService = new OllamaService(mockConfigService); - await expect( - newService.generate({ model: 'llava', prompt: 'Test' }) - ).rejects.toThrow('OLLAMA_BASE_URL'); + await expect(newService.generate({ model: 'llava', prompt: 'Test' })).rejects.toThrow('OLLAMA_BASE_URL'); }); }); diff --git a/src/compare/libs/vlm/ollama.service.ts b/src/compare/libs/vlm/ollama.service.ts index c585ee7b..2d6629d8 100644 --- a/src/compare/libs/vlm/ollama.service.ts +++ b/src/compare/libs/vlm/ollama.service.ts @@ -1,11 +1,6 @@ import { Injectable, Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; -import { - OllamaGenerateRequest, - OllamaGenerateResponse, - OllamaModel, - OllamaModelsResponse, -} from './ollama.types'; +import { OllamaGenerateRequest, OllamaGenerateResponse, OllamaModel, OllamaModelsResponse } from './ollama.types'; @Injectable() export class OllamaService { @@ -60,4 +55,3 @@ export class OllamaService { } } } - diff --git a/src/compare/libs/vlm/ollama.types.ts b/src/compare/libs/vlm/ollama.types.ts index 505bab2e..2f90ee78 100644 --- a/src/compare/libs/vlm/ollama.types.ts +++ b/src/compare/libs/vlm/ollama.types.ts @@ -38,4 +38,3 @@ export interface OllamaModel { export interface OllamaModelsResponse { models: OllamaModel[]; } - diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts index ddb5b8ea..5a24f96c 100644 --- a/src/compare/libs/vlm/vlm.service.spec.ts +++ 
b/src/compare/libs/vlm/vlm.service.spec.ts @@ -6,11 +6,7 @@ import { NO_BASELINE_RESULT } from '../consts'; import { DEFAULT_CONFIG, VlmService } from './vlm.service'; import { OllamaService } from './ollama.service'; -const initService = async ({ - getImageMock = jest.fn(), - saveImageMock = jest.fn(), - ollamaGenerateMock = jest.fn(), -}) => { +const initService = async ({ getImageMock = jest.fn(), saveImageMock = jest.fn(), ollamaGenerateMock = jest.fn() }) => { const module: TestingModule = await Test.createTestingModule({ providers: [ VlmService, From 016d7f58c46da0a2a92dc4348747f4e52de83afd Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 14:54:04 +0200 Subject: [PATCH 03/13] test --- src/compare/compare.service.spec.ts | 19 ++++++++++++++++++- src/compare/libs/vlm/vlm.service.spec.ts | 21 +++++++++++++++------ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/compare/compare.service.spec.ts b/src/compare/compare.service.spec.ts index 9940b176..3ba6f821 100644 --- a/src/compare/compare.service.spec.ts +++ b/src/compare/compare.service.spec.ts @@ -1,9 +1,12 @@ import { Test, TestingModule } from '@nestjs/testing'; +import { ConfigService } from '@nestjs/config'; import { PrismaService } from '../prisma/prisma.service'; import { CompareService } from './compare.service'; import { LookSameService } from './libs/looks-same/looks-same.service'; import { OdiffService } from './libs/odiff/odiff.service'; import { PixelmatchService } from './libs/pixelmatch/pixelmatch.service'; +import { VlmService } from './libs/vlm/vlm.service'; +import { OllamaService } from './libs/vlm/ollama.service'; import { StaticModule } from '../static/static.module'; import { ImageComparison } from '@prisma/client'; import * as utils from '../static/utils'; @@ -16,7 +19,21 @@ describe('CompareService', () => { beforeEach(async () => { const module: TestingModule = await Test.createTestingModule({ - providers: [CompareService, OdiffService, PixelmatchService, LookSameService, PrismaService], + providers: [ + CompareService, + OdiffService, + PixelmatchService, + LookSameService, + VlmService, + OllamaService, + PrismaService, + { + provide: ConfigService, + useValue: { + getOrThrow: jest.fn().mockReturnValue('http://localhost:11434'), + }, + }, + ], imports: [StaticModule], }).compile(); diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts index 5a24f96c..c6e54fca 100644 --- a/src/compare/libs/vlm/vlm.service.spec.ts +++ b/src/compare/libs/vlm/vlm.service.spec.ts @@ -139,12 +139,21 @@ describe('VlmService', () => { expect(result.diffName).toBeNull(); }); - it('should parse config with defaults for invalid input', async () => { + it.each([ + ['empty string', '', DEFAULT_CONFIG], + ['invalid JSON', 'invalid', DEFAULT_CONFIG], + ['partial config', '{"model":"llava:7b"}', { model: 'llava:7b' }], + [ + 'full config', + '{"model":"llava:13b","prompt":"Custom prompt","temperature":0.2}', + { + model: 'llava:13b', + prompt: 'Custom prompt', + temperature: 0.2, + }, + ], + ])('should parse config: %s', async (_, configJson, expected) => { const service = await initService({}); - - expect(service.parseConfig('')).toStrictEqual(DEFAULT_CONFIG); - expect(service.parseConfig('invalid')).toStrictEqual(DEFAULT_CONFIG); - expect(service.parseConfig('{"model":"llava:7b"}').model).toBe('llava:7b'); - expect(service.parseConfig('{"model":"llava:7b"}').prompt).toBe(DEFAULT_CONFIG.prompt); + expect(service.parseConfig(configJson)).toEqual(expected); }); }); 
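For readers following the series so far: the patches above introduce `VlmService`, wire `vlm` into the comparator switch, and exercise `parseConfig`. The sketch below illustrates the round trip the service makes against Ollama's `/api/generate` endpoint, using only the request/response fields declared in `ollama.types.ts`. It is a standalone illustration, not part of the patches; the base URL and model name are the defaults assumed throughout the series, and the prompt is abbreviated.

```typescript
// Standalone sketch of the VlmService -> Ollama round trip (cf. PATCH 01).
// Runs on Node 18+ (global fetch). Field names mirror ollama.types.ts.
interface OllamaGenerateRequest {
  model: string;
  prompt: string;
  images?: string[]; // base64-encoded PNGs: [baseline, comparison]
  stream?: boolean;
  format?: 'json';
  options?: { temperature?: number };
}

interface VlmComparisonResult {
  identical: boolean;
  description: string;
}

async function compareScreenshots(baselineB64: string, imageB64: string): Promise<VlmComparisonResult> {
  const request: OllamaGenerateRequest = {
    model: 'llava:7b', // default model assumed by DEFAULT_CONFIG
    prompt: 'Compare two UI screenshots... Respond with JSON: {"identical": true/false, "description": "..."}',
    images: [baselineB64, imageB64],
    stream: false, // one JSON body instead of a token stream
    format: 'json', // ask Ollama to constrain output to valid JSON
    options: { temperature: 0.1 },
  };

  const res = await fetch('http://localhost:11434/api/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
  });
  if (!res.ok) {
    throw new Error(`Ollama API returned status ${res.status}: ${await res.text()}`);
  }

  // With stream: false, the body is a single JSON object whose `response`
  // field holds the model output as a string; parsing it mirrors parseVlmResponse.
  const data: { response: string; done: boolean } = await res.json();
  return JSON.parse(data.response) as VlmComparisonResult;
}
```

In the service itself, `identical: true` maps to `TestStatus.ok` and `identical: false` (or a JSON parse failure) maps to `TestStatus.unresolved`, as covered by the tests in `vlm.service.spec.ts`.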
From 136feea1fabdbe45c392173b4704b7bb607fc95d Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 16:46:20 +0200 Subject: [PATCH 04/13] test --- .github/workflows/workflow.yml | 10 ++++++++-- prisma/Dockerfile | 4 +--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 43f7bbc0..72659fba 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -27,8 +27,14 @@ jobs: - name: Unit tests run: npm run test:cov - - name: Build and run containers - run: docker compose up --build -d + - name: Start postgres + run: docker compose up postgres -d + + - name: Run migrations + run: docker compose up --build migration + + - name: Start remaining containers + run: docker compose up api -d - name: Run acceptance tests run: npm run test:acceptance diff --git a/prisma/Dockerfile b/prisma/Dockerfile index f9ca9969..cbf33d39 100644 --- a/prisma/Dockerfile +++ b/prisma/Dockerfile @@ -12,6 +12,4 @@ RUN npm ci --verbose RUN chmod +x /app/wait-for-it.sh RUN chmod +x /app/entrypoint.sh -ENTRYPOINT ["/app/entrypoint.sh"] - -CMD ["sh"] \ No newline at end of file +ENTRYPOINT ["/app/entrypoint.sh"] \ No newline at end of file From 23ec94130e7fd6153f28ce0d7286f72909e49bec Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 17:59:14 +0200 Subject: [PATCH 05/13] test --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 72659fba..9dfe0c04 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -31,7 +31,7 @@ jobs: run: docker compose up postgres -d - name: Run migrations - run: docker compose up --build migration + run: docker compose build migration && docker compose run --rm migration - name: Start remaining containers run: docker compose up api -d From ba8914782ce30abea31544210b9bf1736e3691af Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 18:12:16 +0200 Subject: [PATCH 06/13] test --- .github/workflows/workflow.yml | 10 ++-------- .../20251209181002_add_vlm_support/migration.sql | 6 ++++++ prisma/schema.prisma | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 prisma/migrations/20251209181002_add_vlm_support/migration.sql diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 9dfe0c04..43f7bbc0 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -27,14 +27,8 @@ jobs: - name: Unit tests run: npm run test:cov - - name: Start postgres - run: docker compose up postgres -d - - - name: Run migrations - run: docker compose build migration && docker compose run --rm migration - - - name: Start remaining containers - run: docker compose up api -d + - name: Build and run containers + run: docker compose up --build -d - name: Run acceptance tests run: npm run test:acceptance diff --git a/prisma/migrations/20251209181002_add_vlm_support/migration.sql b/prisma/migrations/20251209181002_add_vlm_support/migration.sql new file mode 100644 index 00000000..bed01119 --- /dev/null +++ b/prisma/migrations/20251209181002_add_vlm_support/migration.sql @@ -0,0 +1,6 @@ +-- AlterEnum +ALTER TYPE "ImageComparison" ADD VALUE 'vlm'; + +-- AlterTable +ALTER TABLE "TestRun" ADD COLUMN "vlmDescription" TEXT; + diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 040e758f..93f0cda9 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -69,7 +69,7 
@@ model TestRun { baselineBranchName String? ignoreAreas String @default("[]") tempIgnoreAreas String @default("[]") - vlmDescription String? + vlmDescription String? @default(null) baseline Baseline? build Build @relation(fields: [buildId], references: [id]) project Project? @relation(fields: [projectId], references: [id]) From d697385ba260430092570953fc5569c1c41e1786 Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 18:30:33 +0200 Subject: [PATCH 07/13] test --- prisma/schema.prisma | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 93f0cda9..040e758f 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -69,7 +69,7 @@ model TestRun { baselineBranchName String? ignoreAreas String @default("[]") tempIgnoreAreas String @default("[]") - vlmDescription String? @default(null) + vlmDescription String? baseline Baseline? build Build @relation(fields: [buildId], references: [id]) project Project? @relation(fields: [projectId], references: [id]) From ea9004fd1f5b944badf39055c19c3b9413f26e48 Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 18:31:07 +0200 Subject: [PATCH 08/13] Update workflow.yml --- .github/workflows/workflow.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 43f7bbc0..30de2cb7 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -34,6 +34,7 @@ jobs: run: npm run test:acceptance - name: Run e2e tests + timeout-minutes: 1 run: npm run test:e2e - name: Dump docker logs on failure From f2280b9254c556cc0758a0222912c9c61a066d14 Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Tue, 9 Dec 2025 18:39:28 +0200 Subject: [PATCH 09/13] Update workflow.yml --- .github/workflows/workflow.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 30de2cb7..e3977402 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -33,8 +33,10 @@ jobs: - name: Run acceptance tests run: npm run test:acceptance + - name: Wait for services to be ready with migrations + run: sleep 30 + - name: Run e2e tests - timeout-minutes: 1 run: npm run test:e2e - name: Dump docker logs on failure From 187278384b2a993c9a8cbf58e09a16ea2927929b Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Wed, 17 Dec 2025 18:33:27 +0200 Subject: [PATCH 10/13] test --- src/compare/libs/vlm/README.md | 1 - src/compare/libs/vlm/vlm.service.spec.ts | 12 ++--- src/compare/libs/vlm/vlm.service.ts | 65 +++++++----------------- src/test-runs/dto/testRun.dto.spec.ts | 39 ++++++++++++++ src/test-runs/dto/testRun.dto.ts | 3 ++ src/test-runs/test-runs.service.spec.ts | 7 ++- src/test-runs/test-runs.service.ts | 2 +- 7 files changed, 73 insertions(+), 56 deletions(-) create mode 100644 src/test-runs/dto/testRun.dto.spec.ts diff --git a/src/compare/libs/vlm/README.md b/src/compare/libs/vlm/README.md index fa72431b..e317049d 100644 --- a/src/compare/libs/vlm/README.md +++ b/src/compare/libs/vlm/README.md @@ -57,7 +57,6 @@ Optional custom prompt (replaces default system prompt): | Model | Size | Speed | Accuracy | Best For | |-------|------|-------|----------|----------| | `llava:7b` | 4.7GB | ⚡⚡ | ⭐⭐⭐ | **Recommended** - best balance (minimal) | -| `qwen3-vl:8b` | ~8GB | ⚡⚡ | ⭐⭐⭐ | Minimal model option | | `gemma3:latest` | ~ | ⚡⚡ | ⭐⭐⭐ | Minimal model option | | `llava:13b` | 8GB | ⚡ | ⭐⭐⭐⭐ | Best accuracy | | `moondream` 
| 1.7GB | ⚡⚡⚡ | ⭐⭐ | Fast, may hallucinate | diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts index c6e54fca..75b31bd9 100644 --- a/src/compare/libs/vlm/vlm.service.spec.ts +++ b/src/compare/libs/vlm/vlm.service.spec.ts @@ -6,7 +6,7 @@ import { NO_BASELINE_RESULT } from '../consts'; import { DEFAULT_CONFIG, VlmService } from './vlm.service'; import { OllamaService } from './ollama.service'; -const initService = async ({ getImageMock = jest.fn(), saveImageMock = jest.fn(), ollamaGenerateMock = jest.fn() }) => { +const initService = async ({ getImageMock = jest.fn(), ollamaGenerateMock = jest.fn() }) => { const module: TestingModule = await Test.createTestingModule({ providers: [ VlmService, @@ -14,7 +14,6 @@ const initService = async ({ getImageMock = jest.fn(), saveImageMock = jest.fn() provide: StaticService, useValue: { getImage: getImageMock, - saveImage: saveImageMock, }, }, { @@ -65,12 +64,11 @@ describe('VlmService', () => { it('should return unresolved when VLM returns identical=false in JSON', async () => { const getImageMock = jest.fn().mockReturnValue(image); - const saveImageMock = jest.fn().mockResolvedValue('diff.png'); const ollamaGenerateMock = jest.fn().mockResolvedValue({ response: '{"identical": false, "description": "Button text changed from Submit to Send."}', done: true, }); - const service = await initService({ getImageMock, saveImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock }); const result = await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: true }, @@ -79,9 +77,9 @@ describe('VlmService', () => { expect(result.status).toBe(TestStatus.unresolved); expect(result.vlmDescription).toBe('Button text changed from Submit to Send.'); - expect(result.diffName).toBe('diff.png'); - expect(result.pixelMisMatchCount).toBeDefined(); - expect(result.diffPercent).toBeDefined(); + expect(result.diffName).toBeNull(); + expect(result.pixelMisMatchCount).toBe(0); + expect(result.diffPercent).toBe(0); }); it('should handle invalid JSON response as error', async () => { diff --git a/src/compare/libs/vlm/vlm.service.ts b/src/compare/libs/vlm/vlm.service.ts index b999aae0..278c70e7 100644 --- a/src/compare/libs/vlm/vlm.service.ts +++ b/src/compare/libs/vlm/vlm.service.ts @@ -1,10 +1,8 @@ import { Injectable, Logger } from '@nestjs/common'; import { TestStatus } from '@prisma/client'; -import Pixelmatch from 'pixelmatch'; -import { PNG } from 'pngjs'; import { StaticService } from '../../../static/static.service'; import { DiffResult } from '../../../test-runs/diffResult'; -import { parseConfig, pngToBase64, scaleImageToSize } from '../../utils'; +import { parseConfig, pngToBase64 } from '../../utils'; import { NO_BASELINE_RESULT } from '../consts'; import { ImageComparator } from '../image-comparator.interface'; import { ImageCompareInput } from '../ImageCompareInput'; @@ -23,11 +21,21 @@ CHECK for differences: IGNORE rendering artifacts: anti-aliasing, shadows, 1-2px shifts.`; // Internal constant - not exposed to user config to ensure consistent JSON output -const JSON_FORMAT_INSTRUCTION = ` -Respond with JSON: {"identical": true/false, "description": "explanation"} -- Set "identical": true if screenshots match or have only ignorable artifacts -- Set "identical": false if meaningful differences exist -- Always provide a brief description`; +const JSON_FORMAT_INSTRUCTION = `CRITICAL: You must respond with ONLY 
valid JSON in this exact format: +{"identical": Boolean, "description": String} + +**JSON Schema Reference:** +The JSON object MUST conform to the following schema: +{ + "identical": <boolean>, + "description": <string> +} + +**Requirements:** +1. **"identical":** Must be a standard boolean (\`true\` or \`false\`). +2. **"description":** Must be a detailed string explaining the reasoning. + * If identical is \`true\`, the description should be "Screenshots are functionally identical based on all comparison criteria." + * If identical is \`false\`, the description must clearly and concisely list the differences found (e.g., "The user count changed from 12 to 15, and the 'New User' button is missing."). Escape any internal double quotes with \\".`; export const DEFAULT_CONFIG: VlmConfig = { model: 'llava:7b', @@ -75,10 +83,9 @@ export class VlmService implements ImageComparator { result.diffName = null; } else { result.status = TestStatus.unresolved; - const pixelDiff = this.calculatePixelDiff(baseline, image); - result.pixelMisMatchCount = pixelDiff.pixelMisMatchCount; - result.diffPercent = pixelDiff.diffPercent; - result.diffName = data.saveDiffAsFile ? await this.saveDiffImage(baseline, image) : null; + result.pixelMisMatchCount = 0; + result.diffPercent = 0; + result.diffName = null; } } catch (error) { this.logger.error(`VLM comparison failed: ${error.message}`, error.stack); @@ -130,38 +137,4 @@ export class VlmService implements ImageComparator { description: parsed.description || 'No description provided', }; } - - private calculatePixelDiff(baseline: PNG, image: PNG): { pixelMisMatchCount: number; diffPercent: number } { - const maxWidth = Math.max(baseline.width, image.width); - const maxHeight = Math.max(baseline.height, image.height); - const scaledBaseline = scaleImageToSize(baseline, maxWidth, maxHeight); - const scaledImage = scaleImageToSize(image, maxWidth, maxHeight); - - const diff = new PNG({ width: maxWidth, height: maxHeight }); - const pixelMisMatchCount = Pixelmatch(scaledBaseline.data, scaledImage.data, diff.data, maxWidth, maxHeight, { - threshold: 0.1, - includeAA: true, - }); - - const diffPercent = Number(((pixelMisMatchCount * 100) / (maxWidth * maxHeight)).toFixed(2)); - this.logger.debug(`Pixelmatch: ${pixelMisMatchCount} pixels (${diffPercent}%)`); - - return { pixelMisMatchCount, diffPercent }; - } - - private async saveDiffImage(baseline: PNG, image: PNG): Promise<string> { - const maxWidth = Math.max(baseline.width, image.width); - const maxHeight = Math.max(baseline.height, image.height); - const scaledBaseline = scaleImageToSize(baseline, maxWidth, maxHeight); - const scaledImage = scaleImageToSize(image, maxWidth, maxHeight); - - const diff = new PNG({ width: maxWidth, height: maxHeight }); - Pixelmatch(scaledBaseline.data, scaledImage.data, diff.data, maxWidth, maxHeight, { - threshold: 0.1, - includeAA: true, - }); - - const diffBuffer = PNG.sync.write(diff); - return this.staticService.saveImage('diff', diffBuffer); - } } diff --git a/src/test-runs/dto/testRun.dto.spec.ts b/src/test-runs/dto/testRun.dto.spec.ts new file mode 100644 index 00000000..88e21aa2 --- /dev/null +++ b/src/test-runs/dto/testRun.dto.spec.ts @@ -0,0 +1,39 @@ +import { TestRun } from '@prisma/client'; +import { generateTestRun } from '../../_data_'; +import { TestRunDto } from './testRun.dto'; + +describe('TestRunDto', () => { + it('should map all fields correctly including vlmDescription', () => { + const testRun: TestRun = generateTestRun({ + vlmDescription: 'VLM analysis result', + }); + + const
result = new TestRunDto(testRun); + + expect(result).toMatchObject({ + id: testRun.id, + buildId: testRun.buildId, + imageName: testRun.imageName, + diffName: testRun.diffName, + diffPercent: testRun.diffPercent, + diffTollerancePercent: testRun.diffTollerancePercent, + status: testRun.status, + testVariationId: testRun.testVariationId, + name: testRun.name, + baselineName: testRun.baselineName, + os: testRun.os, + browser: testRun.browser, + viewport: testRun.viewport, + device: testRun.device, + customTags: testRun.customTags, + ignoreAreas: testRun.ignoreAreas, + tempIgnoreAreas: testRun.tempIgnoreAreas, + comment: testRun.comment, + branchName: testRun.branchName, + baselineBranchName: testRun.baselineBranchName, + merge: testRun.merge, + vlmDescription: testRun.vlmDescription, + }); + }); +}); + diff --git a/src/test-runs/dto/testRun.dto.ts b/src/test-runs/dto/testRun.dto.ts index 748d9e22..4aa53168 100644 --- a/src/test-runs/dto/testRun.dto.ts +++ b/src/test-runs/dto/testRun.dto.ts @@ -44,6 +44,8 @@ export class TestRunDto { baselineBranchName: string; @ApiProperty() merge: boolean; + @ApiPropertyOptional() + vlmDescription?: string; constructor(testRun: TestRun) { this.id = testRun.id; @@ -67,5 +69,6 @@ export class TestRunDto { this.branchName = testRun.branchName; this.baselineBranchName = testRun.baselineBranchName; this.merge = testRun.merge; + this.vlmDescription = testRun.vlmDescription; } } diff --git a/src/test-runs/test-runs.service.spec.ts b/src/test-runs/test-runs.service.spec.ts index ae3c8991..2f2c9d9f 100644 --- a/src/test-runs/test-runs.service.spec.ts +++ b/src/test-runs/test-runs.service.spec.ts @@ -345,6 +345,7 @@ describe('TestRunsService', () => { diffName: null, pixelMisMatchCount: null, diffPercent: null, + vlmDescription: null, }, }); expect(eventTestRunUpdatedMock).toHaveBeenCalledWith(testRun); @@ -357,6 +358,7 @@ describe('TestRunsService', () => { pixelMisMatchCount: 11, diffPercent: 22, isSameDimension: true, + vlmDescription: 'VLM detected significant color differences in the header section', }; const id = 'some id'; const testRunUpdateMock = jest.fn().mockResolvedValueOnce(testRun); @@ -375,6 +377,7 @@ describe('TestRunsService', () => { diffName: diff.diffName, pixelMisMatchCount: diff.pixelMisMatchCount, diffPercent: diff.diffPercent, + vlmDescription: diff.vlmDescription, }, }); expect(eventTestRunUpdatedMock).toHaveBeenCalledWith(testRun); @@ -383,7 +386,9 @@ describe('TestRunsService', () => { it('findMany', async () => { const buildId = 'some id'; - const testRun: TestRun = generateTestRun(); + const testRun: TestRun = generateTestRun({ + vlmDescription: 'VLM analysis completed', + }); const testRunFindManyMock = jest.fn().mockResolvedValueOnce([testRun]); service = await initService({ testRunFindManyMock, diff --git a/src/test-runs/test-runs.service.ts b/src/test-runs/test-runs.service.ts index ce3dddc6..674893c8 100644 --- a/src/test-runs/test-runs.service.ts +++ b/src/test-runs/test-runs.service.ts @@ -173,7 +173,7 @@ export class TestRunsService { pixelMisMatchCount: diffResult && diffResult.pixelMisMatchCount, diffPercent: diffResult && diffResult.diffPercent, status: diffResult ? 
diffResult.status : TestStatus.new, - vlmDescription: diffResult?.vlmDescription, + vlmDescription: diffResult && diffResult?.vlmDescription, }, }) .then((testRun) => { From 4af87c72b5f547a871c4e5846a6c5448ad965b4e Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Thu, 18 Dec 2025 11:41:49 +0200 Subject: [PATCH 11/13] update --- package-lock.json | 16 ++ package.json | 1 + src/compare/libs/vlm/ollama.controller.ts | 9 +- src/compare/libs/vlm/ollama.service.spec.ts | 197 ++++++++++++++++---- src/compare/libs/vlm/ollama.service.ts | 47 ++--- src/compare/libs/vlm/ollama.types.ts | 36 ---- src/compare/libs/vlm/vlm.service.spec.ts | 86 ++++++++- src/compare/libs/vlm/vlm.service.ts | 40 ++-- src/compare/libs/vlm/vlm.types.ts | 7 + 9 files changed, 312 insertions(+), 127 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7a8a8c91..7b0eaa81 100644 --- a/package-lock.json +++ b/package-lock.json @@ -34,6 +34,7 @@ "ldapts": "^7.1.0", "looks-same": "^9.0.0", "odiff-bin": "^2.6.1", + "ollama": "^0.6.3", "passport": "^0.6.0", "passport-jwt": "^4.0.1", "passport-local": "^1.0.0", @@ -11581,6 +11582,15 @@ "devOptional": true, "license": "MIT" }, + "node_modules/ollama": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/ollama/-/ollama-0.6.3.tgz", + "integrity": "sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg==", + "license": "MIT", + "dependencies": { + "whatwg-fetch": "^3.6.20" + } + }, "node_modules/on-finished": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", @@ -14686,6 +14696,12 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/whatwg-fetch": { + "version": "3.6.20", + "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz", + "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==", + "license": "MIT" + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index d5c743d3..ed2f36ac 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "ldapts": "^7.1.0", "looks-same": "^9.0.0", "odiff-bin": "^2.6.1", + "ollama": "^0.6.3", "passport": "^0.6.0", "passport-jwt": "^4.0.1", "passport-local": "^1.0.0", diff --git a/src/compare/libs/vlm/ollama.controller.ts b/src/compare/libs/vlm/ollama.controller.ts index 3ede8190..1bf0db11 100644 --- a/src/compare/libs/vlm/ollama.controller.ts +++ b/src/compare/libs/vlm/ollama.controller.ts @@ -50,9 +50,14 @@ export class OllamaController { return this.ollamaService.generate({ model, - prompt, + messages: [ + { + role: 'user', + content: prompt, + images: files.map((f) => new Uint8Array(f.buffer)), + }, + ], format: 'json', - images: files.map((f) => f.buffer.toString('base64')), options: { temperature: Number(temperature) }, }); } diff --git a/src/compare/libs/vlm/ollama.service.spec.ts b/src/compare/libs/vlm/ollama.service.spec.ts index aed8e63d..0a56b905 100644 --- a/src/compare/libs/vlm/ollama.service.spec.ts +++ b/src/compare/libs/vlm/ollama.service.spec.ts @@ -2,10 +2,28 @@ import { Test, TestingModule } from '@nestjs/testing'; import { ConfigService } from '@nestjs/config'; import { OllamaService } from './ollama.service'; +// Mock the ollama module +const mockChat = jest.fn(); +const mockList = jest.fn(); + +jest.mock('ollama', () => { + const MockOllama = jest.fn().mockImplementation(() => ({ + chat: mockChat, + list: mockList, 
+ })); + return { + Ollama: MockOllama, + }; +}); + + describe('OllamaService', () => { let service: OllamaService; beforeEach(async () => { + // Reset mocks + jest.clearAllMocks(); + const module: TestingModule = await Test.createTestingModule({ providers: [ OllamaService, @@ -22,39 +40,106 @@ describe('OllamaService', () => { }); describe('generate', () => { - it('should call Ollama API with correct parameters', async () => { - const mockResponse = { response: 'YES', done: true }; - globalThis.fetch = jest.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve(mockResponse), - }); + it('should call Ollama SDK with correct parameters for Uint8Array', async () => { + const mockResponse = { + model: 'llava', + created_at: new Date(), + message: { content: 'YES', role: 'assistant' }, + done: true, + done_reason: 'stop', + total_duration: 1000, + load_duration: 100, + prompt_eval_count: 10, + prompt_eval_duration: 200, + eval_count: 5, + eval_duration: 300, + }; + mockChat.mockResolvedValue(mockResponse); + const testBytes = new Uint8Array([1, 2, 3, 4]); const result = await service.generate({ model: 'llava', - prompt: 'Test prompt', - images: ['base64img'], + messages: [ + { + role: 'user', + content: 'Test prompt', + images: [testBytes], + }, + ], }); - expect(fetch).toHaveBeenCalledWith( - 'http://localhost:11434/api/generate', - expect.objectContaining({ - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - }) - ); - expect(result).toEqual(mockResponse); + expect(mockChat).toHaveBeenCalledWith({ + model: 'llava', + messages: [ + { + role: 'user', + content: 'Test prompt', + images: [testBytes], + }, + ], + stream: false, + format: undefined, + options: undefined, + }); + expect(result.message.content).toBe('YES'); + expect(result.done).toBe(true); }); - it('should throw error when API returns non-ok status', async () => { - globalThis.fetch = jest.fn().mockResolvedValue({ - ok: false, - status: 500, - text: () => Promise.resolve('Internal Server Error'), + it('should call Ollama SDK with correct parameters for base64 strings', async () => { + const mockResponse = { + model: 'llava', + created_at: new Date(), + message: { content: 'YES', role: 'assistant' }, + done: true, + done_reason: 'stop', + total_duration: 1000, + load_duration: 100, + prompt_eval_count: 10, + prompt_eval_duration: 200, + eval_count: 5, + eval_duration: 300, + }; + mockChat.mockResolvedValue(mockResponse); + + // Use a longer base64 string + const longBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; + const result = await service.generate({ + model: 'llava', + messages: [ + { + role: 'user', + content: 'Test prompt', + images: [longBase64], // base64 string - passed through as-is + }, + ], }); - await expect(service.generate({ model: 'llava', prompt: 'Test' })).rejects.toThrow( - 'Ollama API returned status 500' - ); + expect(mockChat).toHaveBeenCalledWith({ + model: 'llava', + messages: [ + { + role: 'user', + content: 'Test prompt', + images: [longBase64], + }, + ], + stream: false, + format: undefined, + options: undefined, + }); + expect(result.message.content).toBe('YES'); + expect(result.done).toBe(true); + }); + + it('should throw error when SDK call fails', async () => { + mockChat.mockRejectedValue(new Error('Connection refused')); + + await expect( + service.generate({ + model: 'llava', + messages: [{ role: 'user', content: 'Test' }], + }) + ).rejects.toThrow('Connection refused'); }); it('should throw error when 
OLLAMA_BASE_URL is not configured', async () => {
@@ -65,32 +150,68 @@ describe('OllamaService', () => {
       } as any;
       const newService = new OllamaService(mockConfigService);
 
-      await expect(newService.generate({ model: 'llava', prompt: 'Test' })).rejects.toThrow('OLLAMA_BASE_URL');
+      await expect(
+        newService.generate({
+          model: 'llava',
+          messages: [{ role: 'user', content: 'Test' }],
+        })
+      ).rejects.toThrow('OLLAMA_BASE_URL');
     });
   });
 
   describe('listModels', () => {
     it('should return list of models', async () => {
-      const mockModels = { models: [{ name: 'llava:7b' }, { name: 'moondream' }] };
-      globalThis.fetch = jest.fn().mockResolvedValue({
-        ok: true,
-        json: () => Promise.resolve(mockModels),
-      });
+      const mockDate = new Date('2024-01-01');
+      const mockResponse = {
+        models: [
+          {
+            name: 'llava:7b',
+            model: 'llava:7b',
+            size: 1000,
+            digest: 'abc123',
+            modified_at: mockDate,
+            expires_at: mockDate,
+            size_vram: 500,
+            details: {
+              parent_model: '',
+              format: 'gguf',
+              family: 'llama',
+              families: ['llama'],
+              parameter_size: '7B',
+              quantization_level: 'Q4_0',
+            },
+          },
+          {
+            name: 'moondream',
+            model: 'moondream',
+            size: 2000,
+            digest: 'def456',
+            modified_at: mockDate,
+            expires_at: mockDate,
+            size_vram: 1000,
+            details: {
+              parent_model: '',
+              format: 'gguf',
+              family: 'moondream',
+              families: ['moondream'],
+              parameter_size: '1.6B',
+              quantization_level: 'Q4_0',
+            },
+          },
+        ],
+      };
+      mockList.mockResolvedValue(mockResponse);
 
       const result = await service.listModels();
 
-      expect(fetch).toHaveBeenCalledWith('http://localhost:11434/api/tags');
-      expect(result).toEqual(mockModels.models);
+      expect(mockList).toHaveBeenCalled();
+      expect(result).toEqual(mockResponse.models);
     });
 
     it('should throw error when API fails', async () => {
-      globalThis.fetch = jest.fn().mockResolvedValue({
-        ok: false,
-        status: 503,
-        text: () => Promise.resolve('Service Unavailable'),
-      });
+      mockList.mockRejectedValue(new Error('Service Unavailable'));
 
-      await expect(service.listModels()).rejects.toThrow('Failed to list models');
+      await expect(service.listModels()).rejects.toThrow('Service Unavailable');
     });
   });
 });
diff --git a/src/compare/libs/vlm/ollama.service.ts b/src/compare/libs/vlm/ollama.service.ts
index 2d6629d8..7fad0106 100644
--- a/src/compare/libs/vlm/ollama.service.ts
+++ b/src/compare/libs/vlm/ollama.service.ts
@@ -1,54 +1,43 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
-import { OllamaGenerateRequest, OllamaGenerateResponse, OllamaModel, OllamaModelsResponse } from './ollama.types';
+import { Ollama, ChatRequest, ChatResponse, ListResponse, ModelResponse } from 'ollama';
 
 @Injectable()
 export class OllamaService {
   private readonly logger: Logger = new Logger(OllamaService.name);
-  private baseUrl: string | null = null;
+  private ollamaClient: Ollama | null = null;
 
   constructor(private readonly configService: ConfigService) {}
 
-  private getBaseUrl(): string {
-    if (!this.baseUrl) {
-      this.baseUrl = this.configService.getOrThrow('OLLAMA_BASE_URL');
+  private getOllamaClient(): Ollama {
+    if (!this.ollamaClient) {
+      const baseUrl = this.configService.getOrThrow('OLLAMA_BASE_URL');
+      this.ollamaClient = new Ollama({ host: baseUrl });
     }
-    return this.baseUrl;
+    return this.ollamaClient;
   }
 
-  async generate(request: OllamaGenerateRequest): Promise<OllamaGenerateResponse> {
-    const baseUrl = this.getBaseUrl();
+  async generate(request: ChatRequest): Promise<ChatResponse> {
+    const client = this.getOllamaClient();
+
     try {
-      const response = await fetch(`${baseUrl}/api/generate`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ ...request, stream: request.stream ?? false }),
+      const response = await client.chat({
+        ...request,
+        stream: false,
       });
 
-      if (!response.ok) {
-        const errorText = await response.text();
-        throw new Error(`Ollama API returned status ${response.status}: ${errorText}`);
-      }
-
-      return await response.json();
+      return response;
     } catch (error) {
      this.logger.error(`Ollama generate request failed: ${error.message}`);
       throw error;
     }
   }
 
-  async listModels(): Promise<OllamaModel[]> {
-    const baseUrl = this.getBaseUrl();
+  async listModels(): Promise<ModelResponse[]> {
+    const client = this.getOllamaClient();
 
     try {
-      const response = await fetch(`${baseUrl}/api/tags`);
-
-      if (!response.ok) {
-        const errorText = await response.text();
-        throw new Error(`Failed to list models: ${response.status} ${errorText}`);
-      }
-
-      const data: OllamaModelsResponse = await response.json();
-      return data.models;
+      const response: ListResponse = await client.list();
+      return response.models;
     } catch (error) {
       this.logger.error(`Failed to list models: ${error.message}`);
       throw error;
diff --git a/src/compare/libs/vlm/ollama.types.ts b/src/compare/libs/vlm/ollama.types.ts
index 2f90ee78..f4b61053 100644
--- a/src/compare/libs/vlm/ollama.types.ts
+++ b/src/compare/libs/vlm/ollama.types.ts
@@ -1,40 +1,4 @@
-export interface OllamaGenerateRequest {
-  model: string;
-  prompt: string;
-  images?: string[];
-  stream?: boolean;
-  format?: 'json';
-  options?: {
-    temperature?: number;
-    top_k?: number;
-    top_p?: number;
-  };
-}
-
 export interface VlmComparisonResult {
   identical: boolean;
   description: string;
 }
-
-export interface OllamaGenerateResponse {
-  model: string;
-  created_at: string;
-  response: string;
-  thinking?: string;
-  done: boolean;
-  context?: number[];
-  total_duration?: number;
-  load_duration?: number;
-  prompt_eval_count?: number;
-}
-
-export interface OllamaModel {
-  name: string;
-  size?: number;
-  digest?: string;
-  modified_at?: string;
-}
-
-export interface OllamaModelsResponse {
-  models: OllamaModel[];
-}
diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts
index 75b31bd9..7b43083b 100644
--- a/src/compare/libs/vlm/vlm.service.spec.ts
+++ b/src/compare/libs/vlm/vlm.service.spec.ts
@@ -46,8 +46,17 @@ describe('VlmService', () => {
   it('should return ok status when VLM returns identical=true in JSON', async () => {
     const getImageMock = jest.fn().mockReturnValue(image);
     const ollamaGenerateMock = jest.fn().mockResolvedValue({
-      response: '{"identical": true, "description": "Screenshots are visually identical."}',
+      model: 'llava:7b',
+      created_at: new Date(),
+      message: { content: '{"identical": true, "description": "Screenshots are visually identical."}', role: 'assistant' },
       done: true,
+      done_reason: 'stop',
+      total_duration: 1000,
+      load_duration: 100,
+      prompt_eval_count: 10,
+      prompt_eval_duration: 200,
+      eval_count: 5,
+      eval_duration: 300,
     });
     const service = await initService({ getImageMock, ollamaGenerateMock });
 
@@ -65,10 +74,17 @@ describe('VlmService', () => {
   it('should return unresolved when VLM returns identical=false in JSON', async () => {
     const getImageMock = jest.fn().mockReturnValue(image);
     const ollamaGenerateMock = jest.fn().mockResolvedValue({
-      response: '{"identical": false, "description": "Button text changed from Submit to Send."}',
+      model: 'llava:7b',
+      created_at: new Date(),
+      message: { content: '{"identical": false, "description": 
"Button text changed from Submit to Send."}', role: 'assistant' }, done: true, + done_reason: 'stop', + total_duration: 1000, + load_duration: 100, + prompt_eval_count: 10, + prompt_eval_duration: 200, + eval_count: 5, + eval_duration: 300, }); const service = await initService({ getImageMock, ollamaGenerateMock }); @@ -85,8 +103,17 @@ describe('VlmService', () => { it('should handle invalid JSON response as error', async () => { const getImageMock = jest.fn().mockReturnValue(image); const ollamaGenerateMock = jest.fn().mockResolvedValue({ - response: 'Invalid JSON response from model', + model: 'llava:7b', + created_at: new Date(), + message: { content: 'Invalid JSON response from model', role: 'assistant' }, done: true, + done_reason: 'stop', + total_duration: 1000, + load_duration: 100, + prompt_eval_count: 10, + prompt_eval_duration: 200, + eval_count: 5, + eval_duration: 300, }); const service = await initService({ getImageMock, ollamaGenerateMock }); @@ -102,8 +129,17 @@ describe('VlmService', () => { it('should use custom model and temperature from config', async () => { const getImageMock = jest.fn().mockReturnValue(image); const ollamaGenerateMock = jest.fn().mockResolvedValue({ - response: '{"identical": true, "description": "No differences."}', + model: 'llava:13b', + created_at: new Date(), + message: { content: '{"identical": true, "description": "No differences."}', role: 'assistant' }, done: true, + done_reason: 'stop', + total_duration: 1000, + load_duration: 100, + prompt_eval_count: 10, + prompt_eval_duration: 200, + eval_count: 5, + eval_duration: 300, }); const service = await initService({ getImageMock, ollamaGenerateMock }); @@ -114,8 +150,13 @@ describe('VlmService', () => { expect(ollamaGenerateMock).toHaveBeenCalledWith({ model: 'llava:13b', - prompt: expect.stringContaining('Custom context'), - images: expect.any(Array), + messages: [ + { + role: 'user', + content: expect.stringContaining('Custom context'), + images: expect.any(Array), + }, + ], format: 'json', options: { temperature: 0.2 }, }); @@ -137,17 +178,48 @@ describe('VlmService', () => { expect(result.diffName).toBeNull(); }); + it('should use thinking field when useThinking is true', async () => { + const getImageMock = jest.fn().mockReturnValue(image); + const ollamaGenerateMock = jest.fn().mockResolvedValue({ + model: 'llava:7b', + created_at: new Date(), + message: { + content: '{"identical": false, "description": "Content field"}', + thinking: '{"identical": true, "description": "Thinking field"}', + role: 'assistant', + }, + done: true, + done_reason: 'stop', + total_duration: 1000, + load_duration: 100, + prompt_eval_count: 10, + prompt_eval_duration: 200, + eval_count: 5, + eval_duration: 300, + }); + const service = await initService({ getImageMock, ollamaGenerateMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + { ...DEFAULT_CONFIG, useThinking: true } + ); + + expect(result.status).toBe(TestStatus.ok); + expect(result.vlmDescription).toBe('Thinking field'); + }); + it.each([ ['empty string', '', DEFAULT_CONFIG], ['invalid JSON', 'invalid', DEFAULT_CONFIG], ['partial config', '{"model":"llava:7b"}', { model: 'llava:7b' }], [ 'full config', - '{"model":"llava:13b","prompt":"Custom prompt","temperature":0.2}', + '{"model":"llava:13b","prompt":"Custom prompt","temperature":0.2,"useThinking":true}', { model: 'llava:13b', prompt: 'Custom prompt', temperature: 0.2, + useThinking: true, }, 
], ])('should parse config: %s', async (_, configJson, expected) => { diff --git a/src/compare/libs/vlm/vlm.service.ts b/src/compare/libs/vlm/vlm.service.ts index 278c70e7..bb7dd434 100644 --- a/src/compare/libs/vlm/vlm.service.ts +++ b/src/compare/libs/vlm/vlm.service.ts @@ -2,23 +2,25 @@ import { Injectable, Logger } from '@nestjs/common'; import { TestStatus } from '@prisma/client'; import { StaticService } from '../../../static/static.service'; import { DiffResult } from '../../../test-runs/diffResult'; -import { parseConfig, pngToBase64 } from '../../utils'; +import { parseConfig } from '../../utils'; import { NO_BASELINE_RESULT } from '../consts'; import { ImageComparator } from '../image-comparator.interface'; import { ImageCompareInput } from '../ImageCompareInput'; import { VlmConfig } from './vlm.types'; import { OllamaService } from './ollama.service'; import { VlmComparisonResult } from './ollama.types'; +import { PNG } from 'pngjs'; export const SYSTEM_PROMPT = `Compare two UI screenshots for visual regression testing. -CHECK for differences: -- Data: text, numbers, counts, values -- Elements: missing, added, or moved components -- State: selected, disabled, expanded, checked -- Structure: row/column count, list items, tabs +CRITICAL: Your primary goal is to detect ANY differences that would be immediately noticeable to a human eye when viewing these screenshots side-by-side. -IGNORE rendering artifacts: anti-aliasing, shadows, 1-2px shifts.`; +MANDATORY CHECKS - You MUST examine and report differences in: + - Does one screenshot show data/content differently? + - Text content + - Missing, added, or moved UI components + +IGNORE ONLY: Minor rendering artifacts imperceptible to human eye (anti-aliasing, subtle shadows, 1-2px shifts that don't affect content visibility or functionality).`; // Internal constant - not exposed to user config to ensure consistent JSON output const JSON_FORMAT_INSTRUCTION = `CRITICAL: You must respond with ONLY valid JSON in this exact format: @@ -41,6 +43,7 @@ export const DEFAULT_CONFIG: VlmConfig = { model: 'llava:7b', prompt: SYSTEM_PROMPT, temperature: 0.1, + useThinking: false, }; @Injectable() @@ -71,9 +74,9 @@ export class VlmService implements ImageComparator { result.isSameDimension = baseline.width === image.width && baseline.height === image.height; try { - const baselineBase64 = pngToBase64(baseline); - const imageBase64 = pngToBase64(image); - const { pass, description } = await this.compareImagesWithVLM(baselineBase64, imageBase64, config); + const baselineBytes = new Uint8Array(PNG.sync.write(baseline)); + const imageBytes = new Uint8Array(PNG.sync.write(image)); + const { pass, description } = await this.compareImagesWithVLM(baselineBytes, imageBytes, config); result.vlmDescription = description; if (pass) { @@ -100,14 +103,19 @@ export class VlmService implements ImageComparator { } private async compareImagesWithVLM( - baselineBase64: string, - imageBase64: string, + baselineBytes: Uint8Array, + imageBytes: Uint8Array, config: VlmConfig ): Promise<{ pass: boolean; description: string }> { const data = await this.ollamaService.generate({ model: config.model, - prompt: `${config.prompt}\n${JSON_FORMAT_INSTRUCTION}`, - images: [baselineBase64, imageBase64], + messages: [ + { + role: 'user', + content: `${config.prompt}\n${JSON_FORMAT_INSTRUCTION}`, + images: [baselineBytes, imageBytes], + }, + ], format: 'json', options: { temperature: config.temperature, @@ -115,7 +123,9 @@ export class VlmService implements ImageComparator { }); // 
Some models return result in thinking field instead of response - const content = data.response || data.thinking; + const preferred = config.useThinking ? data.message.thinking : data.message.content; + const fallback = config.useThinking ? data.message.content : data.message.thinking; + const content = preferred || fallback; this.logger.debug(`VLM Response: ${content}`); if (!content) { diff --git a/src/compare/libs/vlm/vlm.types.ts b/src/compare/libs/vlm/vlm.types.ts index 318cf43b..02e99357 100644 --- a/src/compare/libs/vlm/vlm.types.ts +++ b/src/compare/libs/vlm/vlm.types.ts @@ -16,4 +16,11 @@ export interface VlmConfig { * @default 0.1 */ temperature: number; + + /** + * Whether to prefer thinking field over content field for response. + * Some models return result in thinking field instead of response. + * @default false + */ + useThinking?: boolean; } From 7497e58d4c5fb260c1c622b6acc738ac7d40b33b Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Thu, 18 Dec 2025 18:31:17 +0200 Subject: [PATCH 12/13] test --- package-lock.json | 12 +- package.json | 3 +- src/compare/libs/vlm/README.md | 67 +++--- src/compare/libs/vlm/ollama.service.spec.ts | 4 +- src/compare/libs/vlm/vlm.service.spec.ts | 229 ++++++++++++++++---- src/compare/libs/vlm/vlm.service.ts | 131 +++++------ src/compare/libs/vlm/vlm.types.ts | 2 +- src/test-runs/dto/testRun.dto.spec.ts | 1 - 8 files changed, 311 insertions(+), 138 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7b0eaa81..7d9360dc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -46,7 +46,8 @@ "rimraf": "^5.0.1", "rxjs": "^7.8.2", "swagger-ui-express": "^4.6.3", - "uuid-apikey": "^1.5.3" + "uuid-apikey": "^1.5.3", + "zod": "^4.2.1" }, "devDependencies": { "@darraghor/eslint-plugin-nestjs-typed": "^6.9.3", @@ -14979,6 +14980,15 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.2.1.tgz", + "integrity": "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } } } } diff --git a/package.json b/package.json index ed2f36ac..e5b1cbe9 100644 --- a/package.json +++ b/package.json @@ -64,7 +64,8 @@ "rimraf": "^5.0.1", "rxjs": "^7.8.2", "swagger-ui-express": "^4.6.3", - "uuid-apikey": "^1.5.3" + "uuid-apikey": "^1.5.3", + "zod": "^4.2.1" }, "devDependencies": { "@darraghor/eslint-plugin-nestjs-typed": "^6.9.3", diff --git a/src/compare/libs/vlm/README.md b/src/compare/libs/vlm/README.md index e317049d..f0a1b19a 100644 --- a/src/compare/libs/vlm/README.md +++ b/src/compare/libs/vlm/README.md @@ -1,6 +1,30 @@ # VLM (Vision Language Model) Image Comparison -AI-powered semantic image comparison using Vision Language Models via Ollama. +Hybrid image comparison combining pixelmatch for objective difference detection and Vision Language Models (via Ollama) for human-noticeability analysis. 
+
+## Architecture Flow
+
+```text
+VLM Comparison Request
+        │
+        ▼
+Run Pixelmatch Comparison
+        │
+        ├─→ No Differences Found → Return OK Status
+        │
+        └─→ Differences Found
+                │
+                ▼
+        Save Diff Image
+                │
+                ▼
+        Run VLM with 3 Images:
+        (Baseline, Comparison, Diff)
+                │
+                ├─→ Not Noticeable → Override: Return OK Status
+                │
+                └─→ Noticeable → Return Unresolved with VLM Description
+```
 
 ## Quick Start
 
@@ -18,10 +42,9 @@ ollama serve
 
 ```bash
 # Recommended for accuracy
-ollama pull llava:7b
+ollama pull gemma3:12b
 
-# Or for speed (smaller, less accurate)
-ollama pull moondream
+# Note: smaller models do not produce reliable results - use gemma3:12b only
 ```
 
 ### 3. Configure Backend
@@ -36,7 +59,7 @@ OLLAMA_BASE_URL=http://localhost:11434
 Set project's image comparison to `vlm` with config:
 ```json
 {
-  "model": "llava:7b",
+  "model": "gemma3:12b",
   "temperature": 0.1
 }
 ```
@@ -44,46 +67,30 @@ Set project's image comparison to `vlm` with config:
 Optional custom prompt (replaces default system prompt):
 ```json
 {
-  "model": "llava:7b",
+  "model": "gemma3:12b",
   "prompt": "Focus on button colors and text changes",
   "temperature": 0.1
 }
 ```
 
-**Note:** The `prompt` field replaces the entire system prompt. If omitted, a default system prompt is used that focuses on semantic differences while ignoring rendering artifacts.
+**Note:** The `prompt` field replaces the entire system prompt. If omitted, a default system prompt is used that analyzes the diff image to determine if highlighted differences are noticeable to humans.
 
 ## Recommended Models
 
-| Model | Size | Speed | Accuracy | Best For |
-|-------|------|-------|----------|----------|
-| `llava:7b` | 4.7GB | ⚡⚡ | ⭐⭐⭐ | **Recommended** - best balance (minimal) |
-| `gemma3:latest` | ~ | ⚡⚡ | ⭐⭐⭐ | Minimal model option |
-| `llava:13b` | 8GB | ⚡ | ⭐⭐⭐⭐ | Best accuracy |
-| `moondream` | 1.7GB | ⚡⚡⚡ | ⭐⭐ | Fast, may hallucinate |
-| `minicpm-v` | 5.5GB | ⚡⚡ | ⭐⭐⭐ | Good alternative |
+| Model | Size | Notes |
+|-------|------|-------|
+| `gemma3:12b` | ~12GB | **Recommended** |
+
+**Note:** Models smaller than the default (`gemma3:12b`) have been tested and do not produce reliable results. They fail to follow structured output formats and may return incorrect or inconsistent responses. For production use, only use `gemma3:12b` or `llava:13b`.
 
 ## Configuration
 
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
-| `model` | string | `llava:7b` | Ollama vision model name |
+| `model` | string | `gemma3:12b` | Ollama vision model name |
 | `prompt` | string | System prompt (see below) | Custom prompt for image comparison |
 | `temperature` | number | `0.1` | Lower = more consistent results (0.0-1.0) |
 
-## How It Works
-
-1. VLM analyzes both images semantically
-2. Returns JSON with `{"identical": true/false, "description": "..."}`
-3. `identical: true` = images match (pass), `identical: false` = differences found (fail)
-4. Ignores technical differences (anti-aliasing, shadows, 1-2px shifts)
-5. 
Provides description of differences found - -### Default System Prompt - -The default prompt instructs the model to: -- **CHECK** for: data changes, missing/added elements, state changes, structural differences -- **IGNORE**: rendering artifacts, anti-aliasing, shadows, minor pixel shifts - ## API Endpoints ```bash @@ -91,5 +98,5 @@ The default prompt instructs the model to: GET /ollama/models # Compare two images (for testing) -POST /ollama/compare?model=llava:7b&prompt=&temperature=0.1 +POST /ollama/compare?model=gemma3:12b&prompt=&temperature=0.1 ``` diff --git a/src/compare/libs/vlm/ollama.service.spec.ts b/src/compare/libs/vlm/ollama.service.spec.ts index 0a56b905..f128839e 100644 --- a/src/compare/libs/vlm/ollama.service.spec.ts +++ b/src/compare/libs/vlm/ollama.service.spec.ts @@ -16,7 +16,6 @@ jest.mock('ollama', () => { }; }); - describe('OllamaService', () => { let service: OllamaService; @@ -102,7 +101,8 @@ describe('OllamaService', () => { mockChat.mockResolvedValue(mockResponse); // Use a longer base64 string - const longBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; + const longBase64 = + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; const result = await service.generate({ model: 'llava', messages: [ diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts index 7b43083b..375842eb 100644 --- a/src/compare/libs/vlm/vlm.service.spec.ts +++ b/src/compare/libs/vlm/vlm.service.spec.ts @@ -2,11 +2,17 @@ import { Test, TestingModule } from '@nestjs/testing'; import { TestStatus } from '@prisma/client'; import { PNG } from 'pngjs'; import { StaticService } from '../../../static/static.service'; -import { NO_BASELINE_RESULT } from '../consts'; +import { NO_BASELINE_RESULT, EQUAL_RESULT } from '../consts'; import { DEFAULT_CONFIG, VlmService } from './vlm.service'; import { OllamaService } from './ollama.service'; +import { PixelmatchService } from '../pixelmatch/pixelmatch.service'; +import { DiffResult } from '../../../test-runs/diffResult'; -const initService = async ({ getImageMock = jest.fn(), ollamaGenerateMock = jest.fn() }) => { +const initService = async ({ + getImageMock = jest.fn(), + ollamaGenerateMock = jest.fn(), + pixelmatchGetDiffMock = jest.fn(), +}) => { const module: TestingModule = await Test.createTestingModule({ providers: [ VlmService, @@ -22,6 +28,12 @@ const initService = async ({ getImageMock = jest.fn(), ollamaGenerateMock = jest generate: ollamaGenerateMock, }, }, + { + provide: PixelmatchService, + useValue: { + getDiff: pixelmatchGetDiffMock, + }, + }, ], }).compile(); @@ -30,10 +42,11 @@ const initService = async ({ getImageMock = jest.fn(), ollamaGenerateMock = jest describe('VlmService', () => { const image = new PNG({ width: 20, height: 20 }); + const diffImage = new PNG({ width: 20, height: 20 }); - it('should return NO_BASELINE_RESULT when baseline is missing', async () => { - const getImageMock = jest.fn().mockReturnValueOnce(undefined).mockReturnValueOnce(image); - const service = await initService({ getImageMock }); + it('should return NO_BASELINE_RESULT when pixelmatch returns no baseline', async () => { + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(NO_BASELINE_RESULT); + const service = await initService({ pixelmatchGetDiffMock }); const result = await service.getDiff( { baseline: null, image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: true }, @@ 
-41,14 +54,53 @@ describe('VlmService', () => { ); expect(result).toStrictEqual(NO_BASELINE_RESULT); + expect(pixelmatchGetDiffMock).toHaveBeenCalled(); }); - it('should return ok status when VLM returns identical=true in JSON', async () => { - const getImageMock = jest.fn().mockReturnValue(image); + it('should return OK immediately when pixelmatch finds no differences (VLM not called)', async () => { + const pixelmatchResult: DiffResult = { + ...EQUAL_RESULT, + status: TestStatus.ok, + pixelMisMatchCount: 0, + diffPercent: 0, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const ollamaGenerateMock = jest.fn(); + const service = await initService({ pixelmatchGetDiffMock, ollamaGenerateMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + DEFAULT_CONFIG + ); + + expect(result.status).toBe(TestStatus.ok); + expect(result.pixelMisMatchCount).toBe(0); + expect(result.diffPercent).toBe(0); + expect(ollamaGenerateMock).not.toHaveBeenCalled(); // VLM should not be called + }); + + it('should override to OK when pixelmatch finds differences but VLM says not noticeable', async () => { + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: 'diff.png', + pixelMisMatchCount: 100, + diffPercent: 2.5, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest + .fn() + .mockReturnValueOnce(image) // baseline + .mockReturnValueOnce(image) // comparison + .mockReturnValueOnce(diffImage); // diff const ollamaGenerateMock = jest.fn().mockResolvedValue({ model: 'llava:7b', created_at: new Date(), - message: { content: '{"identical": true, "description": "Screenshots are visually identical."}', role: 'assistant' }, + message: { + content: + '{"identical": true, "description": "Differences are minor rendering artifacts, not noticeable to humans."}', + role: 'assistant', + }, done: true, done_reason: 'stop', total_duration: 1000, @@ -58,25 +110,51 @@ describe('VlmService', () => { eval_count: 5, eval_duration: 300, }); - const service = await initService({ getImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); const result = await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, DEFAULT_CONFIG ); - expect(result.status).toBe(TestStatus.ok); - expect(result.vlmDescription).toBe('Screenshots are visually identical.'); - expect(result.pixelMisMatchCount).toBe(0); - expect(result.diffPercent).toBe(0); + expect(result.status).toBe(TestStatus.ok); // Overridden by VLM + expect(result.vlmDescription).toBe('Differences are minor rendering artifacts, not noticeable to humans.'); + expect(result.pixelMisMatchCount).toBe(100); // Preserved from pixelmatch + expect(result.diffPercent).toBe(2.5); // Preserved from pixelmatch + expect(result.diffName).toBe('diff.png'); // Preserved from pixelmatch + expect(ollamaGenerateMock).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + expect.objectContaining({ + images: expect.arrayContaining([expect.any(Uint8Array), expect.any(Uint8Array), expect.any(Uint8Array)]), + }), + ], + }) + ); }); - it('should return unresolved when VLM returns identical=false in JSON', async () => { - const getImageMock = jest.fn().mockReturnValue(image); + it('should keep unresolved 
when pixelmatch finds differences and VLM confirms noticeable', async () => { + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: 'diff.png', + pixelMisMatchCount: 500, + diffPercent: 12.5, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest + .fn() + .mockReturnValueOnce(image) // baseline + .mockReturnValueOnce(image) // comparison + .mockReturnValueOnce(diffImage); // diff const ollamaGenerateMock = jest.fn().mockResolvedValue({ model: 'llava:7b', created_at: new Date(), - message: { content: '{"identical": false, "description": "Button text changed from Submit to Send."}', role: 'assistant' }, + message: { + content: + '{"identical": false, "description": "Button text changed from Submit to Send, and user count changed from 12 to 15."}', + role: 'assistant', + }, done: true, done_reason: 'stop', total_duration: 1000, @@ -86,22 +164,36 @@ describe('VlmService', () => { eval_count: 5, eval_duration: 300, }); - const service = await initService({ getImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); const result = await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: true }, DEFAULT_CONFIG ); - expect(result.status).toBe(TestStatus.unresolved); - expect(result.vlmDescription).toBe('Button text changed from Submit to Send.'); - expect(result.diffName).toBeNull(); - expect(result.pixelMisMatchCount).toBe(0); - expect(result.diffPercent).toBe(0); + expect(result.status).toBe(TestStatus.unresolved); // Kept as unresolved + expect(result.vlmDescription).toBe( + 'Button text changed from Submit to Send, and user count changed from 12 to 15.' 
+ ); + expect(result.pixelMisMatchCount).toBe(500); // Preserved from pixelmatch + expect(result.diffPercent).toBe(12.5); // Preserved from pixelmatch + expect(result.diffName).toBe('diff.png'); // Preserved from pixelmatch }); - it('should handle invalid JSON response as error', async () => { - const getImageMock = jest.fn().mockReturnValue(image); + it('should handle invalid JSON response as error and return pixelmatch result', async () => { + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: 'diff.png', + pixelMisMatchCount: 200, + diffPercent: 5.0, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest + .fn() + .mockReturnValueOnce(image) // baseline + .mockReturnValueOnce(image) // comparison + .mockReturnValueOnce(diffImage); // diff const ollamaGenerateMock = jest.fn().mockResolvedValue({ model: 'llava:7b', created_at: new Date(), @@ -115,19 +207,33 @@ describe('VlmService', () => { eval_count: 5, eval_duration: 300, }); - const service = await initService({ getImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); const result = await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, DEFAULT_CONFIG ); - expect(result.status).toBe(TestStatus.unresolved); + expect(result.status).toBe(TestStatus.unresolved); // From pixelmatch expect(result.vlmDescription).toContain('VLM analysis failed'); + expect(result.pixelMisMatchCount).toBe(200); // Preserved from pixelmatch + expect(result.diffPercent).toBe(5.0); // Preserved from pixelmatch }); it('should use custom model and temperature from config', async () => { - const getImageMock = jest.fn().mockReturnValue(image); + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: 'diff.png', + pixelMisMatchCount: 150, + diffPercent: 3.75, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest + .fn() + .mockReturnValueOnce(image) // baseline + .mockReturnValueOnce(image) // comparison + .mockReturnValueOnce(diffImage); // diff const ollamaGenerateMock = jest.fn().mockResolvedValue({ model: 'llava:13b', created_at: new Date(), @@ -141,7 +247,7 @@ describe('VlmService', () => { eval_count: 5, eval_duration: 300, }); - const service = await initService({ getImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, @@ -154,7 +260,7 @@ describe('VlmService', () => { { role: 'user', content: expect.stringContaining('Custom context'), - images: expect.any(Array), + images: expect.arrayContaining([expect.any(Uint8Array), expect.any(Uint8Array), expect.any(Uint8Array)]), }, ], format: 'json', @@ -162,24 +268,49 @@ describe('VlmService', () => { }); }); - it('should handle API errors gracefully', async () => { - const getImageMock = jest.fn().mockReturnValue(image); + it('should handle API errors gracefully and return pixelmatch result', async () => { + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: 'diff.png', + pixelMisMatchCount: 300, + diffPercent: 7.5, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = 
jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest + .fn() + .mockReturnValueOnce(image) // baseline + .mockReturnValueOnce(image) // comparison + .mockReturnValueOnce(diffImage); // diff const ollamaGenerateMock = jest.fn().mockRejectedValue(new Error('Connection refused')); - const service = await initService({ getImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); const result = await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, DEFAULT_CONFIG ); - expect(result.status).toBe(TestStatus.unresolved); + expect(result.status).toBe(TestStatus.unresolved); // From pixelmatch expect(result.vlmDescription).toContain('VLM analysis failed'); - expect(result.pixelMisMatchCount).toBe(0); - expect(result.diffName).toBeNull(); + expect(result.pixelMisMatchCount).toBe(300); // Preserved from pixelmatch + expect(result.diffPercent).toBe(7.5); // Preserved from pixelmatch + expect(result.diffName).toBe('diff.png'); // Preserved from pixelmatch }); it('should use thinking field when useThinking is true', async () => { - const getImageMock = jest.fn().mockReturnValue(image); + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: 'diff.png', + pixelMisMatchCount: 80, + diffPercent: 2.0, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest + .fn() + .mockReturnValueOnce(image) // baseline + .mockReturnValueOnce(image) // comparison + .mockReturnValueOnce(diffImage); // diff const ollamaGenerateMock = jest.fn().mockResolvedValue({ model: 'llava:7b', created_at: new Date(), @@ -197,17 +328,39 @@ describe('VlmService', () => { eval_count: 5, eval_duration: 300, }); - const service = await initService({ getImageMock, ollamaGenerateMock }); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); const result = await service.getDiff( { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, { ...DEFAULT_CONFIG, useThinking: true } ); - expect(result.status).toBe(TestStatus.ok); + expect(result.status).toBe(TestStatus.ok); // Overridden by VLM expect(result.vlmDescription).toBe('Thinking field'); }); + it('should handle missing diff image gracefully', async () => { + const pixelmatchResult: DiffResult = { + status: TestStatus.unresolved, + diffName: null, // No diff saved + pixelMisMatchCount: 100, + diffPercent: 2.5, + isSameDimension: true, + }; + const pixelmatchGetDiffMock = jest.fn().mockResolvedValue(pixelmatchResult); + const getImageMock = jest.fn().mockReturnValueOnce(image).mockReturnValueOnce(image).mockReturnValueOnce(null); // diff image missing + const ollamaGenerateMock = jest.fn(); + const service = await initService({ getImageMock, ollamaGenerateMock, pixelmatchGetDiffMock }); + + const result = await service.getDiff( + { baseline: 'baseline', image: 'image', diffTollerancePercent: 0.1, ignoreAreas: [], saveDiffAsFile: false }, + DEFAULT_CONFIG + ); + + expect(result).toEqual(pixelmatchResult); // Should return pixelmatch result as-is + expect(ollamaGenerateMock).not.toHaveBeenCalled(); // VLM should not be called + }); + it.each([ ['empty string', '', DEFAULT_CONFIG], ['invalid JSON', 'invalid', DEFAULT_CONFIG], diff --git a/src/compare/libs/vlm/vlm.service.ts b/src/compare/libs/vlm/vlm.service.ts index 
bb7dd434..0d93257b 100644
--- a/src/compare/libs/vlm/vlm.service.ts
+++ b/src/compare/libs/vlm/vlm.service.ts
@@ -3,45 +3,34 @@ import { TestStatus } from '@prisma/client';
 import { StaticService } from '../../../static/static.service';
 import { DiffResult } from '../../../test-runs/diffResult';
 import { parseConfig } from '../../utils';
-import { NO_BASELINE_RESULT } from '../consts';
 import { ImageComparator } from '../image-comparator.interface';
 import { ImageCompareInput } from '../ImageCompareInput';
 import { VlmConfig } from './vlm.types';
 import { OllamaService } from './ollama.service';
-import { VlmComparisonResult } from './ollama.types';
+import { PixelmatchService, DEFAULT_CONFIG as PIXELMATCH_DEFAULT_CONFIG } from '../pixelmatch/pixelmatch.service';
 import { PNG } from 'pngjs';
+import { z } from 'zod';
 
-export const SYSTEM_PROMPT = `Compare two UI screenshots for visual regression testing.
+export const DEFAULT_PROMPT = `You are provided with three images:
+1. First image: baseline screenshot
+2. Second image: new version screenshot
+3. Diff image
 
-CRITICAL: Your primary goal is to detect ANY differences that would be immediately noticeable to a human eye when viewing these screenshots side-by-side.
+Spot any difference in text, color, shape, and position of elements; treat even a slight change as a difference
+Ignore minor rendering artifacts that are imperceptible to users, such as anti-aliasing
+Describe the differences in about 100 words`;
 
-MANDATORY CHECKS - You MUST examine and report differences in:
-  - Does one screenshot show data/content differently?
-  - Text content
-  - Missing, added, or moved UI components
-
-IGNORE ONLY: Minor rendering artifacts imperceptible to human eye (anti-aliasing, subtle shadows, 1-2px shifts that don't affect content visibility or functionality).`;
-
-// Internal constant - not exposed to user config to ensure consistent JSON output
-const JSON_FORMAT_INSTRUCTION = `CRITICAL: You must respond with ONLY valid JSON in this exact format:
-{"identical": Boolean, "description": String}
-
-**JSON Schema Reference:**
-The JSON object MUST conform to the following schema:
-{
-  "identical": <boolean>,
-  "description": <string>
-}
-
-**Requirements:**
-1. **"identical":** Must be a standard boolean (\`true\` or \`false\`).
-2. **"description":** Must be a detailed string explaining the reasoning.
-   * If identical is \`true\`, the description should be "Screenshots are functionally identical based on all comparison criteria."
-   * If identical is \`false\`, the description must clearly and concisely list the differences found (e.g., "The user count changed from 12 to 15, and the 'New User' button is missing."). Escape any internal double quotes with \\".`;
+const VlmComparisonResultSchema: z.ZodObject<{
+  identical: z.ZodBoolean;
+  description: z.ZodString;
+}> = z.object({
+  identical: z.boolean(),
+  description: z.string(),
+});
 
 export const DEFAULT_CONFIG: VlmConfig = {
-  model: 'llava:7b',
-  prompt: SYSTEM_PROMPT,
+  model: 'gemma3:12b',
+  prompt: DEFAULT_PROMPT,
   temperature: 0.1,
   useThinking: false,
 };
@@ -52,7 +41,8 @@ export class VlmService implements ImageComparator {
 
   constructor(
     private readonly staticService: StaticService,
-    private readonly ollamaService: OllamaService
+    private readonly ollamaService: OllamaService,
+    private readonly pixelmatchService: PixelmatchService
   ) {}
 
   parseConfig(configJson: string): VlmConfig {
@@ -60,51 +50,65 @@ export class VlmService implements ImageComparator {
   }
 
   async getDiff(data: ImageCompareInput, config: VlmConfig): Promise<DiffResult> {
-    const result: DiffResult = {
-      ...NO_BASELINE_RESULT,
-    };
-
-    const baseline = await this.staticService.getImage(data.baseline);
-    const image = await this.staticService.getImage(data.image);
+    const pixelmatchResult = await this.pixelmatchService.getDiff(
+      {
+        ...data,
+        saveDiffAsFile: true,
+      },
+      PIXELMATCH_DEFAULT_CONFIG
+    );
 
-    if (!baseline || !image) {
-      return NO_BASELINE_RESULT;
+    if (pixelmatchResult.status === TestStatus.new) {
+      return pixelmatchResult;
    }
 
-    result.isSameDimension = baseline.width === image.width && baseline.height === image.height;
+    if (pixelmatchResult.status === TestStatus.ok) {
+      return pixelmatchResult;
+    }
 
+    this.logger.debug('Pixel diff is being sent to VLM');
     try {
+      const baseline = await this.staticService.getImage(data.baseline);
+      const image = await this.staticService.getImage(data.image);
+      const diffImage = pixelmatchResult.diffName ? await this.staticService.getImage(pixelmatchResult.diffName) : null;
+
+      if (!baseline || !image || !diffImage) {
+        this.logger.warn('Missing images for VLM analysis, returning pixelmatch result');
+        return pixelmatchResult;
+      }
+
       const baselineBytes = new Uint8Array(PNG.sync.write(baseline));
       const imageBytes = new Uint8Array(PNG.sync.write(image));
-      const { pass, description } = await this.compareImagesWithVLM(baselineBytes, imageBytes, config);
-      result.vlmDescription = description;
+      const diffBytes = new Uint8Array(PNG.sync.write(diffImage));
+
+      const { pass, description } = await this.compareImagesWithVLM(baselineBytes, imageBytes, diffBytes, config);
+
+      // Build result from pixelmatch, but override status based on VLM analysis
+      const result: DiffResult = {
+        ...pixelmatchResult,
+        vlmDescription: description,
+      };
 
       if (pass) {
         result.status = TestStatus.ok;
-        result.pixelMisMatchCount = 0;
-        result.diffPercent = 0;
-        result.diffName = null;
       } else {
         result.status = TestStatus.unresolved;
-        result.pixelMisMatchCount = 0;
-        result.diffPercent = 0;
-        result.diffName = null;
       }
+
+      return result;
     } catch (error) {
       this.logger.error(`VLM comparison failed: ${error.message}`, error.stack);
-      result.status = TestStatus.unresolved;
-      result.pixelMisMatchCount = 0;
-      result.diffPercent = 0;
-      result.diffName = null;
-      result.vlmDescription = `VLM analysis failed: ${error.message}`;
+      return {
+        ...pixelmatchResult,
+        vlmDescription: `VLM analysis failed: ${error.message}`,
+      };
     }
-
-    return result;
   }
 
   private async compareImagesWithVLM(
     baselineBytes: Uint8Array,
     imageBytes: Uint8Array,
+    diffBytes: Uint8Array,
     config: VlmConfig
   ): Promise<{ pass: boolean; description: string }> {
     const data = await this.ollamaService.generate({
@@ -112,11 +116,11 
@@ export class VlmService implements ImageComparator { messages: [ { role: 'user', - content: `${config.prompt}\n${JSON_FORMAT_INSTRUCTION}`, - images: [baselineBytes, imageBytes], + content: config.prompt, + images: [baselineBytes, imageBytes, diffBytes], }, ], - format: 'json', + format: z.toJSONSchema(VlmComparisonResultSchema), options: { temperature: config.temperature, }, @@ -126,6 +130,8 @@ export class VlmService implements ImageComparator { const preferred = config.useThinking ? data.message.thinking : data.message.content; const fallback = config.useThinking ? data.message.content : data.message.thinking; const content = preferred || fallback; + + this.logger.debug(`${JSON.stringify(data)}`); this.logger.debug(`VLM Response: ${content}`); if (!content) { @@ -136,15 +142,12 @@ export class VlmService implements ImageComparator { } private parseVlmResponse(response: string): { pass: boolean; description: string } { - const parsed = JSON.parse(response) as VlmComparisonResult; - - if (typeof parsed.identical !== 'boolean') { - throw new TypeError('Missing or invalid "identical" field'); - } + const parsed = JSON.parse(response); + const validated = VlmComparisonResultSchema.parse(parsed); return { - pass: parsed.identical, - description: parsed.description || 'No description provided', + pass: validated.identical, + description: validated.description || 'No description provided', }; } } diff --git a/src/compare/libs/vlm/vlm.types.ts b/src/compare/libs/vlm/vlm.types.ts index 02e99357..b53992ff 100644 --- a/src/compare/libs/vlm/vlm.types.ts +++ b/src/compare/libs/vlm/vlm.types.ts @@ -1,7 +1,7 @@ export interface VlmConfig { /** * Ollama vision model to use for image comparison. - * @default "llava:7b" + * @default "gemma3:12b" */ model: string; diff --git a/src/test-runs/dto/testRun.dto.spec.ts b/src/test-runs/dto/testRun.dto.spec.ts index 88e21aa2..738bac04 100644 --- a/src/test-runs/dto/testRun.dto.spec.ts +++ b/src/test-runs/dto/testRun.dto.spec.ts @@ -36,4 +36,3 @@ describe('TestRunDto', () => { }); }); }); - From ef75307a8e015a44671aa9af2440456e2409fd9d Mon Sep 17 00:00:00 2001 From: Pavlo Strunkin Date: Thu, 18 Dec 2025 18:53:40 +0200 Subject: [PATCH 13/13] Update vlm.service.spec.ts --- src/compare/libs/vlm/vlm.service.spec.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/compare/libs/vlm/vlm.service.spec.ts b/src/compare/libs/vlm/vlm.service.spec.ts index 375842eb..cdf3fb9e 100644 --- a/src/compare/libs/vlm/vlm.service.spec.ts +++ b/src/compare/libs/vlm/vlm.service.spec.ts @@ -1,6 +1,7 @@ import { Test, TestingModule } from '@nestjs/testing'; import { TestStatus } from '@prisma/client'; import { PNG } from 'pngjs'; +import { z } from 'zod'; import { StaticService } from '../../../static/static.service'; import { NO_BASELINE_RESULT, EQUAL_RESULT } from '../consts'; import { DEFAULT_CONFIG, VlmService } from './vlm.service'; @@ -254,16 +255,22 @@ describe('VlmService', () => { { model: 'llava:13b', prompt: 'Custom context', temperature: 0.2 } ); + const VlmComparisonResultSchema = z.object({ + identical: z.boolean(), + description: z.string(), + }); + const expectedJsonSchema = z.toJSONSchema(VlmComparisonResultSchema); + expect(ollamaGenerateMock).toHaveBeenCalledWith({ model: 'llava:13b', messages: [ { role: 'user', - content: expect.stringContaining('Custom context'), + content: 'Custom context', images: expect.arrayContaining([expect.any(Uint8Array), expect.any(Uint8Array), expect.any(Uint8Array)]), }, ], - format: 'json', + format: 
expectedJsonSchema, options: { temperature: 0.2 }, }); });
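
For a quick local sanity check of the call pattern the last two patches converge on (the SDK chat call with raw image bytes and a zod-derived JSON schema passed as `format`), a minimal standalone sketch follows. It assumes Ollama is reachable at http://localhost:11434 with `gemma3:12b` pulled; the file name and PNG paths are hypothetical placeholders, not part of the patch series.

```typescript
// vlm-sanity-check.ts - hypothetical helper, mirrors the service's structured-output call.
import { readFileSync } from 'node:fs';
import { Ollama } from 'ollama';
import { z } from 'zod';

// Same shape the service enforces via VlmComparisonResultSchema.
const ResultSchema = z.object({
  identical: z.boolean(),
  description: z.string(),
});

async function main(): Promise<void> {
  const client = new Ollama({ host: 'http://localhost:11434' });

  // The SDK accepts raw bytes (Uint8Array) or base64 strings for images.
  const baseline = new Uint8Array(readFileSync('baseline.png'));
  const image = new Uint8Array(readFileSync('image.png'));

  const response = await client.chat({
    model: 'gemma3:12b',
    messages: [
      {
        role: 'user',
        content: 'Compare the two screenshots and report any human-noticeable differences.',
        images: [baseline, image],
      },
    ],
    // Constrain the reply to the JSON shape above (structured outputs).
    format: z.toJSONSchema(ResultSchema),
    options: { temperature: 0.1 },
    stream: false,
  });

  // Validate before trusting the model, as parseVlmResponse does.
  const result = ResultSchema.parse(JSON.parse(response.message.content));
  console.log(result.identical ? 'PASS' : 'FAIL', '-', result.description);
}

main().catch((error) => {
  console.error(error);
  process.exit(1);
});
```

Validating the reply with the same zod schema that produced the `format` constraint means malformed model output fails loudly at the parse step instead of slipping through as a false pass.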