Files
breakpilot-compliance/admin-compliance/__tests__/ingest-industry-compliance.test.ts
Benjamin Admin 71267e2a8a
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 33s
CI / test-python-backend-compliance (push) Successful in 26s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
test: add tests for compliance advisor IFRS prompt and ingestion script
46 tests covering:
- COMPLIANCE_COLLECTIONS validation
- IFRS endorsement warning content (5 points, CELEX, EFRAG reference)
- Ingestion script structure (download_pdf, upload_file functions)
- IFRS/EFRAG/ENISA URLs and metadata validation
- Chunking config and verification phase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 16:46:45 +01:00

168 lines
4.9 KiB
TypeScript

import { describe, it, expect } from 'vitest'
import { readFileSync } from 'fs'
import { resolve } from 'path'
/**
* Tests for the ingestion script ingest-industry-compliance.sh
* Validates script structure, URLs, metadata, and configuration.
*/
// Location of the shell script under test, resolved relative to this test file's directory.
const SCRIPT_PATH = resolve(__dirname, '../../scripts/ingest-industry-compliance.sh')

// Full text of the script. A read failure is deliberately mapped to an empty
// string so the problem surfaces as failing assertions in the suite rather
// than as an exception thrown while the test file is being loaded.
const scriptContent: string = (() => {
  try {
    return readFileSync(SCRIPT_PATH, 'utf-8')
  } catch {
    return ''
  }
})()
describe('Ingestion Script: ingest-industry-compliance.sh', () => {
it('should exist and be non-empty', () => {
  // scriptContent is '' when the file could not be read, so a positive
  // length covers both "exists" and "has content".
  const loadedLength = scriptContent.length
  expect(loadedLength).toBeGreaterThan(0)
})
describe('download_pdf function', () => {
  // Each entry pairs a test title with what the script text must contain:
  // a string is checked as a literal substring, a RegExp as a pattern match.
  const checks: ReadonlyArray<readonly [string, string | RegExp]> = [
    ['should define download_pdf function', 'download_pdf()'],
    ['should use User-Agent header for downloads', 'Mozilla/5.0'],
    ['should follow redirects with -L flag', /curl.*-L/],
    ['should skip already downloaded files', '-f "$target"'],
  ]

  for (const [title, needle] of checks) {
    it(title, () => {
      if (typeof needle === 'string') {
        expect(scriptContent).toContain(needle)
      } else {
        expect(scriptContent).toMatch(needle)
      }
    })
  }
})
describe('upload_file function', () => {
  // Test title -> literal fragment that must appear in the script text.
  const requiredFragments: ReadonlyArray<readonly [string, string]> = [
    ['should define upload_file function', 'upload_file()'],
    ['should use recursive chunk strategy', 'chunk_strategy=recursive'],
    ['should use chunk_size=512', 'chunk_size=512'],
    ['should use chunk_overlap=50', 'chunk_overlap=50'],
    ['should validate minimum file size', '"$filesize" -lt 100'],
  ]

  requiredFragments.forEach(([title, fragment]) => {
    it(title, () => {
      expect(scriptContent).toContain(fragment)
    })
  })
})
describe('IFRS Downloads', () => {
  // Source URLs and local filenames the script text is expected to mention
  // for the German and English variants of the document.
  const expectedStrings: ReadonlyArray<readonly [string, string]> = [
    [
      'should download IFRS DE from EUR-Lex',
      'https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R1803',
    ],
    [
      'should download IFRS EN from EUR-Lex',
      'https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:32023R1803',
    ],
    ['should save IFRS DE with correct filename', 'ifrs_regulation_2023_1803_de.pdf'],
    ['should save IFRS EN with correct filename', 'ifrs_regulation_2023_1803_en.pdf'],
  ]

  for (const [title, expected] of expectedStrings) {
    it(title, () => {
      expect(scriptContent).toContain(expected)
    })
  }
})
describe('EFRAG Download', () => {
  // Host name and target filename that must both occur in the script text.
  const efragHost = 'efrag.org'
  const efragFilename = 'efrag_endorsement_status_2025.pdf'

  it('should download EFRAG Endorsement Status Report', () => {
    expect(scriptContent).toContain(efragHost)
  })

  it('should save as efrag_endorsement_status_2025.pdf', () => {
    expect(scriptContent).toContain(efragFilename)
  })
})
describe('ENISA Downloads', () => {
  // The script text must reference the publications path on enisa.europa.eu.
  it('should download ENISA from new URL pattern', () => {
    expect(scriptContent).toContain('enisa.europa.eu/sites/default/files/publications')
  })

  it('should NOT use old Plone-style URLs', () => {
    // A negative substring check passes trivially on an empty string, which is
    // what scriptContent holds when the script could not be read. Require
    // content first so a missing script fails here instead of passing vacuously.
    expect(scriptContent.length).toBeGreaterThan(0)
    expect(scriptContent).not.toContain('@@download/fullReport')
  })
})
describe('IFRS Metadata', () => {
  // Metadata fragments are matched as exact text, so the compact
  // `"key":"value"` spelling (no spaces) is part of the expectation.
  const metadataFragments: ReadonlyArray<readonly [string, string]> = [
    ['should include CELEX number 32023R1803', '"celex":"32023R1803"'],
    ['should tag as regulation_short EU_IFRS', '"regulation_short":"EU_IFRS"'],
    ['should set category to rechnungslegung', '"category":"rechnungslegung"'],
    ['should include endorsement note', 'EU-endorsed IFRS'],
    ['should set license to public_law', '"license":"public_law"'],
  ]

  metadataFragments.forEach(([title, fragment]) => {
    it(title, () => {
      expect(scriptContent).toContain(fragment)
    })
  })
})
describe('EFRAG Metadata', () => {
  // Test title -> exact text expected somewhere in the script.
  const efragMetadata: ReadonlyArray<readonly [string, string]> = [
    ['should set source_id to efrag', '"source_id":"efrag"'],
    ['should include EFRAG attribution', 'European Financial Reporting Advisory Group'],
  ]

  for (const [title, fragment] of efragMetadata) {
    it(title, () => {
      expect(scriptContent).toContain(fragment)
    })
  }
})
describe('Target Collections', () => {
  // Collection names that must be mentioned in the script text; the test
  // title is derived from the name itself.
  const collectionNames = ['bp_compliance_ce', 'bp_compliance_datenschutz'] as const

  collectionNames.forEach((collection) => {
    it(`should reference ${collection}`, () => {
      expect(scriptContent).toContain(collection)
    })
  })
})
describe('Verification Phase', () => {
  // The function name and the two search strings that must appear in the script text.
  const verificationFragments: ReadonlyArray<readonly [string, string]> = [
    ['should have a phase_verify function', 'phase_verify'],
    ['should test search for IFRS', 'IFRS Rechnungslegung EU endorsed'],
    ['should test search for EFRAG', 'EFRAG endorsement status'],
  ]

  for (const [title, fragment] of verificationFragments) {
    it(title, () => {
      expect(scriptContent).toContain(fragment)
    })
  }
})
describe('Curl Configuration', () => {
  // Exact option text expected in the script source.
  const maxTimeOption = '--max-time 600'

  it('should set max-time to 600 seconds', () => {
    expect(scriptContent).toContain(maxTimeOption)
  })
})
})