Compare commits

..

3 Commits

Author SHA1 Message Date
Sharang Parnerkar
b697963186 fix: use Alpine-compatible addgroup/adduser flags in Dockerfiles
Some checks failed
Deploy to Coolify / deploy (push) Has been cancelled
Replace --system/--gid/--uid (Debian syntax) with -S/-g/-u (BusyBox/Alpine).
Coolify ARG injection causes exit code 255 with Debian-style flags.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 22:38:31 +01:00
Sharang Parnerkar
ef6237ffdf refactor(coolify): externalize postgres, qdrant, S3
Some checks failed
Deploy to Coolify / deploy (push) Has been cancelled
- Replace bp-core-postgres with POSTGRES_HOST env var
- Replace bp-core-qdrant with QDRANT_URL env var
- Replace bp-core-minio with S3_ENDPOINT/S3_ACCESS_KEY/S3_SECRET_KEY

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 09:23:32 +01:00
Sharang Parnerkar
41a8f3b183 feat: add Coolify deployment configuration
Some checks failed
Deploy to Coolify / deploy (push) Has been cancelled
Add docker-compose.coolify.yml (8 services), .env.coolify.example,
and Gitea Action workflow for Coolify API deployment. Removes
core-health-check, paddleocr, transcription-worker, agent-core,
drive, and docs. Adds Traefik labels for *.breakpilot.ai domain
routing with Let's Encrypt SSL.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 10:43:15 +01:00
198 changed files with 5322 additions and 54600 deletions

View File

@@ -6,31 +6,22 @@
| Geraet | Rolle | Aufgaben |
|--------|-------|----------|
| **MacBook** | Entwicklung | Claude Terminal, Code-Entwicklung, Browser (Frontend-Tests) |
| **Mac Mini** | Server | Docker, alle Services, Tests, Builds, Deployment |
| **MacBook** | Client | Claude Terminal, Browser (Frontend-Tests) |
| **Mac Mini** | Server | Docker, alle Services, Code-Ausfuehrung, Tests, Git |
**WICHTIG:** Code wird direkt auf dem MacBook in diesem Repo bearbeitet. Docker und Services laufen auf dem Mac Mini.
**WICHTIG:** Die Entwicklung findet vollstaendig auf dem **Mac Mini** statt!
### Entwicklungsworkflow
### SSH-Verbindung
```bash
# 1. Code auf MacBook bearbeiten (dieses Verzeichnis)
# 2. Committen und pushen:
git push origin main && git push gitea main
ssh macmini
# Projektverzeichnis:
cd /Users/benjaminadmin/Projekte/breakpilot-lehrer
# 3. Auf Mac Mini pullen und Container neu bauen:
ssh macmini "git -C /Users/benjaminadmin/Projekte/breakpilot-lehrer pull --no-rebase origin main"
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml build --no-cache <service>"
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml up -d <service>"
# Einzelbefehle (BEVORZUGT):
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && <cmd>"
```
### SSH-Verbindung (fuer Docker/Tests)
**WICHTIG:** `cd` in SSH-Kommandos funktioniert NICHT zuverlaessig! Stattdessen:
- Git: `git -C /Users/benjaminadmin/Projekte/breakpilot-lehrer <cmd>`
- Docker: `/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml <cmd>`
- Logs: `/usr/local/bin/docker logs -f bp-lehrer-<service>`
---
## Voraussetzung
@@ -172,10 +163,10 @@ breakpilot-lehrer/
```bash
# Lehrer-Services starten (Core muss laufen!)
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml up -d"
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose up -d"
# Einzelnen Service neu bauen
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml build --no-cache <service>"
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose build --no-cache <service>"
# Logs
ssh macmini "/usr/local/bin/docker logs -f bp-lehrer-<service>"
@@ -185,7 +176,6 @@ ssh macmini "/usr/local/bin/docker ps --filter name=bp-lehrer"
```
**WICHTIG:** Docker-Pfad auf Mac Mini ist `/usr/local/bin/docker` (nicht im Standard-SSH-PATH).
**WICHTIG:** Immer `-f` mit vollem Pfad zur docker-compose.yml nutzen, `cd` in SSH funktioniert nicht!
### Frontend-Entwicklung

79
.env.coolify.example Normal file
View File

@@ -0,0 +1,79 @@
# =========================================================
# BreakPilot Lehrer — Coolify Environment Variables
# =========================================================
# Copy these into Coolify's environment variable UI
# for the breakpilot-lehrer Docker Compose resource.
# =========================================================
# --- External PostgreSQL (Coolify-managed, same as Core) ---
POSTGRES_HOST=<coolify-postgres-hostname>
POSTGRES_PORT=5432
POSTGRES_USER=breakpilot
POSTGRES_PASSWORD=CHANGE_ME_SAME_AS_CORE
POSTGRES_DB=breakpilot_db
# --- Security ---
JWT_SECRET=CHANGE_ME_SAME_AS_CORE
# --- External S3 Storage (same as Core) ---
S3_ENDPOINT=<s3-endpoint-host:port>
S3_ACCESS_KEY=CHANGE_ME_SAME_AS_CORE
S3_SECRET_KEY=CHANGE_ME_SAME_AS_CORE
S3_BUCKET=breakpilot-rag
S3_SECURE=true
# --- External Qdrant (Coolify-managed, same as Core) ---
QDRANT_URL=http://<coolify-qdrant-hostname>:6333
# --- Session ---
SESSION_TTL_HOURS=24
# --- SMTP (Real mail server) ---
SMTP_HOST=smtp.example.com
SMTP_PORT=587
SMTP_USERNAME=noreply@breakpilot.ai
SMTP_PASSWORD=CHANGE_ME_SMTP_PASSWORD
SMTP_FROM_NAME=BreakPilot
SMTP_FROM_ADDR=noreply@breakpilot.ai
# --- LLM / Ollama (optional) ---
OLLAMA_BASE_URL=
OLLAMA_URL=
OLLAMA_ENABLED=false
OLLAMA_DEFAULT_MODEL=
OLLAMA_VISION_MODEL=
OLLAMA_CORRECTION_MODEL=
OLLAMA_TIMEOUT=120
# --- Anthropic (optional) ---
ANTHROPIC_API_KEY=
# --- vast.ai GPU (optional) ---
VAST_API_KEY=
VAST_INSTANCE_ID=
# --- Game Settings ---
GAME_USE_DATABASE=true
GAME_REQUIRE_AUTH=true
GAME_REQUIRE_BILLING=true
GAME_LLM_MODEL=
# --- Frontend URLs (build args) ---
NEXT_PUBLIC_API_URL=https://api-lehrer.breakpilot.ai
NEXT_PUBLIC_KLAUSUR_SERVICE_URL=https://klausur.breakpilot.ai
NEXT_PUBLIC_VOICE_SERVICE_URL=wss://voice.breakpilot.ai
NEXT_PUBLIC_BILLING_API_URL=https://api-core.breakpilot.ai
NEXT_PUBLIC_APP_URL=https://app.breakpilot.ai
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=
# --- Edu Search ---
EDU_SEARCH_URL=
EDU_SEARCH_API_KEY=
OPENSEARCH_PASSWORD=CHANGE_ME_OPENSEARCH_PASSWORD
# --- Misc ---
CONTROL_API_KEY=
ALERTS_AGENT_ENABLED=false
PADDLEOCR_SERVICE_URL=
TROCR_SERVICE_URL=
CAMUNDA_URL=

View File

@@ -30,23 +30,6 @@ OLLAMA_VISION_MODEL=llama3.2-vision
OLLAMA_CORRECTION_MODEL=llama3.2
OLLAMA_TIMEOUT=120
# OCR-Pipeline: LLM-Review (Schritt 6)
# Kleine Modelle reichen fuer Zeichen-Korrekturen (0->O, 1->l, 5->S)
# Optionen: qwen3:0.6b, qwen3:1.7b, gemma3:1b, qwen3.5:35b-a3b
OLLAMA_REVIEW_MODEL=qwen3:0.6b
# Eintraege pro Ollama-Call. Groesser = weniger HTTP-Overhead.
OLLAMA_REVIEW_BATCH_SIZE=20
# OCR-Pipeline: Engine fuer Schritt 5 (Worterkennung)
# Optionen: auto (bevorzugt RapidOCR), rapid, tesseract,
# trocr-printed, trocr-handwritten, lighton
OCR_ENGINE=auto
# Klausur-HTR: Primaeres Modell fuer Handschriftenerkennung (qwen2.5vl bereits auf Mac Mini)
OLLAMA_HTR_MODEL=qwen2.5vl:32b
# HTR Fallback: genutzt wenn Ollama nicht erreichbar (auto-download ~340 MB)
HTR_FALLBACK_MODEL=trocr-large
# Anthropic (optional)
ANTHROPIC_API_KEY=

View File

@@ -0,0 +1,32 @@
name: Deploy to Coolify

# Triggers a Coolify redeploy of breakpilot-lehrer on every push to the `coolify` branch.
on:
  push:
    branches:
      - coolify

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Wait for Core deployment
        run: |
          echo "Waiting 30s for Core services to stabilize..."
          sleep 30

      - name: Deploy via Coolify API
        # Secrets are passed as environment variables instead of being expanded
        # into the script text, so their content can never be parsed as shell syntax.
        env:
          COOLIFY_API_TOKEN: ${{ secrets.COOLIFY_API_TOKEN }}
          COOLIFY_RESOURCE_UUID: ${{ secrets.COOLIFY_RESOURCE_UUID }}
          COOLIFY_BASE_URL: ${{ secrets.COOLIFY_BASE_URL }}
        run: |
          echo "Deploying breakpilot-lehrer to Coolify..."
          # -sS: no progress meter, but still print transport errors.
          # The timeouts keep a hung Coolify endpoint from blocking the runner.
          # `|| true`: a transport failure must reach the status check below
          # (curl reports it as 000) instead of aborting the step without a message.
          HTTP_STATUS=$(curl -sS -o /dev/null -w "%{http_code}" \
            --connect-timeout 10 --max-time 60 \
            -X POST \
            -H "Authorization: Bearer ${COOLIFY_API_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"uuid\": \"${COOLIFY_RESOURCE_UUID}\", \"force_rebuild\": true}" \
            "${COOLIFY_BASE_URL}/api/v1/deploy") || true
          echo "HTTP Status: ${HTTP_STATUS:-none}"
          # String comparison: stays well-defined even if the status is empty.
          if [ "$HTTP_STATUS" != "200" ] && [ "$HTTP_STATUS" != "201" ]; then
            echo "Deployment failed with status ${HTTP_STATUS:-none}"
            exit 1
          fi
          echo "Deployment triggered successfully!"

View File

@@ -34,8 +34,8 @@ WORKDIR /app
ENV NODE_ENV=production
# Create non-root user
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs
RUN addgroup -S -g 1001 nodejs
RUN adduser -S -u 1001 -G nodejs nextjs
# Copy built assets
COPY --from=builder /app/public ./public

View File

@@ -273,6 +273,52 @@ Dein Ziel ist die rechtzeitige Erkennung und Kommunikation relevanter Ereignisse
createdAt: '2024-12-01T00:00:00Z',
updatedAt: '2025-01-12T02:00:00Z'
},
'compliance-advisor': {
id: 'compliance-advisor',
name: 'Compliance Advisor',
description: 'DSGVO/Compliance-Berater fuer SDK-Nutzer',
soulFile: 'compliance-advisor.soul.md',
soulContent: `# Compliance Advisor Agent
## Identitaet
Du bist der BreakPilot Compliance-Berater. Du hilfst Nutzern des AI Compliance SDK,
Datenschutz- und Compliance-Fragen in verstaendlicher Sprache zu beantworten.
Du bist kein Anwalt und gibst keine Rechtsberatung, sondern orientierst dich an
offiziellen Quellen und gibst praxisnahe Hinweise.
## Kernprinzipien
- **Quellenbasiert**: Verweise immer auf konkrete Rechtsgrundlagen (DSGVO-Artikel, BDSG-Paragraphen)
- **Verstaendlich**: Erklaere rechtliche Konzepte in einfacher, praxisnaher Sprache
- **Ehrlich**: Bei Unsicherheit empfehle professionelle Rechtsberatung
- **Kontextbewusst**: Nutze das RAG-System fuer aktuelle Rechtstexte und Leitfaeden
- **Scope-bewusst**: Nutze alle verfuegbaren RAG-Quellen AUSSER NIBIS-Dokumenten
## Kompetenzbereich
- DSGVO Art. 1-99 + Erwaegsgruende
- BDSG (Bundesdatenschutzgesetz)
- AI Act (EU KI-Verordnung)
- TTDSG, ePrivacy-Richtlinie
- DSK-Kurzpapiere (Nr. 1-20)
- SDM V3.0, BSI-Grundschutz, BSI-TR-03161
- EDPB Guidelines, Bundes-/Laender-Muss-Listen
- ISO 27001/27701 (Ueberblick)
## Kommunikationsstil
- Sachlich, aber verstaendlich
- Deutsch als Hauptsprache
- Strukturierte Antworten mit Quellenangabe
- Praxisbeispiele wo hilfreich`,
color: '#6366f1',
status: 'running',
activeSessions: 0,
totalProcessed: 0,
avgResponseTime: 0,
errorRate: 0,
lastRestart: new Date().toISOString(),
version: '1.0.0',
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString()
},
'orchestrator': {
id: 'orchestrator',
name: 'Orchestrator',

View File

@@ -94,6 +94,19 @@ const mockAgents: AgentConfig[] = [
totalProcessed: 8934,
avgResponseTime: 12,
lastActivity: 'just now'
},
{
id: 'compliance-advisor',
name: 'Compliance Advisor',
description: 'DSGVO/Compliance-Berater fuer SDK-Nutzer',
soulFile: 'compliance-advisor.soul.md',
color: '#6366f1',
icon: 'message',
status: 'running',
activeSessions: 0,
totalProcessed: 0,
avgResponseTime: 0,
lastActivity: new Date().toISOString()
}
]

View File

@@ -179,6 +179,7 @@ export default function GPUInfrastructurePage() {
databases: ['PostgreSQL (Logs)'],
}}
relatedPages={[
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
{ name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Tests' },
{ name: 'Magic Help', href: '/ai/magic-help', description: 'TrOCR Testing' },
]}

View File

@@ -0,0 +1,503 @@
'use client'
/**
* LLM Comparison Tool
*
* Vergleicht Antworten von verschiedenen LLM-Providern:
* - OpenAI/ChatGPT
* - Claude
* - Self-hosted + Tavily
* - Self-hosted + EduSearch
*/
import { useState, useEffect, useCallback } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
/**
 * One provider's answer inside a comparison run, as returned by the
 * comparison API and rendered by the response cards on this page.
 */
interface LLMResponse {
  /** Provider key; also used to look up the card colors and display label. */
  provider: string
  /** Model identifier shown beneath the provider label. */
  model: string
  /** Answer text; displayed only when `error` is not set. */
  response: string
  /** Latency in milliseconds (rendered with an "ms" suffix). */
  latency_ms: number
  /** Token count, if the backend supplied one. */
  tokens_used?: number
  /** Optional search hits listed under the answer. */
  search_results?: {
    title: string
    url: string
    content: string
    score?: number
  }[]
  /** Error message; when present it is shown instead of `response`. */
  error?: string
  timestamp: string
}
/**
 * A complete comparison run: the prompts that were sent plus one
 * `LLMResponse` per provider. Used both for the live result and for
 * entries in the history list.
 */
interface ComparisonResult {
  /** Identifier shown in the result header and used as React key in the history list. */
  comparison_id: string
  /** User prompt of the run. */
  prompt: string
  /** System prompt of the run, if one was used. */
  system_prompt?: string
  /** Provider answers, rendered one card each. */
  responses: LLMResponse[]
  /** Creation time; parsed with `new Date(...)` for display. */
  created_at: string
}
/**
 * CSS class triple (background, border, text) per provider key.
 * NOTE(review): these look like Tailwind utility classes — presumably they must stay
 * complete string literals so the class scanner can find them; confirm before
 * generating them dynamically.
 */
const providerColors: Record<string, { bg: string; border: string; text: string }> = {
  openai: {
    bg: 'bg-emerald-50',
    border: 'border-emerald-300',
    text: 'text-emerald-700',
  },
  claude: {
    bg: 'bg-orange-50',
    border: 'border-orange-300',
    text: 'text-orange-700',
  },
  selfhosted_tavily: {
    bg: 'bg-blue-50',
    border: 'border-blue-300',
    text: 'text-blue-700',
  },
  selfhosted_edusearch: {
    bg: 'bg-purple-50',
    border: 'border-purple-300',
    text: 'text-purple-700',
  },
}
/** Human-readable card title per provider key; unknown keys fall back to the raw key at the call site. */
const providerLabels: Record<string, string> = {
  'openai': 'OpenAI GPT-4o-mini',
  'claude': 'Claude 3.5 Sonnet',
  'selfhosted_tavily': 'Self-hosted + Tavily',
  'selfhosted_edusearch': 'Self-hosted + EduSearch',
}
// Gateway endpoint and bearer token used for all comparison API calls.
// NOTE(review): NEXT_PUBLIC_* variables are inlined into the browser bundle, so this
// key is readable by every visitor of the page — confirm that is acceptable or route
// the calls through a server-side proxy.
const API_URL = process.env.NEXT_PUBLIC_LLM_GATEWAY_URL || 'http://localhost:8082'
const API_KEY = process.env.NEXT_PUBLIC_LLM_API_KEY || 'dev-key'

// Neutral card colors for provider keys that have no entry in `providerColors`.
const FALLBACK_COLORS = {
  bg: 'bg-slate-50',
  border: 'border-slate-300',
  text: 'text-slate-700',
}

/**
 * Returns `url` only if it is an absolute http(s) URL, otherwise `undefined`.
 * Search-result URLs come from the backend; this keeps `javascript:` and other
 * schemes out of `href`.
 */
function toSafeHref(url: string): string | undefined {
  return /^https?:\/\//i.test(url) ? url : undefined
}

/** Chevron used by the collapsible panels; points up while the panel is open. */
function ChevronIcon({ open }: { open: boolean }) {
  return (
    <svg
      className={`w-5 h-5 transition-transform ${open ? 'rotate-180' : ''}`}
      fill="none"
      stroke="currentColor"
      viewBox="0 0 24 24"
    >
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
    </svg>
  )
}

/**
 * Card showing a single provider's answer (or its error) plus optional search hits.
 *
 * Declared at module scope on purpose: a component declared inside
 * `LLMComparePage` would get a new identity on every render, so React would
 * remount every card (and collapse any open search-result list) on each keystroke.
 */
function ResponseCard({ response }: { response: LLMResponse }) {
  const colors = providerColors[response.provider] || FALLBACK_COLORS
  const label = providerLabels[response.provider] || response.provider
  const hits = response.search_results || []

  return (
    <div className={`rounded-xl border-2 ${colors.border} ${colors.bg} overflow-hidden`}>
      <div className={`px-4 py-3 border-b ${colors.border} flex items-center justify-between`}>
        <div>
          <h3 className={`font-semibold ${colors.text}`}>{label}</h3>
          <p className="text-xs text-slate-500">{response.model}</p>
        </div>
        <div className="text-right text-xs text-slate-500">
          <div>{response.latency_ms}ms</div>
          {/* Ternary instead of `&&`: a count of 0 must not be rendered as a stray "0". */}
          {response.tokens_used ? <div>{response.tokens_used} tokens</div> : null}
        </div>
      </div>
      <div className="p-4">
        {response.error ? (
          <div className="text-red-600 text-sm">
            <strong>Fehler:</strong> {response.error}
          </div>
        ) : (
          <pre className="whitespace-pre-wrap text-sm text-slate-700 font-sans">
            {response.response}
          </pre>
        )}
      </div>
      {hits.length > 0 && (
        <div className="px-4 pb-4">
          <details className="text-xs">
            <summary className="cursor-pointer text-slate-500 hover:text-slate-700">
              {hits.length} Suchergebnisse anzeigen
            </summary>
            <ul className="mt-2 space-y-2">
              {hits.map((sr, idx) => (
                <li key={idx} className="bg-white rounded p-2 border border-slate-200">
                  <a
                    href={toSafeHref(sr.url)}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-blue-600 hover:underline font-medium"
                  >
                    {sr.title || 'Untitled'}
                  </a>
                  <p className="text-slate-500 truncate">{sr.content}</p>
                </li>
              ))}
            </ul>
          </details>
        </div>
      )}
    </div>
  )
}

/**
 * LLM comparison page.
 *
 * Sends one prompt (plus optional system prompt) to the comparison API with the
 * selected providers and parameters, renders one card per provider answer, and
 * lists earlier runs that can be reloaded into the form.
 */
export default function LLMComparePage() {
  // Prompt state
  const [prompt, setPrompt] = useState('')
  const [systemPrompt, setSystemPrompt] = useState('Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.')

  // Provider toggles
  const [enableOpenAI, setEnableOpenAI] = useState(true)
  const [enableClaude, setEnableClaude] = useState(true)
  const [enableTavily, setEnableTavily] = useState(true)
  const [enableEduSearch, setEnableEduSearch] = useState(true)

  // Parameters
  const [model, setModel] = useState('llama3.2:3b')
  const [temperature, setTemperature] = useState(0.7)
  const [maxTokens, setMaxTokens] = useState(2048)

  // Results
  const [isLoading, setIsLoading] = useState(false)
  const [result, setResult] = useState<ComparisonResult | null>(null)
  const [history, setHistory] = useState<ComparisonResult[]>([])
  const [error, setError] = useState<string | null>(null)

  // UI state
  const [showSettings, setShowSettings] = useState(false)
  const [showHistory, setShowHistory] = useState(false)

  // Fetches the 20 most recent comparisons. Failures are logged only, because the
  // history panel is secondary to running a comparison.
  const loadHistory = useCallback(async () => {
    try {
      const response = await fetch(`${API_URL}/v1/comparison/history?limit=20`, {
        headers: { Authorization: `Bearer ${API_KEY}` },
      })
      if (!response.ok) {
        console.warn('Failed to load history: HTTP', response.status)
        return
      }
      const data = await response.json()
      // Anything but an array would break `history.map` in the render below.
      setHistory(data && Array.isArray(data.comparisons) ? data.comparisons : [])
    } catch (e) {
      console.error('Failed to load history:', e)
    }
  }, [])

  useEffect(() => {
    loadHistory()
  }, [loadHistory])

  // Validates the form, posts the comparison request and stores the result.
  const runComparison = async () => {
    if (!prompt.trim()) {
      setError('Bitte geben Sie einen Prompt ein')
      return
    }
    // With every provider switched off there is nothing to compare.
    if (!enableOpenAI && !enableClaude && !enableTavily && !enableEduSearch) {
      setError('Bitte waehlen Sie mindestens einen Provider aus')
      return
    }

    setIsLoading(true)
    setError(null)
    setResult(null)

    try {
      const response = await fetch(`${API_URL}/v1/comparison/run`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${API_KEY}`,
        },
        body: JSON.stringify({
          prompt,
          system_prompt: systemPrompt || undefined,
          enable_openai: enableOpenAI,
          enable_claude: enableClaude,
          enable_selfhosted_tavily: enableTavily,
          enable_selfhosted_edusearch: enableEduSearch,
          selfhosted_model: model,
          temperature,
          max_tokens: maxTokens,
        }),
      })
      if (!response.ok) {
        throw new Error(`API Error: ${response.status}`)
      }
      const data = await response.json()
      // Reject payloads without a `responses` array instead of crashing the result grid.
      if (!data || !Array.isArray(data.responses)) {
        throw new Error('Unerwartete API-Antwort')
      }
      setResult(data)
      // Refresh in the background; loadHistory handles its own errors.
      void loadHistory()
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setIsLoading(false)
    }
  }

  // One entry per provider checkbox, in display order.
  const providerToggles = [
    { label: 'OpenAI', checked: enableOpenAI, onChange: setEnableOpenAI },
    { label: 'Claude', checked: enableClaude, onChange: setEnableClaude },
    { label: 'Self + Tavily', checked: enableTavily, onChange: setEnableTavily },
    { label: 'Self + EduSearch', checked: enableEduSearch, onChange: setEnableEduSearch },
  ]

  return (
    <div>
      {/* Page Purpose */}
      <PagePurpose
        title="LLM Vergleich"
        purpose="Vergleichen Sie Antworten verschiedener KI-Provider (OpenAI, Claude, Self-hosted) fuer Qualitaetssicherung. Optimieren Sie Parameter und System Prompts fuer beste Ergebnisse. Standalone-Werkzeug ohne direkten Datenfluss zur KI-Pipeline."
        audience={['Entwickler', 'Data Scientists', 'QA']}
        architecture={{
          services: ['llm-gateway (Python)', 'Ollama', 'OpenAI API', 'Claude API'],
          databases: ['PostgreSQL (History)', 'Qdrant (RAG)'],
        }}
        relatedPages={[
          { name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Synthetic Tests' },
          { name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
          { name: 'Agent Management', href: '/ai/agents', description: 'Multi-Agent System' },
        ]}
        collapsible={true}
        defaultCollapsed={true}
      />

      {/* KI-Werkzeuge Sidebar */}
      <AIToolsSidebarResponsive currentTool="llm-compare" />

      <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
        {/* Left Column: Input & Settings */}
        <div className="lg:col-span-1 space-y-4">
          {/* Prompt Input */}
          <div className="bg-white rounded-xl border border-slate-200 p-4">
            <h2 className="font-semibold text-slate-900 mb-3">Prompt</h2>

            {/* System Prompt */}
            <div className="mb-3">
              <label className="block text-sm text-slate-600 mb-1">System Prompt</label>
              <textarea
                value={systemPrompt}
                onChange={(e) => setSystemPrompt(e.target.value)}
                rows={3}
                className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm resize-none"
                placeholder="System Prompt (optional)"
              />
            </div>

            {/* User Prompt */}
            <div className="mb-3">
              <label className="block text-sm text-slate-600 mb-1">User Prompt</label>
              <textarea
                value={prompt}
                onChange={(e) => setPrompt(e.target.value)}
                rows={4}
                className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm resize-none"
                placeholder="z.B.: Erstelle ein Arbeitsblatt zum Thema Bruchrechnung fuer Klasse 6..."
              />
            </div>

            {/* Provider Toggles */}
            <div className="mb-4">
              <label className="block text-sm text-slate-600 mb-2">Provider</label>
              <div className="grid grid-cols-2 gap-2">
                {providerToggles.map((toggle) => (
                  <label key={toggle.label} className="flex items-center gap-2 text-sm">
                    <input
                      type="checkbox"
                      checked={toggle.checked}
                      onChange={(e) => toggle.onChange(e.target.checked)}
                      className="rounded"
                    />
                    {toggle.label}
                  </label>
                ))}
              </div>
            </div>

            {/* Run Button */}
            <button
              onClick={runComparison}
              disabled={isLoading || !prompt.trim()}
              className="w-full py-3 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed"
            >
              {isLoading ? (
                <span className="flex items-center justify-center gap-2">
                  <svg className="animate-spin w-5 h-5" fill="none" viewBox="0 0 24 24">
                    <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                    <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                  </svg>
                  Vergleiche...
                </span>
              ) : (
                'Vergleich starten'
              )}
            </button>

            {error && (
              <div className="mt-3 p-3 bg-red-50 border border-red-200 rounded-lg text-red-700 text-sm">
                {error}
              </div>
            )}
          </div>

          {/* Settings Panel */}
          <div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
            <button
              onClick={() => setShowSettings(!showSettings)}
              className="w-full px-4 py-3 flex items-center justify-between hover:bg-slate-50"
            >
              <span className="font-semibold text-slate-900">Parameter</span>
              <ChevronIcon open={showSettings} />
            </button>
            {showSettings && (
              <div className="p-4 border-t border-slate-200 space-y-4">
                <div>
                  <label className="block text-sm text-slate-600 mb-1">Self-hosted Modell</label>
                  <select
                    value={model}
                    onChange={(e) => setModel(e.target.value)}
                    className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm"
                  >
                    <option value="llama3.2:3b">Llama 3.2 3B</option>
                    <option value="llama3.1:8b">Llama 3.1 8B</option>
                    <option value="mistral:7b">Mistral 7B</option>
                    <option value="qwen2.5:7b">Qwen 2.5 7B</option>
                  </select>
                </div>
                <div>
                  <label className="block text-sm text-slate-600 mb-1">
                    Temperature: {temperature.toFixed(2)}
                  </label>
                  <input
                    type="range"
                    min="0"
                    max="2"
                    step="0.1"
                    value={temperature}
                    onChange={(e) => setTemperature(parseFloat(e.target.value))}
                    className="w-full"
                  />
                </div>
                <div>
                  <label className="block text-sm text-slate-600 mb-1">Max Tokens: {maxTokens}</label>
                  <input
                    type="range"
                    min="256"
                    max="4096"
                    step="256"
                    value={maxTokens}
                    onChange={(e) => setMaxTokens(parseInt(e.target.value, 10))}
                    className="w-full"
                  />
                </div>
              </div>
            )}
          </div>

          {/* History Panel */}
          <div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
            <button
              onClick={() => setShowHistory(!showHistory)}
              className="w-full px-4 py-3 flex items-center justify-between hover:bg-slate-50"
            >
              <span className="font-semibold text-slate-900">Verlauf ({history.length})</span>
              <ChevronIcon open={showHistory} />
            </button>
            {showHistory && history.length > 0 && (
              <div className="border-t border-slate-200 max-h-64 overflow-y-auto">
                {history.map((h) => (
                  <button
                    key={h.comparison_id}
                    onClick={() => {
                      // Reload the stored run into the result view and the form.
                      setResult(h)
                      setPrompt(h.prompt)
                      if (h.system_prompt) setSystemPrompt(h.system_prompt)
                    }}
                    className="w-full px-4 py-2 text-left hover:bg-slate-50 border-b border-slate-100 last:border-0"
                  >
                    <div className="text-sm text-slate-700 truncate">{h.prompt}</div>
                    <div className="text-xs text-slate-400">
                      {new Date(h.created_at).toLocaleString('de-DE')}
                    </div>
                  </button>
                ))}
              </div>
            )}
          </div>
        </div>

        {/* Right Column: Results */}
        <div className="lg:col-span-2">
          {result ? (
            <div className="space-y-4">
              <div className="bg-white rounded-xl border border-slate-200 p-4">
                <div className="flex items-center justify-between">
                  <div>
                    <h2 className="font-semibold text-slate-900">Ergebnisse</h2>
                    <p className="text-sm text-slate-500">ID: {result.comparison_id}</p>
                  </div>
                  <div className="text-sm text-slate-500">
                    {new Date(result.created_at).toLocaleString('de-DE')}
                  </div>
                </div>
                <div className="mt-2 p-3 bg-slate-50 rounded-lg">
                  <p className="text-sm text-slate-700">{result.prompt}</p>
                </div>
              </div>
              <div className="grid grid-cols-1 xl:grid-cols-2 gap-4">
                {/* `|| []`: history entries are not shape-checked, so tolerate a missing list. */}
                {(result.responses || []).map((response, idx) => (
                  <ResponseCard key={`${response.provider}-${idx}`} response={response} />
                ))}
              </div>
            </div>
          ) : (
            <div className="bg-white rounded-xl border border-slate-200 p-12 text-center">
              <svg
                className="w-16 h-16 mx-auto text-slate-300 mb-4"
                fill="none"
                stroke="currentColor"
                viewBox="0 0 24 24"
              >
                <path
                  strokeLinecap="round"
                  strokeLinejoin="round"
                  strokeWidth={1.5}
                  d="M9 3v2m6-2v2M9 19v2m6-2v2M5 9H3m2 6H3m18-6h-2m2 6h-2M7 19h10a2 2 0 002-2V7a2 2 0 00-2-2H7a2 2 0 00-2 2v10a2 2 0 002 2zM9 9h6v6H9V9z"
                />
              </svg>
              <h3 className="text-lg font-medium text-slate-700 mb-2">LLM-Vergleich starten</h3>
              <p className="text-slate-500 max-w-md mx-auto">
                Geben Sie einen Prompt ein und klicken Sie auf &quot;Vergleich starten&quot;, um
                die Antworten verschiedener LLM-Provider zu vergleichen.
              </p>
            </div>
          )}
        </div>
      </div>

      {/* Info Box */}
      <div className="mt-8 bg-teal-50 border border-teal-200 rounded-xl p-6">
        <div className="flex items-start gap-4">
          <svg className="w-6 h-6 text-teal-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
          </svg>
          <div>
            <h3 className="font-semibold text-teal-900">Qualitaetssicherung</h3>
            <p className="text-sm text-teal-800 mt-1">
              Dieses Tool dient zur Qualitaetssicherung der KI-Antworten. Vergleichen Sie verschiedene Provider,
              um die optimalen Parameter und System Prompts zu finden. Die Ergebnisse werden fuer Audits gespeichert.
            </p>
          </div>
        </div>
      </div>
    </div>
  )
}

View File

@@ -685,6 +685,7 @@ export default function OCRComparePage() {
databases: ['PostgreSQL (Sessions)'],
}}
relatedPages={[
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider vergleichen' },
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Ground Truth erstellen' },
]}
collapsible={true}

View File

@@ -1,548 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
import { GridEditor } from '@/components/grid-editor/GridEditor'
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
const KLAUSUR_API = '/klausur-api'
export default function OcrOverlayPage() {
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
const [currentStep, setCurrentStep] = useState(0)
const [sessionId, setSessionId] = useState<string | null>(null)
const [sessionName, setSessionName] = useState<string>('')
const [sessions, setSessions] = useState<SessionListItem[]>([])
const [loadingSessions, setLoadingSessions] = useState(true)
const [editingName, setEditingName] = useState<string | null>(null)
const [editNameValue, setEditNameValue] = useState('')
const [editingCategory, setEditingCategory] = useState<string | null>(null)
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
const [steps, setSteps] = useState<PipelineStep[]>(
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
...s,
status: i === 0 ? 'active' : 'pending',
})),
)
useEffect(() => {
loadSessions()
}, [])
const loadSessions = async () => {
setLoadingSessions(true)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
if (res.ok) {
const data = await res.json()
// Filter to only show top-level sessions (no sub-sessions)
setSessions((data.sessions || []).filter((s: SessionListItem) => !s.parent_session_id))
}
} catch (e) {
console.error('Failed to load sessions:', e)
} finally {
setLoadingSessions(false)
}
}
const openSession = useCallback(async (sid: string) => {
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
if (!res.ok) return
const data = await res.json()
setSessionId(sid)
setSessionName(data.name || data.filename || '')
setActiveCategory(data.document_category || undefined)
// Check if this session was processed with paddle_direct, kombi, or rapid_kombi
const ocrEngine = data.word_result?.ocr_engine
const isPaddleDirect = ocrEngine === 'paddle_direct'
const isKombi = ocrEngine === 'kombi' || ocrEngine === 'rapid_kombi'
if (isPaddleDirect || isKombi) {
const m = isKombi ? 'kombi' : 'paddle-direct'
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
setMode(m)
// For Kombi: if grid_editor_result exists, jump to grid editor step (6)
// If structure_result exists, jump to grid editor (6)
// If word_result exists, jump to structure step (5)
const hasGrid = isKombi && data.grid_editor_result
const hasStructure = isKombi && data.structure_result
const hasWords = isKombi && data.word_result
const activeStep = hasGrid ? 6 : hasStructure ? 6 : hasWords ? 5 : 4
setSteps(
baseSteps.map((s, i) => ({
...s,
status: i < activeStep ? 'completed' : i === activeStep ? 'active' : 'pending',
})),
)
setCurrentStep(activeStep)
} else {
setMode('pipeline')
// Map DB step to overlay UI step
const dbStep = data.current_step || 1
const uiStep = dbStepToOverlayUi(dbStep)
setSteps(
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
...s,
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
})),
)
setCurrentStep(uiStep)
}
} catch (e) {
console.error('Failed to open session:', e)
}
}, [])
const deleteSession = useCallback(async (sid: string) => {
try {
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
setSessions((prev) => prev.filter((s) => s.id !== sid))
if (sessionId === sid) {
setSessionId(null)
setCurrentStep(0)
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}
} catch (e) {
console.error('Failed to delete session:', e)
}
}, [sessionId, mode])
const renameSession = useCallback(async (sid: string, newName: string) => {
try {
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name: newName }),
})
setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
if (sessionId === sid) setSessionName(newName)
} catch (e) {
console.error('Failed to rename session:', e)
}
setEditingName(null)
}, [sessionId])
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
try {
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ document_category: category }),
})
setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
if (sessionId === sid) setActiveCategory(category)
} catch (e) {
console.error('Failed to update category:', e)
}
setEditingCategory(null)
}, [sessionId])
const handleStepClick = (index: number) => {
if (index <= currentStep || steps[index].status === 'completed') {
setCurrentStep(index)
}
}
const goToStep = (step: number) => {
setCurrentStep(step)
setSteps((prev) =>
prev.map((s, i) => ({
...s,
status: i < step ? 'completed' : i === step ? 'active' : 'pending',
})),
)
}
// Advance the stepper by one. After the final step the page is reset to the
// first step of the active mode, the session is closed and the list reloaded.
const handleNext = () => {
  const isLastStep = currentStep >= steps.length - 1
  if (!isLastStep) {
    const upcoming = currentStep + 1
    setSteps((previous) =>
      previous.map((entry, idx) => {
        if (idx === currentStep) return { ...entry, status: 'completed' }
        if (idx === upcoming) return { ...entry, status: 'active' }
        return entry
      }),
    )
    setCurrentStep(upcoming)
    return
  }

  // Last step completed — return to session list
  let template = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') {
    template = KOMBI_STEPS
  } else if (mode === 'paddle-direct') {
    template = PADDLE_DIRECT_STEPS
  }
  setSteps(template.map((entry, idx) => ({ ...entry, status: idx === 0 ? 'active' : 'pending' })))
  setCurrentStep(0)
  setSessionId(null)
  loadSessions()
}
// Passed to StepOrientation as onNext: stores the session id it reports,
// refreshes the session list, then advances the stepper.
const handleOrientationComplete = (sid: string) => {
setSessionId(sid)
loadSessions()
handleNext()
}
// "+ Neue Session" handler: drop the open session and its name, and put the
// stepper back on the first step of the currently selected mode.
const handleNewSession = () => {
  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  let template = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') {
    template = KOMBI_STEPS
  } else if (mode === 'paddle-direct') {
    template = PADDLE_DIRECT_STEPS
  }
  setSteps(template.map((entry, idx) => ({ ...entry, status: idx === 0 ? 'active' : 'pending' })))
}
// Display names of the seven overlay pipeline steps, keyed by 1-based step
// number (reprocessFromStep looks them up with `uiStep + 1`).
const stepNames: Record<number, string> = {
1: 'Orientierung',
2: 'Begradigung',
3: 'Entzerrung',
4: 'Zuschneiden',
5: 'Zeilen',
6: 'Woerter',
7: 'Overlay',
}
// Ask the backend to reprocess the open session starting at the given UI step
// (after user confirmation) and, on success, rewind the stepper to that step.
const reprocessFromStep = useCallback(async (uiStep: number) => {
  if (!sessionId) return

  // Overlay UI index -> DB step number; falls back to uiStep + 1 for unknown indices.
  const uiToDb: Record<number, number> = { 0: 2, 1: 3, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9 }
  const fromStep = uiToDb[uiStep] || uiStep + 1

  const confirmed = confirm(`Ab Schritt ${uiStep + 1} (${stepNames[uiStep + 1] || '?'}) neu verarbeiten?`)
  if (!confirmed) return

  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ from_step: fromStep }),
    })
    if (response.ok) {
      goToStep(uiStep)
      return
    }
    // Error body may not be JSON; fall back to an empty object and the status code.
    const payload = await response.json().catch(() => ({}))
    console.error('Reprocess failed:', payload.detail || response.status)
  } catch (e) {
    console.error('Reprocess error:', e)
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, goToStep])
// Pick the component for the current step. Steps 0-3 (image preprocessing)
// are shared by all modes; from step 4 on the mode decides what is shown.
const renderStep = () => {
  if (currentStep === 0) {
    return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
  }
  if (currentStep === 1) {
    return <StepDeskew sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 2) {
    return <StepDewarp sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 3) {
    return <StepCrop sessionId={sessionId} onNext={handleNext} />
  }

  const isKombi = mode === 'kombi'
  if (isKombi || mode === 'paddle-direct') {
    if (currentStep === 4) {
      if (!isKombi) {
        return <PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
      }
      return (
        <PaddleDirectStep
          sessionId={sessionId}
          onNext={handleNext}
          endpoint="paddle-kombi"
          title="Kombi-Modus"
          description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
          icon="🔀"
          buttonLabel="PP-OCRv5 + Tesseract starten"
          runningLabel="PP-OCRv5 + Tesseract laufen..."
          engineKey="kombi"
        />
      )
    }
    // Steps 5 and 6 only exist in Kombi mode.
    if (!isKombi) return null
    if (currentStep === 5) {
      return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
    }
    if (currentStep === 6) {
      return <GridEditor sessionId={sessionId} onNext={handleNext} />
    }
    return null
  }

  // Classic overlay pipeline: rows -> words -> overlay reconstruction.
  if (currentStep === 4) {
    return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 5) {
    return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} skipHealGaps />
  }
  if (currentStep === 6) {
    return <OverlayReconstruction sessionId={sessionId} onNext={handleNext} />
  }
  return null
}
return (
<div className="space-y-6">
<PagePurpose
title="OCR Overlay"
purpose="Ganzseitige Overlay-Rekonstruktion: Scan begradigen, Zeilen und Woerter erkennen, dann pixelgenau ueber das Bild legen. Ohne Spaltenerkennung — ideal fuer Arbeitsblaetter."
audience={['Entwickler']}
architecture={{
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
databases: ['PostgreSQL Sessions'],
}}
relatedPages={[
{ name: 'OCR Pipeline', href: '/ai/ocr-pipeline', description: 'Volle Pipeline mit Spalten' },
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
]}
defaultCollapsed
/>
{/* Session List */}
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
<div className="flex items-center justify-between mb-3">
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Sessions ({sessions.length})
</h3>
<button
onClick={handleNewSession}
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
>
+ Neue Session
</button>
</div>
{loadingSessions ? (
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
) : sessions.length === 0 ? (
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
) : (
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
{sessions.map((s) => {
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
return (
<div
key={s.id}
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
sessionId === s.id
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
}`}
>
{/* Thumbnail */}
<div
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
onClick={() => openSession(s.id)}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
alt=""
className="w-full h-full object-cover"
loading="lazy"
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
/>
</div>
{/* Info */}
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
{editingName === s.id ? (
<input
autoFocus
value={editNameValue}
onChange={(e) => setEditNameValue(e.target.value)}
onBlur={() => renameSession(s.id, editNameValue)}
onKeyDown={(e) => {
if (e.key === 'Enter') renameSession(s.id, editNameValue)
if (e.key === 'Escape') setEditingName(null)
}}
onClick={(e) => e.stopPropagation()}
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
/>
) : (
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
{s.name || s.filename}
</div>
)}
<button
onClick={(e) => {
e.stopPropagation()
navigator.clipboard.writeText(s.id)
const btn = e.currentTarget
btn.textContent = 'Kopiert!'
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
}}
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
>
ID: {s.id.slice(0, 8)}
</button>
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
</div>
</div>
{/* Category Badge */}
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
<button
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
catInfo
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
}`}
title="Kategorie setzen"
>
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
</button>
</div>
{/* Actions */}
<div className="flex flex-col gap-0.5 flex-shrink-0">
<button
onClick={(e) => {
e.stopPropagation()
setEditNameValue(s.name || s.filename)
setEditingName(s.id)
}}
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
title="Umbenennen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
</svg>
</button>
<button
onClick={(e) => {
e.stopPropagation()
if (confirm('Session loeschen?')) deleteSession(s.id)
}}
className="p-1 text-gray-400 hover:text-red-500"
title="Loeschen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
{/* Category dropdown */}
{editingCategory === s.id && (
<div
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
onClick={(e) => e.stopPropagation()}
>
{DOCUMENT_CATEGORIES.map((cat) => (
<button
key={cat.value}
onClick={() => updateCategory(s.id, cat.value)}
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
s.document_category === cat.value
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
{cat.icon} {cat.label}
</button>
))}
</div>
)}
</div>
)
})}
</div>
)}
</div>
{/* Active session info */}
{sessionId && sessionName && (
<div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
<span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
{activeCategory && (() => {
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
})()}
</div>
)}
{/* Mode Toggle */}
<div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 rounded-lg p-1 w-fit">
<button
onClick={() => {
if (mode === 'pipeline') return
setMode('pipeline')
setCurrentStep(0)
setSessionId(null)
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'pipeline'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
Pipeline (7 Schritte)
</button>
<button
onClick={() => {
if (mode === 'paddle-direct') return
setMode('paddle-direct')
setCurrentStep(0)
setSessionId(null)
setSteps(PADDLE_DIRECT_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'paddle-direct'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
PP-OCRv5 Direct (5 Schritte)
</button>
<button
onClick={() => {
if (mode === 'kombi') return
setMode('kombi')
setCurrentStep(0)
setSessionId(null)
setSteps(KOMBI_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'kombi'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
Kombi (7 Schritte)
</button>
</div>
<PipelineStepper
steps={steps}
currentStep={currentStep}
onStepClick={handleStepClick}
onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
/>
<div className="min-h-[400px]">{renderStep()}</div>
</div>
)
}

View File

@@ -1,87 +0,0 @@
import type { PipelineStep } from '../ocr-pipeline/types'
// Re-export types used by overlay components
export type {
PipelineStep,
PipelineStepStatus,
SessionListItem,
SessionInfo,
DocumentCategory,
DocumentTypeResult,
OrientationResult,
CropResult,
DeskewResult,
DewarpResult,
RowResult,
RowItem,
GridResult,
GridCell,
OcrWordBox,
WordBbox,
ColumnMeta,
} from '../ocr-pipeline/types'
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
/**
 * 7-step pipeline for full-page overlay reconstruction, in display order.
 * Skips: Spalten (columns), LLM-Review (Korrektur), Ground-Truth (Validierung).
 * Every entry starts as 'pending'; the page marks the first one 'active'
 * when it initialises or resets the stepper.
 */
export const OVERLAY_PIPELINE_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' },
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
{ id: 'reconstruction', name: 'Overlay', icon: '🏗️', status: 'pending' },
]
/**
 * Map from overlay UI step index (0-based) to DB step number.
 *
 * DB numbering: 1=start, 2=orientation, 3=deskew, 4=dewarp, 5=crop,
 * 6=columns, 7=rows, 8=words, 9=reconstruction. The overlay pipeline has no
 * column step, so DB step 6 is never targeted.
 *
 * This table is the inverse of dbStepToOverlayUi: every value maps back to
 * its own key.
 */
export const OVERLAY_UI_TO_DB: Record<number, number> = {
  0: 2, // orientation
  1: 3, // deskew
  2: 4, // dewarp
  3: 5, // crop
  4: 7, // rows (DB step 6 = columns is skipped in overlay mode)
  5: 8, // words
  6: 9, // reconstruction
}
/**
 * 5-step pipeline for Paddle Direct mode, in display order.
 * Same preprocessing (orient/deskew/dewarp/crop), then a single PaddleOCR step
 * replaces rows+words+overlay.
 */
export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'paddle-direct', name: 'PP-OCRv5 + Overlay', icon: '⚡', status: 'pending' },
]
/**
 * 7-step pipeline for Kombi mode (PP-OCRv5 + Tesseract), in display order.
 * Same four preprocessing steps, then the combined OCR step, followed by
 * structure detection ('structure') and the table editor ('grid-editor').
 */
export const KOMBI_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' },
{ id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
{ id: 'grid-editor', name: 'Tabelle', icon: '📊', status: 'pending' },
]
/**
 * Translate a DB step number into the overlay UI step index (0-based).
 *
 * DB numbering: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns,
 * 7=rows, 8=words, 9=recon, 10=gt. The overlay UI has no column step, so
 * DB steps 6 and 7 both land on the rows step; everything past words maps to
 * the reconstruction step.
 */
export function dbStepToOverlayUi(dbStep: number): number {
  if (dbStep <= 2) return 0 // start / orientation

  switch (dbStep) {
    case 3:
      return 1 // deskew
    case 4:
      return 2 // dewarp
    case 5:
      return 3 // crop
    case 8:
      return 5 // words
  }

  // Remaining values up to 7 (columns, rows) share the rows step;
  // anything else is treated as reconstruction.
  return dbStep <= 7 ? 4 : 6
}

View File

@@ -1,624 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types'
const KLAUSUR_API = '/klausur-api'
export default function OcrPipelinePage() {
// --- Stepper / active session ---
// Index into `steps` of the step currently shown (0-based).
const [currentStep, setCurrentStep] = useState(0)
// Id and display name of the open session; null / '' when none is open.
const [sessionId, setSessionId] = useState<string | null>(null)
const [sessionName, setSessionName] = useState<string>('')
// --- Session list ---
const [sessions, setSessions] = useState<SessionListItem[]>([])
const [loadingSessions, setLoadingSessions] = useState(true)
// Id of the session whose name / category is being edited inline (null = none).
const [editingName, setEditingName] = useState<string | null>(null)
const [editNameValue, setEditNameValue] = useState('')
const [editingCategory, setEditingCategory] = useState<string | null>(null)
// --- Document type / category of the open session ---
const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
// --- Sub-sessions and the parent they belong to ---
const [subSessions, setSubSessions] = useState<SubSession[]>([])
const [parentSessionId, setParentSessionId] = useState<string | null>(null)
// Step list derived from PIPELINE_STEPS with the first step marked active.
const [steps, setSteps] = useState<PipelineStep[]>(
PIPELINE_STEPS.map((s, i) => ({
...s,
status: i === 0 ? 'active' : 'pending',
})),
)
// Load the session list once on mount (empty dependency array).
// loadSessions is declared below this hook; that works because the effect
// callback only runs after the component body has finished executing.
useEffect(() => {
loadSessions()
}, [])
// Fetch the session list from the backend into `sessions`. The loading flag
// is raised for the duration of the request and always cleared afterwards;
// a non-OK response leaves the current list unchanged.
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!response.ok) return
    const payload = await response.json()
    setSessions(payload.sessions || [])
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
// Open a session: fetch it from the backend, make it the active session and
// restore name, category, sub-session context, document type and stepper
// position from the response.
//
// sid             - id of the session to open
// keepSubSessions - when true, the sub-session list / parent id are left
//                   untouched if the response carries neither sub_sessions
//                   nor parent_session_id
//
// A non-OK response is ignored silently; thrown errors are logged.
const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
if (!res.ok) return
const data = await res.json()
setSessionId(sid)
setSessionName(data.name || data.filename || '')
setActiveCategory(data.document_category || undefined)
// Sub-session handling
if (data.sub_sessions && data.sub_sessions.length > 0) {
// This session has sub-sessions, so it becomes the parent
setSubSessions(data.sub_sessions)
setParentSessionId(sid)
} else if (data.parent_session_id) {
// This is a sub-session — keep parent info but don't reset sub-session list
setParentSessionId(data.parent_session_id)
} else if (!keepSubSessions) {
setSubSessions([])
setParentSessionId(null)
}
// Restore doc type result if available
const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
setDocTypeResult(savedDocType)
// Determine which step to jump to based on current_step
const dbStep = data.current_step || 1
// DB steps: 1=start, 2=orientation, 3=deskew, 4=dewarp, 5=crop, 6=columns, ...
// UI steps are 0-indexed: 0=orientation, 1=deskew, 2=dewarp, 3=crop, 4=columns, ...
// NOTE(review): uiStep is not clamped to the last PIPELINE_STEPS index — confirm
// the backend never reports a current_step beyond the number of UI steps.
let uiStep = Math.max(0, dbStep - 1)
// Copy so the sub-session additions below do not mutate the saved result
const skipSteps = [...(savedDocType?.skip_steps || [])]
// Sub-sessions: image is already cropped, skip pre-processing steps
// Jump directly to columns (UI step 4) unless already further ahead
const isSubSession = !!data.parent_session_id
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
if (isSubSession) {
for (const s of SUB_SESSION_SKIP) {
if (!skipSteps.includes(s)) skipSteps.push(s)
}
if (uiStep < 4) uiStep = 4 // columns step (index 4)
}
// 'skipped' takes precedence over completed/active/pending
setSteps(
PIPELINE_STEPS.map((s, i) => ({
...s,
status: skipSteps.includes(s.id)
? 'skipped'
: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
})),
)
setCurrentStep(uiStep)
} catch (e) {
console.error('Failed to open session:', e)
}
}, [])
/**
 * Delete a session on the server and, only if the server confirms, remove it
 * from the local session list.
 *
 * When the deleted session is the one currently open, all per-session state
 * (name, category, document type, sub-sessions, stepper) is reset. Failures
 * (network errors and non-2xx responses) are logged and leave the local state
 * untouched.
 *
 * @param sid id of the session to delete
 */
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    // fetch() only rejects on network failure; an HTTP error must be detected
    // explicitly, otherwise the session would vanish from the UI while still
    // existing on the server.
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      // Name and category belong to the deleted session; clear them so they
      // cannot leak into the next session that becomes active.
      setSessionName('')
      setActiveCategory(undefined)
      setCurrentStep(0)
      setDocTypeResult(null)
      setSubSessions([])
      setParentSessionId(null)
      setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId])
/**
 * Rename a session on the server and mirror the new name locally.
 *
 * The local list (and the active session name, if it is the open session) is
 * only updated when the server accepts the change. The inline name editor is
 * closed in every case.
 *
 * @param sid id of the session to rename
 * @param newName name to store
 */
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    // fetch() resolves on 4xx/5xx; do not show a name the server rejected.
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
    if (sessionId === sid) setSessionName(newName)
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  setEditingName(null)
}, [sessionId])
/**
 * Store a document category for a session on the server and mirror it locally.
 *
 * Local state is only updated when the server accepts the change. The
 * category dropdown is closed in every case.
 *
 * @param sid id of the session to update
 * @param category category value to store
 */
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    // fetch() resolves on 4xx/5xx; do not show a category the server rejected.
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
    if (sessionId === sid) setActiveCategory(category)
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [sessionId])
/**
 * Delete every session on the server after user confirmation, then reset the
 * page to its initial state.
 *
 * Local state is only cleared when the server confirms the bulk delete;
 * network errors and non-2xx responses are logged and leave the list intact.
 */
const deleteAllSessions = useCallback(async () => {
  if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
    // fetch() resolves on 4xx/5xx; without this check a failed bulk delete
    // would empty the list in the UI while every session still exists.
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    setSessions([])
    setSessionId(null)
    // Clear the cached name together with the rest of the per-session state.
    setSessionName('')
    setCurrentStep(0)
    setDocTypeResult(null)
    setActiveCategory(undefined)
    setSubSessions([])
    setParentSessionId(null)
    setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
  } catch (e) {
    console.error('Failed to delete all sessions:', e)
  }
}, [])
// Stepper click handler: a step can be opened when it is not ahead of the
// current step, or when it is already marked as completed.
const handleStepClick = (index: number) => {
  const reachable = index <= currentStep || steps[index].status === 'completed'
  if (!reachable) return
  setCurrentStep(index)
}
// Jump to `step` and rewrite every status relative to it:
// earlier steps become completed, the target active, later ones pending.
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((previous) =>
    previous.map((entry, idx) => {
      if (idx < step) return { ...entry, status: 'completed' }
      if (idx === step) return { ...entry, status: 'active' }
      return { ...entry, status: 'pending' }
    }),
  )
}
// Advance the stepper to the next step that is not in the document type's
// skip list. After the final step a sub-session hands control back to its
// parent, while a main session closes and the session list is reloaded.
const handleNext = () => {
  const isLastStep = currentStep >= steps.length - 1

  if (!isLastStep) {
    // Find the next non-skipped step
    const skipped = docTypeResult?.skip_steps || []
    let target = currentStep + 1
    while (target < steps.length && skipped.includes(PIPELINE_STEPS[target]?.id)) {
      target++
    }
    if (target >= steps.length) target = steps.length - 1

    setSteps((previous) =>
      previous.map((entry, idx) => {
        if (idx === currentStep) return { ...entry, status: 'completed' }
        if (idx === target) return { ...entry, status: 'active' }
        // Mark skipped steps between current and next
        const isBetween = idx > currentStep && idx < target
        if (isBetween && skipped.includes(PIPELINE_STEPS[idx]?.id)) {
          return { ...entry, status: 'skipped' }
        }
        return entry
      }),
    )
    setCurrentStep(target)
    return
  }

  // Last step completed
  const inSubSession = parentSessionId && sessionId !== parentSessionId
  if (inSubSession) {
    // Sub-session completed — update its status and stay in tab view
    setSubSessions((previous) =>
      previous.map((sub) => sub.id === sessionId ? { ...sub, status: 'completed', current_step: 10 } : sub)
    )
    // Switch back to parent
    handleSessionChange(parentSessionId)
    return
  }

  // Main session: return to session list
  setSteps(PIPELINE_STEPS.map((entry, idx) => ({ ...entry, status: idx === 0 ? 'active' : 'pending' })))
  setCurrentStep(0)
  setSessionId(null)
  setSubSessions([])
  setParentSessionId(null)
  loadSessions()
}
// Passed to StepOrientation as onNext: stores the session id it reports,
// refreshes the session list, then advances the stepper.
const handleOrientationComplete = (sid: string) => {
setSessionId(sid)
// Reload session list to show the new session
loadSessions()
handleNext()
}
/**
 * onNext handler of the crop step: auto-detect the document type (crop is the
 * last image-processing step), then advance to the next non-skipped step.
 *
 * The advance is computed here from the freshly returned skip list rather than
 * delegated to handleNext(): handleNext reads `docTypeResult` from the render
 * in which it was created, so it would still see the pre-detection value and
 * activate a step that detection has just marked as skipped.
 *
 * Detection is best-effort; if it fails the previously known skip list (if
 * any) is used and the pipeline still advances.
 */
const handleCropNext = async () => {
  let skipSteps = docTypeResult?.skip_steps || []
  if (sessionId) {
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-type`,
        { method: 'POST' },
      )
      if (res.ok) {
        const data: DocumentTypeResult = await res.json()
        setDocTypeResult(data)
        skipSteps = data.skip_steps || []
      }
    } catch (e) {
      console.error('Doc type detection failed:', e)
      // Not critical — continue without it
    }
  }

  // Crop is not expected to be the final step; if it ever is, reuse the
  // end-of-pipeline handling in handleNext.
  if (currentStep >= steps.length - 1) {
    handleNext()
    return
  }

  // Find the next non-skipped step using the up-to-date skip list
  let nextStep = currentStep + 1
  while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) {
    nextStep++
  }
  if (nextStep >= steps.length) nextStep = steps.length - 1

  setSteps((prev) =>
    prev.map((s, i) => {
      if (i === currentStep) return { ...s, status: 'completed' }
      if (i === nextStep) return { ...s, status: 'active' }
      // Mark skipped steps immediately
      if (skipSteps.includes(s.id)) return { ...s, status: 'skipped' }
      return s
    }),
  )
  setCurrentStep(nextStep)
}
// Manually override the detected document type. 'full_text' skips the column
// and row steps and uses the 'full_page' pipeline; every other type skips
// nothing and uses 'cell_first'. Does nothing when no detection result exists.
const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => {
  if (!docTypeResult) return

  const isFullText = newDocType === 'full_text'
  // vocab_table and generic_table: no skips
  const stepsToSkip: string[] = isFullText ? ['columns', 'rows'] : []

  const next: DocumentTypeResult = {
    ...docTypeResult,
    doc_type: newDocType,
    skip_steps: stepsToSkip,
    pipeline: isFullText ? 'full_page' : 'cell_first',
  }
  setDocTypeResult(next)

  // Update step statuses: newly skipped steps become 'skipped', steps that
  // were skipped before but no longer are fall back to 'pending'.
  setSteps((previous) =>
    previous.map((entry) => {
      if (stepsToSkip.includes(entry.id)) return { ...entry, status: 'skipped' as const }
      return entry.status === 'skipped' ? { ...entry, status: 'pending' as const } : entry
    }),
  )
}
// "+ Neue Session" handler: drop the open session together with its name,
// document type and sub-session context, and rewind the stepper to step 0.
const handleNewSession = () => {
  const freshSteps = PIPELINE_STEPS.map((entry, idx) => ({ ...entry, status: idx === 0 ? 'active' : 'pending' }))
  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  setDocTypeResult(null)
  setSubSessions([])
  setParentSessionId(null)
  setSteps(freshSteps)
}
// Switch to another session while keeping the current sub-session list and
// parent id (openSession is called with keepSubSessions = true).
const handleSessionChange = useCallback((newSessionId: string) => {
openSession(newSessionId, true)
}, [openSession])
// Passed to StepColumnDetection as onBoxSessionsCreated: stores the reported
// sub-sessions and records the currently open session (if any) as their parent.
const handleBoxSessionsCreated = useCallback((subs: SubSession[]) => {
setSubSessions(subs)
if (sessionId) setParentSessionId(sessionId)
}, [sessionId])
// Display names of the eleven pipeline steps, keyed by 1-based step number.
// Used for the reprocess confirmation (`uiStep + 1`) and for the session list,
// where it is indexed with the session's `current_step`.
// NOTE(review): openSession documents DB step 1 as "start" and 2 as
// orientation, while this table names step 1 'Orientierung' — confirm which
// numbering `current_step` follows.
const stepNames: Record<number, string> = {
1: 'Orientierung',
2: 'Begradigung',
3: 'Entzerrung',
4: 'Zuschneiden',
5: 'Spalten',
6: 'Zeilen',
7: 'Woerter',
8: 'Struktur',
9: 'Korrektur',
10: 'Rekonstruktion',
11: 'Validierung',
}
// Ask the backend to reprocess the open session starting at the given UI step
// (after user confirmation) and, on success, rewind the stepper to that step.
const reprocessFromStep = useCallback(async (uiStep: number) => {
  if (!sessionId) return

  // UI is 0-indexed, DB is 1-indexed
  const fromStep = uiStep + 1
  const confirmed = confirm(`Ab Schritt ${fromStep} (${stepNames[fromStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)
  if (!confirmed) return

  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ from_step: fromStep }),
    })
    if (response.ok) {
      // Reset UI steps
      goToStep(uiStep)
      return
    }
    // Error body may not be JSON; fall back to an empty object and the status code.
    const payload = await response.json().catch(() => ({}))
    console.error('Reprocess failed:', payload.detail || response.status)
  } catch (e) {
    console.error('Reprocess error:', e)
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, goToStep])
// Pick the component for the current step of the 11-step pipeline.
// Returns null for an index outside 0-10.
const renderStep = () => {
  // Image preprocessing
  if (currentStep === 0) {
    return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
  }
  if (currentStep === 1) {
    return <StepDeskew sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 2) {
    return <StepDewarp sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 3) {
    // Crop uses its own handler, which runs document type detection before advancing
    return <StepCrop sessionId={sessionId} onNext={handleCropNext} />
  }

  // Layout and text recognition
  if (currentStep === 4) {
    return <StepColumnDetection sessionId={sessionId} onNext={handleNext} onBoxSessionsCreated={handleBoxSessionsCreated} />
  }
  if (currentStep === 5) {
    return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 6) {
    return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
  }
  if (currentStep === 7) {
    return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
  }

  // Review, reconstruction and validation
  if (currentStep === 8) {
    return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 9) {
    return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 10) {
    return <StepGroundTruth sessionId={sessionId} onNext={handleNext} />
  }
  return null
}
return (
<div className="space-y-6">
<PagePurpose
title="OCR Pipeline"
purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
audience={['Entwickler', 'Data Scientists']}
architecture={{
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
databases: ['PostgreSQL Sessions'],
}}
relatedPages={[
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
]}
defaultCollapsed
/>
{/* Session List */}
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
<div className="flex items-center justify-between mb-3">
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Sessions ({sessions.length})
</h3>
<div className="flex gap-2">
{sessions.length > 0 && (
<button
onClick={deleteAllSessions}
className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
title="Alle Sessions loeschen"
>
Alle loeschen
</button>
)}
<button
onClick={handleNewSession}
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
>
+ Neue Session
</button>
</div>
</div>
{loadingSessions ? (
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
) : sessions.length === 0 ? (
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
) : (
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
{sessions.map((s) => {
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
return (
<div
key={s.id}
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
sessionId === s.id
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
}`}
>
{/* Thumbnail */}
<div
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
onClick={() => openSession(s.id)}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
alt=""
className="w-full h-full object-cover"
loading="lazy"
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
/>
</div>
{/* Info */}
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
{editingName === s.id ? (
<input
autoFocus
value={editNameValue}
onChange={(e) => setEditNameValue(e.target.value)}
onBlur={() => renameSession(s.id, editNameValue)}
onKeyDown={(e) => {
if (e.key === 'Enter') renameSession(s.id, editNameValue)
if (e.key === 'Escape') setEditingName(null)
}}
onClick={(e) => e.stopPropagation()}
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
/>
) : (
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
{s.name || s.filename}
</div>
)}
{/* ID row */}
<button
onClick={(e) => {
e.stopPropagation()
navigator.clipboard.writeText(s.id)
const btn = e.currentTarget
btn.textContent = 'Kopiert!'
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
}}
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
>
ID: {s.id.slice(0, 8)}
</button>
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
<span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
</div>
</div>
{/* Badges */}
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
{/* Category Badge */}
<button
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
catInfo
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
}`}
title="Kategorie setzen"
>
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
</button>
{/* Doc Type Badge (read-only) */}
{s.doc_type && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
{s.doc_type}
</span>
)}
</div>
{/* Action buttons */}
<div className="flex flex-col gap-0.5 flex-shrink-0">
<button
onClick={(e) => {
e.stopPropagation()
setEditNameValue(s.name || s.filename)
setEditingName(s.id)
}}
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
title="Umbenennen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
</svg>
</button>
<button
onClick={(e) => {
e.stopPropagation()
if (confirm('Session loeschen?')) deleteSession(s.id)
}}
className="p-1 text-gray-400 hover:text-red-500"
title="Loeschen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
{/* Category dropdown (inline) */}
{editingCategory === s.id && (
<div
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
onClick={(e) => e.stopPropagation()}
>
{DOCUMENT_CATEGORIES.map((cat) => (
<button
key={cat.value}
onClick={() => updateCategory(s.id, cat.value)}
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
s.document_category === cat.value
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
{cat.icon} {cat.label}
</button>
))}
</div>
)}
</div>
)
})}
</div>
)}
</div>
{/* Active session info */}
{sessionId && sessionName && (
<div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
<span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
{activeCategory && (() => {
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
})()}
{docTypeResult && (
<span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
{docTypeResult.doc_type}
</span>
)}
</div>
)}
<PipelineStepper
steps={steps}
currentStep={currentStep}
onStepClick={handleStepClick}
onReprocess={sessionId ? reprocessFromStep : undefined}
docTypeResult={docTypeResult}
onDocTypeChange={handleDocTypeChange}
/>
{subSessions.length > 0 && parentSessionId && sessionId && (
<BoxSessionTabs
parentSessionId={parentSessionId}
subSessions={subSessions}
activeSessionId={sessionId}
onSessionChange={handleSessionChange}
/>
)}
<div className="min-h-[400px]">{renderStep()}</div>
</div>
)
}

View File

@@ -1,403 +0,0 @@
/** State a single pipeline step can be in. */
export type PipelineStepStatus =
  | 'pending'
  | 'active'
  | 'completed'
  | 'failed'
  | 'skipped'

/** One pipeline step: its identifier, display name, icon and current status. */
export interface PipelineStep {
  id: string
  name: string
  icon: string
  status: PipelineStepStatus
}
/** Identifier of a document category. */
export type DocumentCategory =
  | 'vokabelseite'
  | 'buchseite'
  | 'arbeitsblatt'
  | 'klausurseite'
  | 'mathearbeit'
  | 'statistik'
  | 'zeitung'
  | 'formular'
  | 'handschrift'
  | 'sonstiges'

/**
 * All document categories, one entry per `DocumentCategory` value, each with
 * its German display label and an emoji icon.
 */
export const DOCUMENT_CATEGORIES: Array<{ value: DocumentCategory; label: string; icon: string }> = [
  { value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
  { value: 'buchseite', label: 'Buchseite', icon: '📚' },
  { value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
  { value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
  { value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' },
  { value: 'statistik', label: 'Statistik', icon: '📊' },
  { value: 'zeitung', label: 'Zeitung', icon: '📰' },
  { value: 'formular', label: 'Formular', icon: '📋' },
  { value: 'handschrift', label: 'Handschrift', icon: '✍️' },
  { value: 'sonstiges', label: 'Sonstiges', icon: '📎' },
]
/**
 * Summary record of one session as it appears in a session listing.
 *
 * NOTE(review): the producer of these records is not visible in this file;
 * confirm field semantics against the API.
 */
export interface SessionListItem {
  id: string
  name: string
  filename: string
  status: string
  current_step: number
  document_category?: DocumentCategory
  doc_type?: string
  created_at: string
  updated_at?: string
  // Both linkage fields may be absent or explicitly null.
  parent_session_id?: string | null
  box_index?: number | null
}
/**
 * A sub-session entry identified by its id, name and box index.
 * Step and status are optional.
 */
export interface SubSession {
  id: string
  name: string
  box_index: number
  current_step?: number
  status?: string
}
/** Log record for one pipeline step: completion time, success flag and free-form metrics. */
export interface PipelineLogEntry {
  step: string
  completed_at: string
  success: boolean
  duration_ms?: number
  metrics: Record<string, unknown>
}

/** Pipeline log: a list of per-step log entries. */
export interface PipelineLog {
  steps: Array<PipelineLogEntry>
}
/**
 * Document-type classification of a session (see `SessionInfo.doc_type_result`).
 *
 * NOTE(review): `skip_steps` presumably lists pipeline step ids; confirm
 * against the backend.
 */
export interface DocumentTypeResult {
  doc_type:
    | 'vocab_table'
    | 'full_text'
    | 'generic_table'
  confidence: number
  pipeline:
    | 'cell_first'
    | 'full_page'
  skip_steps: Array<string>
  features?: Record<string, unknown>
  duration_seconds?: number
}
/** Result attached to a session as `orientation_result`: an orientation in degrees and a `corrected` flag. */
export interface OrientationResult {
  orientation_degrees: number
  corrected: boolean
  duration_seconds: number
}
/**
 * Result attached to a session as `crop_result`.
 *
 * NOTE(review): `crop_rect` vs. `crop_rect_pct` presumably carry the same
 * rectangle in pixels and in percent; confirm against the backend.
 */
export interface CropResult {
  crop_applied: boolean
  crop_rect?: {
    x: number
    y: number
    width: number
    height: number
  }
  crop_rect_pct?: {
    x: number
    y: number
    width: number
    height: number
  }
  original_size: {
    width: number
    height: number
  }
  cropped_size: {
    width: number
    height: number
  }
  detected_format?: string
  format_confidence?: number
  aspect_ratio?: number
  border_fractions?: {
    top: number
    bottom: number
    left: number
    right: number
  }
  skipped?: boolean
  duration_seconds?: number
}
/**
 * Detailed session record: image metadata plus the optional result object of
 * each pipeline stage and any sub-session linkage.
 */
export interface SessionInfo {
  session_id: string
  filename: string
  name?: string
  image_width: number
  image_height: number
  original_image_url: string
  current_step?: number
  document_category?: DocumentCategory
  doc_type?: string

  // Per-stage results; each one is optional.
  orientation_result?: OrientationResult
  crop_result?: CropResult
  deskew_result?: DeskewResult
  dewarp_result?: DewarpResult
  column_result?: ColumnResult
  row_result?: RowResult
  word_result?: GridResult
  doc_type_result?: DocumentTypeResult

  // Sub-session linkage.
  sub_sessions?: Array<SubSession>
  parent_session_id?: string
  box_index?: number
}
/**
 * Result attached to a session as `deskew_result`: the angles reported by the
 * individual methods, the angle that was applied, and the result image URLs.
 */
export interface DeskewResult {
  session_id: string
  angle_hough: number
  angle_word_alignment: number
  angle_iterative?: number
  angle_residual?: number
  angle_textline?: number
  angle_applied: number
  method_used:
    | 'hough'
    | 'word_alignment'
    | 'manual'
    | 'iterative'
    | 'two_pass'
    | 'three_pass'
    | 'manual_combined'
  confidence: number
  duration_seconds: number
  deskewed_image_url: string
  binarized_image_url: string
}

/** Reviewer verdict on a deskew result, with an optional corrected angle and notes. */
export interface DeskewGroundTruth {
  is_correct: boolean
  corrected_angle?: number
  notes?: string
}
/** One dewarp detection: method name, shear in degrees and confidence. */
export interface DewarpDetection {
  method: string
  shear_degrees: number
  confidence: number
}

/** Result attached to a session as `dewarp_result`, optionally with the individual detections. */
export interface DewarpResult {
  session_id: string
  method_used: string
  shear_degrees: number
  confidence: number
  duration_seconds: number
  dewarped_image_url: string
  detections?: Array<DewarpDetection>
}

/** Reviewer verdict on a dewarp result, with an optional corrected shear and notes. */
export interface DewarpGroundTruth {
  is_correct: boolean
  corrected_shear?: number
  notes?: string
}
/** Rectangular page region together with its assigned type. */
export interface PageRegion {
  type:
    | 'column_en'
    | 'column_de'
    | 'column_example'
    | 'page_ref'
    | 'column_marker'
    | 'column_text'
    | 'column_ignore'
    | 'header'
    | 'footer'
  x: number
  y: number
  width: number
  height: number
  classification_confidence?: number
  classification_method?: string
}

/** Zone between `y_start` and `y_end`, typed as content or box, with an optional box rectangle. */
export interface PageZone {
  zone_type: 'content' | 'box'
  y_start: number
  y_end: number
  box?: {
    x: number
    y: number
    width: number
    height: number
  }
}

/** Result attached to a session as `column_result`: the column regions and optional zones. */
export interface ColumnResult {
  columns: Array<PageRegion>
  duration_seconds: number
  zones?: Array<PageZone>
}

/** Reviewer verdict on a column result, with optional corrected columns and notes. */
export interface ColumnGroundTruth {
  is_correct: boolean
  corrected_columns?: Array<PageRegion>
  notes?: string
}

/** A manually placed column divider. */
export interface ManualColumnDivider {
  /** Position in % of image width (0-100). */
  xPercent: number
}

/** The set of region type strings accepted by `PageRegion.type`. */
export type ColumnTypeKey = PageRegion['type']
/** Result attached to a session as `row_result`: the rows, a numeric summary map and the total row count. */
export interface RowResult {
  rows: Array<RowItem>
  summary: Record<string, number>
  total_rows: number
  duration_seconds: number
}

/** One row: index, rectangle, word count, row type and the gap before it. */
export interface RowItem {
  index: number
  x: number
  y: number
  width: number
  height: number
  word_count: number
  row_type:
    | 'content'
    | 'header'
    | 'footer'
  gap_before: number
}

/** Reviewer verdict on a row result, with optional corrected rows and notes. */
export interface RowGroundTruth {
  is_correct: boolean
  corrected_rows?: Array<RowItem>
  notes?: string
}
/** One graphic element: rectangle, area, shape label, color and confidence. */
export interface StructureGraphic {
  x: number
  y: number
  w: number
  h: number
  area: number
  /** image, illustration */
  shape: string
  color_name: string
  color_hex: string
  confidence: number
}

/** Structure result for a page image: content bounds, boxes, zones, graphics and word/color statistics. */
export interface StructureResult {
  image_width: number
  image_height: number
  content_bounds: {
    x: number
    y: number
    w: number
    h: number
  }
  boxes: Array<StructureBox>
  zones: Array<StructureZone>
  graphics: Array<StructureGraphic>
  color_pixel_counts: Record<string, number>
  has_words: boolean
  word_count: number
  border_ghosts_removed?: number
  duration_seconds: number
}

/** One box: rectangle, confidence, border thickness and optional background color. */
export interface StructureBox {
  x: number
  y: number
  w: number
  h: number
  confidence: number
  border_thickness: number
  bg_color_name?: string
  bg_color_hex?: string
}

/** One indexed zone, typed as content or box, with its rectangle. */
export interface StructureZone {
  index: number
  zone_type: 'content' | 'box'
  x: number
  y: number
  w: number
  h: number
}
/** Bounding box given as x/y origin plus w/h extent. */
export interface WordBbox {
  x: number
  y: number
  w: number
  h: number
}

/** Bounding box of a single OCR word, with optional detected text color. */
export interface OcrWordBox {
  text: string
  /** absolute image x in px */
  left: number
  /** absolute image y in px */
  top: number
  /** px */
  width: number
  /** px */
  height: number
  conf: number
  /** hex color of detected text, e.g. '#dc2626' */
  color?: string
  /** 'black' | 'red' | 'blue' | 'green' | 'orange' | 'purple' | 'yellow' */
  color_name?: string
  /** true if this word was recovered via color detection */
  recovered?: boolean
}
/** One grid cell: row/column position, text, confidence and its bounding boxes. */
export interface GridCell {
  /** "R03_C1" */
  cell_id: string
  row_index: number
  col_index: number
  col_type: string
  text: string
  confidence: number
  bbox_px: WordBbox
  bbox_pct: WordBbox
  ocr_engine?: string
  is_bold?: boolean
  status?:
    | 'pending'
    | 'confirmed'
    | 'edited'
    | 'skipped'
  /** per-word bounding boxes from OCR engine */
  word_boxes?: Array<OcrWordBox>
}

/** Column description: index, type string, x position and width. */
export interface ColumnMeta {
  index: number
  type: string
  x: number
  width: number
}
/**
 * Grid result (attached to a session as `word_result`): the cells, grid shape
 * and columns used, plus vocab entries and LLM review data when present.
 */
export interface GridResult {
  cells: Array<GridCell>
  grid_shape: {
    rows: number
    cols: number
    total_cells: number
  }
  columns_used: Array<ColumnMeta>
  layout: 'vocab' | 'generic'
  image_width: number
  image_height: number
  duration_seconds: number
  ocr_engine?: string
  /** Only when layout='vocab' */
  vocab_entries?: Array<WordEntry>
  /** Backwards compat alias for vocab_entries */
  entries?: Array<WordEntry>
  entry_count?: number
  summary: {
    total_cells: number
    non_empty_cells: number
    low_confidence: number
    // Only when layout='vocab':
    total_entries?: number
    with_english?: number
    with_german?: number
  }
  llm_review?: {
    changes: Array<{
      row_index: number
      field: string
      old: string
      new: string
    }>
    model_used: string
    duration_ms: number
    entries_corrected: number
    applied_count?: number
    applied_at?: string
  }
}
/**
 * One vocabulary entry: English, German and example text with a bounding box
 * for the whole entry and for each part.
 */
export interface WordEntry {
  row_index: number
  english: string
  german: string
  example: string
  source_page?: string
  marker?: string
  confidence: number

  // Bounding boxes: whole entry first, then each part (null is allowed).
  bbox: WordBbox
  bbox_en: WordBbox | null
  bbox_de: WordBbox | null
  bbox_ex: WordBbox | null
  bbox_ref?: WordBbox | null
  bbox_marker?: WordBbox | null

  status?:
    | 'pending'
    | 'confirmed'
    | 'edited'
    | 'skipped'
}
/**
 * Entry-based word result with image size and summary counts.
 *
 * @deprecated Use GridResult instead
 */
export interface WordResult {
  entries: Array<WordEntry>
  entry_count: number
  image_width: number
  image_height: number
  duration_seconds: number
  ocr_engine?: string
  summary: {
    total_entries: number
    with_english: number
    with_german: number
    low_confidence: number
  }
}

/** Reviewer verdict on a word result, with optional corrected entries and notes. */
export interface WordGroundTruth {
  is_correct: boolean
  corrected_entries?: Array<WordEntry>
  notes?: string
}
/**
 * An image region: bounding box, prompt, description, style and the image as
 * base64 (or null).
 */
export interface ImageRegion {
  bbox_pct: {
    x: number
    y: number
    w: number
    h: number
  }
  prompt: string
  description: string
  image_b64: string | null
  style:
    | 'educational'
    | 'cartoon'
    | 'sketch'
    | 'clipart'
    | 'realistic'
}

/** The style strings accepted by `ImageRegion.style`. */
export type ImageStyle = ImageRegion['style']

/** All image styles, each with its German display label. */
export const IMAGE_STYLES: Array<{ value: ImageStyle; label: string }> = [
  { value: 'educational', label: 'Lehrbuch' },
  { value: 'cartoon', label: 'Cartoon' },
  { value: 'sketch', label: 'Skizze' },
  { value: 'clipart', label: 'Clipart' },
  { value: 'realistic', label: 'Realistisch' },
]
/**
 * The pipeline steps in array order, each with its German display name and an
 * emoji icon. Every entry is declared with status 'pending'.
 */
export const PIPELINE_STEPS: Array<PipelineStep> = [
  { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
  { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
  { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
  { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
  { id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
  { id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' },
  { id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
  { id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
  { id: 'llm-review', name: 'Korrektur', icon: '✏️', status: 'pending' },
  { id: 'reconstruction', name: 'Rekonstruktion', icon: '🏗️', status: 'pending' },
  { id: 'ground-truth', name: 'Validierung', icon: '✅', status: 'pending' },
]

View File

@@ -1,675 +0,0 @@
'use client'
import React, { useState, useEffect, useCallback, useRef } from 'react'
import { RAG_PDF_MAPPING } from './rag-pdf-mapping'
import { REGULATIONS_IN_RAG, REGULATION_INFO } from '../rag-constants'
/** Props of the `ChunkBrowserQA` component. */
interface ChunkBrowserQAProps {
  /** URL the component fetches from; a `?<query string>` is appended to it. */
  apiProxy: string
}
/** Key of a sidebar group that regulations are sorted into by their type. */
type RegGroupKey =
  | 'eu_regulation'
  | 'eu_directive'
  | 'de_law'
  | 'at_law'
  | 'ch_law'
  | 'national_law'
  | 'bsi_standard'
  | 'eu_guideline'
  | 'international_standard'
  | 'other'

/** Heading text shown for each sidebar group. */
const GROUP_LABELS: Record<RegGroupKey, string> = {
  eu_regulation: 'EU Verordnungen',
  eu_directive: 'EU Richtlinien',
  de_law: 'DE Gesetze',
  at_law: 'AT Gesetze',
  ch_law: 'CH Gesetze',
  national_law: 'Nationale Gesetze (EU)',
  bsi_standard: 'BSI Standards',
  eu_guideline: 'EDPB / Guidelines',
  international_standard: 'Internationale Standards',
  other: 'Sonstige',
}

/** Order in which the sidebar groups are rendered. */
const GROUP_ORDER: Array<RegGroupKey> = [
  'eu_regulation',
  'eu_directive',
  'de_law',
  'at_law',
  'ch_law',
  'national_law',
  'bsi_standard',
  'eu_guideline',
  'international_standard',
  'other',
]
/** Collection names offered in the collection selector, in display order. */
const COLLECTIONS = [
  'bp_compliance_gesetze',
  'bp_compliance_ce',
  'bp_compliance_datenschutz',
  'bp_dsfa_corpus',
  'bp_compliance_recht',
  'bp_legal_templates',
  'bp_nibis_eh',
]
export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
// Filter-Sidebar
const [selectedRegulation, setSelectedRegulation] = useState<string | null>(null)
const [regulationCounts, setRegulationCounts] = useState<Record<string, number>>({})
const [filterSearch, setFilterSearch] = useState('')
const [countsLoading, setCountsLoading] = useState(false)
// Dokument-Chunks (sequenziell)
const [docChunks, setDocChunks] = useState<Record<string, unknown>[]>([])
const [docChunkIndex, setDocChunkIndex] = useState(0)
const [docTotalChunks, setDocTotalChunks] = useState(0)
const [docLoading, setDocLoading] = useState(false)
const docChunksRef = useRef(docChunks)
docChunksRef.current = docChunks
// Split-View
const [splitViewActive, setSplitViewActive] = useState(true)
const [chunksPerPage, setChunksPerPage] = useState(6)
const [fullscreen, setFullscreen] = useState(false)
// Collection — default to bp_compliance_ce where we have PDFs downloaded
const [collection, setCollection] = useState('bp_compliance_ce')
// PDF existence check
const [pdfExists, setPdfExists] = useState<boolean | null>(null)
// Sidebar collapsed groups
const [collapsedGroups, setCollapsedGroups] = useState<Set<string>>(new Set())
// Build grouped regulations for sidebar
const regulationsInCollection = Object.entries(REGULATIONS_IN_RAG)
.filter(([, info]) => info.collection === collection)
.map(([code]) => code)
const groupedRegulations = React.useMemo(() => {
const groups: Record<RegGroupKey, { code: string; name: string; type: string }[]> = {
eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [],
national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [],
}
for (const code of regulationsInCollection) {
const reg = REGULATION_INFO.find(r => r.code === code)
const type = (reg?.type || 'other') as RegGroupKey
const groupKey = type in groups ? type : 'other'
groups[groupKey].push({
code,
name: reg?.name || code,
type: reg?.type || 'unknown',
})
}
return groups
}, [regulationsInCollection.join(',')])
// Load regulation counts for current collection
const loadRegulationCounts = useCallback(async (col: string) => {
const entries = Object.entries(REGULATIONS_IN_RAG)
.filter(([, info]) => info.collection === col && info.qdrant_id)
if (entries.length === 0) return
// Build qdrant_id -> our_code mapping
const qdrantIdToCode: Record<string, string[]> = {}
for (const [code, info] of entries) {
if (!qdrantIdToCode[info.qdrant_id]) qdrantIdToCode[info.qdrant_id] = []
qdrantIdToCode[info.qdrant_id].push(code)
}
const uniqueQdrantIds = Object.keys(qdrantIdToCode)
setCountsLoading(true)
try {
const params = new URLSearchParams({
action: 'regulation-counts-batch',
collection: col,
qdrant_ids: uniqueQdrantIds.join(','),
})
const res = await fetch(`${apiProxy}?${params}`)
if (res.ok) {
const data = await res.json()
// Map qdrant_id counts back to our codes
const mapped: Record<string, number> = {}
for (const [qid, count] of Object.entries(data.counts as Record<string, number>)) {
const codes = qdrantIdToCode[qid] || []
for (const code of codes) {
mapped[code] = count
}
}
setRegulationCounts(prev => ({ ...prev, ...mapped }))
}
} catch (error) {
console.error('Failed to load regulation counts:', error)
} finally {
setCountsLoading(false)
}
}, [apiProxy])
// Load all chunks for a regulation (paginated scroll)
const loadDocumentChunks = useCallback(async (regulationCode: string) => {
const ragInfo = REGULATIONS_IN_RAG[regulationCode]
if (!ragInfo || !ragInfo.qdrant_id) return
setDocLoading(true)
setDocChunks([])
setDocChunkIndex(0)
setDocTotalChunks(0)
const allChunks: Record<string, unknown>[] = []
let offset: string | null = null
try {
let safety = 0
do {
const params = new URLSearchParams({
action: 'scroll',
collection: ragInfo.collection,
limit: '100',
filter_key: 'regulation_id',
filter_value: ragInfo.qdrant_id,
})
if (offset) params.append('offset', offset)
const res = await fetch(`${apiProxy}?${params}`)
if (!res.ok) break
const data = await res.json()
const chunks = data.chunks || []
allChunks.push(...chunks)
offset = data.next_offset || null
safety++
} while (offset && safety < 200)
// Sort by chunk_index
allChunks.sort((a, b) => {
const ai = Number(a.chunk_index ?? a.chunk_id ?? 0)
const bi = Number(b.chunk_index ?? b.chunk_id ?? 0)
return ai - bi
})
setDocChunks(allChunks)
setDocTotalChunks(allChunks.length)
setDocChunkIndex(0)
} catch (error) {
console.error('Failed to load document chunks:', error)
} finally {
setDocLoading(false)
}
}, [apiProxy])
// Initial load
useEffect(() => {
loadRegulationCounts(collection)
}, [collection, loadRegulationCounts])
// Current chunk
const currentChunk = docChunks[docChunkIndex] || null
const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null
const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null
// PDF page estimation — use pages metadata if available
const estimatePdfPage = (chunk: Record<string, unknown> | null, chunkIdx: number): number => {
if (chunk) {
// Try pages array from payload (e.g. [7] or [7,8])
const pages = chunk.pages as number[] | undefined
if (Array.isArray(pages) && pages.length > 0) return pages[0]
// Try page field
const page = chunk.page as number | undefined
if (typeof page === 'number' && page > 0) return page
}
const mapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
const cpp = mapping?.chunksPerPage || chunksPerPage
return Math.floor(chunkIdx / cpp) + 1
}
const pdfPage = estimatePdfPage(currentChunk, docChunkIndex)
const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null
// Check PDF existence when regulation changes
useEffect(() => {
if (!selectedRegulation) { setPdfExists(null); return }
const mapping = RAG_PDF_MAPPING[selectedRegulation]
if (!mapping) { setPdfExists(false); return }
const url = `/rag-originals/${mapping.filename}`
fetch(url, { method: 'HEAD' })
.then(res => setPdfExists(res.ok))
.catch(() => setPdfExists(false))
}, [selectedRegulation])
// Handlers
const handleSelectRegulation = (code: string) => {
setSelectedRegulation(code)
loadDocumentChunks(code)
}
const handleCollectionChange = (col: string) => {
setCollection(col)
setSelectedRegulation(null)
setDocChunks([])
setDocChunkIndex(0)
setDocTotalChunks(0)
setRegulationCounts({})
}
const handlePrev = () => {
if (docChunkIndex > 0) setDocChunkIndex(i => i - 1)
}
const handleNext = () => {
if (docChunkIndex < docChunks.length - 1) setDocChunkIndex(i => i + 1)
}
const handleKeyDown = useCallback((e: KeyboardEvent) => {
if (e.key === 'Escape' && fullscreen) {
e.preventDefault()
setFullscreen(false)
} else if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') {
e.preventDefault()
setDocChunkIndex(i => Math.max(0, i - 1))
} else if (e.key === 'ArrowRight' || e.key === 'ArrowDown') {
e.preventDefault()
setDocChunkIndex(i => Math.min(docChunksRef.current.length - 1, i + 1))
}
}, [fullscreen])
useEffect(() => {
if (fullscreen || (selectedRegulation && docChunks.length > 0)) {
window.addEventListener('keydown', handleKeyDown)
return () => window.removeEventListener('keydown', handleKeyDown)
}
}, [selectedRegulation, docChunks.length, handleKeyDown, fullscreen])
const toggleGroup = (group: string) => {
setCollapsedGroups(prev => {
const next = new Set(prev)
if (next.has(group)) next.delete(group)
else next.add(group)
return next
})
}
// Get text content from a chunk
const getChunkText = (chunk: Record<string, unknown> | null): string => {
if (!chunk) return ''
return String(chunk.chunk_text || chunk.text || chunk.content || '')
}
// Extract structural metadata for prominent display
const getStructuralInfo = (chunk: Record<string, unknown> | null): { article?: string; section?: string; pages?: string } => {
if (!chunk) return {}
const result: { article?: string; section?: string; pages?: string } = {}
// Article / paragraph
const article = chunk.article || chunk.artikel || chunk.paragraph || chunk.section_title
if (article) result.article = String(article)
// Section
const section = chunk.section || chunk.chapter || chunk.abschnitt || chunk.kapitel
if (section) result.section = String(section)
// Pages
const pages = chunk.pages as number[] | undefined
if (Array.isArray(pages) && pages.length > 0) {
result.pages = pages.length === 1 ? `S. ${pages[0]}` : `S. ${pages[0]}-${pages[pages.length - 1]}`
} else if (chunk.page) {
result.pages = `S. ${chunk.page}`
}
return result
}
// Overlap extraction
const getOverlapPrev = (): string => {
if (!prevChunk) return ''
const text = getChunkText(prevChunk)
return text.length > 150 ? '...' + text.slice(-150) : text
}
const getOverlapNext = (): string => {
if (!nextChunk) return ''
const text = getChunkText(nextChunk)
return text.length > 150 ? text.slice(0, 150) + '...' : text
}
// Filter sidebar items
const filteredRegulations = React.useMemo(() => {
if (!filterSearch.trim()) return groupedRegulations
const term = filterSearch.toLowerCase()
const filtered: typeof groupedRegulations = {
eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [],
national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [],
}
for (const [group, items] of Object.entries(groupedRegulations)) {
filtered[group as RegGroupKey] = items.filter(
r => r.code.toLowerCase().includes(term) || r.name.toLowerCase().includes(term)
)
}
return filtered
}, [groupedRegulations, filterSearch])
// Regulation name lookup
const getRegName = (code: string): string => {
const reg = REGULATION_INFO.find(r => r.code === code)
return reg?.name || code
}
// Important metadata keys to show prominently
const STRUCTURAL_KEYS = new Set([
'article', 'artikel', 'paragraph', 'section_title', 'section', 'chapter',
'abschnitt', 'kapitel', 'pages', 'page',
])
const HIDDEN_KEYS = new Set([
'text', 'content', 'chunk_text', 'id', 'embedding',
])
const structInfo = getStructuralInfo(currentChunk)
return (
<div
className={`flex flex-col ${fullscreen ? 'fixed inset-0 z-50 bg-slate-100 p-4' : ''}`}
style={fullscreen ? { height: '100vh' } : { height: 'calc(100vh - 220px)' }}
>
{/* Header bar — fixed height */}
<div className="flex-shrink-0 bg-white rounded-xl border border-slate-200 p-3 mb-3">
<div className="flex flex-wrap items-center gap-4">
<div>
<label className="block text-xs font-medium text-slate-500 mb-1">Collection</label>
<select
value={collection}
onChange={(e) => handleCollectionChange(e.target.value)}
className="px-3 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
>
{COLLECTIONS.map(c => (
<option key={c} value={c}>{c}</option>
))}
</select>
</div>
{selectedRegulation && (
<>
<div className="flex items-center gap-2">
<span className="text-sm font-semibold text-slate-900">
{selectedRegulation} {getRegName(selectedRegulation)}
</span>
{structInfo.article && (
<span className="px-2 py-0.5 bg-blue-100 text-blue-800 text-xs font-medium rounded">
{structInfo.article}
</span>
)}
{structInfo.pages && (
<span className="px-2 py-0.5 bg-slate-100 text-slate-600 text-xs rounded">
{structInfo.pages}
</span>
)}
</div>
<div className="flex items-center gap-2 ml-auto">
<button
onClick={handlePrev}
disabled={docChunkIndex === 0}
className="px-3 py-1.5 text-sm font-medium border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30 disabled:cursor-not-allowed"
>
&#9664; Zurueck
</button>
<span className="text-sm font-mono text-slate-600 min-w-[80px] text-center">
{docChunkIndex + 1} / {docTotalChunks}
</span>
<button
onClick={handleNext}
disabled={docChunkIndex >= docChunks.length - 1}
className="px-3 py-1.5 text-sm font-medium border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30 disabled:cursor-not-allowed"
>
Weiter &#9654;
</button>
<input
type="number"
min={1}
max={docTotalChunks}
value={docChunkIndex + 1}
onChange={(e) => {
const v = parseInt(e.target.value, 10)
if (!isNaN(v) && v >= 1 && v <= docTotalChunks) setDocChunkIndex(v - 1)
}}
className="w-16 px-2 py-1 border rounded text-xs text-center"
title="Springe zu Chunk Nr."
/>
</div>
<div className="flex items-center gap-2">
<label className="text-xs text-slate-500">Chunks/Seite:</label>
<select
value={chunksPerPage}
onChange={(e) => setChunksPerPage(Number(e.target.value))}
className="px-2 py-1 border rounded text-xs"
>
{[3, 4, 5, 6, 8, 10, 12, 15, 20].map(n => (
<option key={n} value={n}>{n}</option>
))}
</select>
<button
onClick={() => setSplitViewActive(!splitViewActive)}
className={`px-3 py-1 text-xs rounded-lg border ${
splitViewActive ? 'bg-teal-50 border-teal-300 text-teal-700' : 'bg-slate-50 border-slate-300 text-slate-600'
}`}
>
{splitViewActive ? 'Split-View an' : 'Split-View aus'}
</button>
<button
onClick={() => setFullscreen(!fullscreen)}
className={`px-3 py-1 text-xs rounded-lg border ${
fullscreen ? 'bg-indigo-50 border-indigo-300 text-indigo-700' : 'bg-slate-50 border-slate-300 text-slate-600'
}`}
title={fullscreen ? 'Vollbild beenden (Esc)' : 'Vollbild'}
>
{fullscreen ? '&#10005; Vollbild beenden' : '&#9974; Vollbild'}
</button>
</div>
</>
)}
</div>
</div>
{/* Main content: Sidebar + Content — fills remaining height */}
<div className="flex gap-3 flex-1 min-h-0">
{/* Sidebar — scrollable */}
<div className="w-56 flex-shrink-0 bg-white rounded-xl border border-slate-200 flex flex-col min-h-0">
<div className="flex-shrink-0 p-3 border-b border-slate-100">
<input
type="text"
value={filterSearch}
onChange={(e) => setFilterSearch(e.target.value)}
placeholder="Suche..."
className="w-full px-2 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
/>
{countsLoading && (
<div className="text-xs text-slate-400 mt-1 animate-pulse">Counts laden...</div>
)}
</div>
<div className="flex-1 overflow-y-auto min-h-0">
{GROUP_ORDER.map(group => {
const items = filteredRegulations[group]
if (items.length === 0) return null
const isCollapsed = collapsedGroups.has(group)
return (
<div key={group}>
<button
onClick={() => toggleGroup(group)}
className="w-full px-3 py-1.5 text-left text-xs font-semibold text-slate-500 bg-slate-50 hover:bg-slate-100 flex items-center justify-between sticky top-0 z-10"
>
<span>{GROUP_LABELS[group]}</span>
<span className="text-slate-400">{isCollapsed ? '+' : '-'}</span>
</button>
{!isCollapsed && items.map(reg => {
const count = regulationCounts[reg.code] ?? 0
const isSelected = selectedRegulation === reg.code
return (
<button
key={reg.code}
onClick={() => handleSelectRegulation(reg.code)}
className={`w-full px-3 py-1.5 text-left text-sm flex items-center justify-between hover:bg-teal-50 transition-colors ${
isSelected ? 'bg-teal-100 text-teal-900 font-medium' : 'text-slate-700'
}`}
>
<span className="truncate text-xs">{reg.name || reg.code}</span>
<span className={`text-xs tabular-nums flex-shrink-0 ml-1 ${count > 0 ? 'text-slate-500' : 'text-slate-300'}`}>
{count > 0 ? count.toLocaleString() : '—'}
</span>
</button>
)
})}
</div>
)
})}
</div>
</div>
{/* Content area — fills remaining width and height */}
{!selectedRegulation ? (
<div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
<div className="text-center text-slate-400 space-y-2">
<div className="text-4xl">&#128269;</div>
<p className="text-sm">Dokument in der Sidebar auswaehlen, um QA zu starten.</p>
<p className="text-xs text-slate-300">Pfeiltasten: Chunk vor/zurueck</p>
</div>
</div>
) : docLoading ? (
<div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
<div className="text-center text-slate-500 space-y-2">
<div className="animate-spin text-3xl">&#9881;</div>
<p className="text-sm">Chunks werden geladen...</p>
<p className="text-xs text-slate-400">
{selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks.toLocaleString() || '?'} Chunks erwartet
</p>
</div>
</div>
) : (
<div className={`flex-1 grid gap-3 min-h-0 ${splitViewActive ? 'grid-cols-2' : 'grid-cols-1'}`}>
{/* Chunk-Text Panel — fixed height, internal scroll */}
<div className="bg-white rounded-xl border border-slate-200 flex flex-col min-h-0 overflow-hidden">
{/* Panel header */}
<div className="flex-shrink-0 px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
<span className="text-sm font-medium text-slate-700">Chunk-Text</span>
<div className="flex items-center gap-2">
{structInfo.article && (
<span className="px-2 py-0.5 bg-blue-50 text-blue-700 text-xs font-medium rounded border border-blue-200">
{structInfo.article}
</span>
)}
{structInfo.section && (
<span className="px-2 py-0.5 bg-purple-50 text-purple-700 text-xs rounded border border-purple-200">
{structInfo.section}
</span>
)}
<span className="text-xs text-slate-400 tabular-nums">
#{docChunkIndex} / {docTotalChunks - 1}
</span>
</div>
</div>
{/* Scrollable content */}
<div className="flex-1 overflow-y-auto min-h-0 p-4 space-y-3">
{/* Overlap from previous chunk */}
{prevChunk && (
<div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
<div className="font-medium text-amber-600 mb-1">&#8593; Ende vorheriger Chunk #{docChunkIndex - 1}</div>
<p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapPrev()}</p>
</div>
)}
{/* Current chunk text */}
{currentChunk ? (
<div className="text-sm text-slate-800 whitespace-pre-wrap break-words leading-relaxed border-l-2 border-teal-400 pl-3">
{getChunkText(currentChunk)}
</div>
) : (
<div className="text-sm text-slate-400 italic">Kein Chunk-Text vorhanden.</div>
)}
{/* Overlap from next chunk */}
{nextChunk && (
<div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
<div className="font-medium text-amber-600 mb-1">&#8595; Anfang naechster Chunk #{docChunkIndex + 1}</div>
<p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapNext()}</p>
</div>
)}
{/* Metadata */}
{currentChunk && (
<div className="mt-4 pt-3 border-t border-slate-100">
<div className="text-xs font-medium text-slate-500 mb-2">Metadaten</div>
<div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
{Object.entries(currentChunk)
.filter(([k]) => !HIDDEN_KEYS.has(k))
.sort(([a], [b]) => {
// Structural keys first
const aStruct = STRUCTURAL_KEYS.has(a) ? 0 : 1
const bStruct = STRUCTURAL_KEYS.has(b) ? 0 : 1
return aStruct - bStruct || a.localeCompare(b)
})
.map(([k, v]) => (
<div key={k} className={`flex gap-1 ${STRUCTURAL_KEYS.has(k) ? 'col-span-2 font-medium' : ''}`}>
<span className="font-medium text-slate-500 flex-shrink-0">{k}:</span>
<span className="text-slate-700 break-all">
{Array.isArray(v) ? v.join(', ') : String(v)}
</span>
</div>
))}
</div>
{/* Chunk quality indicator */}
<div className="mt-3 pt-2 border-t border-slate-50">
<div className="text-xs text-slate-400">
Chunk-Laenge: {getChunkText(currentChunk).length} Zeichen
{getChunkText(currentChunk).length < 50 && (
<span className="ml-2 text-orange-500 font-medium">&#9888; Sehr kurz</span>
)}
{getChunkText(currentChunk).length > 2000 && (
<span className="ml-2 text-orange-500 font-medium">&#9888; Sehr lang</span>
)}
</div>
</div>
</div>
)}
</div>
</div>
{/* PDF-Viewer Panel */}
{splitViewActive && (
<div className="bg-white rounded-xl border border-slate-200 flex flex-col min-h-0 overflow-hidden">
<div className="flex-shrink-0 px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
<span className="text-sm font-medium text-slate-700">Original-PDF</span>
<div className="flex items-center gap-2">
<span className="text-xs text-slate-400">
Seite ~{pdfPage}
{pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''}
</span>
{pdfUrl && (
<a
href={pdfUrl.split('#')[0]}
target="_blank"
rel="noopener noreferrer"
className="text-xs text-teal-600 hover:text-teal-800 underline"
>
Oeffnen &#8599;
</a>
)}
</div>
</div>
<div className="flex-1 min-h-0 relative">
{pdfUrl && pdfExists ? (
<iframe
key={`${selectedRegulation}-${pdfPage}`}
src={pdfUrl}
className="absolute inset-0 w-full h-full border-0"
title="Original PDF"
/>
) : (
<div className="flex items-center justify-center h-full text-slate-400 text-sm p-4">
<div className="text-center space-y-2">
<div className="text-3xl">&#128196;</div>
{!pdfMapping ? (
<>
<p>Kein PDF-Mapping fuer {selectedRegulation}.</p>
<p className="text-xs">rag-pdf-mapping.ts ergaenzen.</p>
</>
) : pdfExists === false ? (
<>
<p className="font-medium text-orange-600">PDF nicht vorhanden</p>
<p className="text-xs">Datei <code className="bg-slate-100 px-1 rounded">{pdfMapping.filename}</code> fehlt in ~/rag-originals/</p>
<p className="text-xs mt-1">Bitte manuell herunterladen und dort ablegen.</p>
</>
) : (
<p>PDF wird geprueft...</p>
)}
</div>
</div>
)}
</div>
</div>
)}
</div>
)}
</div>
</div>
)
}

View File

@@ -1,126 +0,0 @@
/**
 * Metadata describing the original PDF that belongs to one regulation code.
 */
export interface RagPdfMapping {
  /** File name of the original PDF (e.g. 'GDPR_DE.pdf'). */
  filename: string
  /** Short language code of the PDF, e.g. 'de' or 'en'. */
  language: string
  /** Total page count of the PDF, when known. */
  totalPages?: number
  /** NOTE(review): presumably the average number of chunks per PDF page — confirm against the consumer. */
  chunksPerPage?: number
}
/**
 * Lookup table: regulation code -> metadata of the original PDF for that code.
 *
 * Only GDPR and AIACT currently carry a `totalPages` value; no entry sets
 * `chunksPerPage`.
 * NOTE(review): filenames are presumably relative to the ~/rag-originals/
 * directory mentioned by the PDF viewer — confirm.
 */
export const RAG_PDF_MAPPING: Record<string, RagPdfMapping> = {
// EU regulations
GDPR: { filename: 'GDPR_DE.pdf', language: 'de', totalPages: 88 },
EPRIVACY: { filename: 'EPRIVACY_DE.pdf', language: 'de' },
SCC: { filename: 'SCC_DE.pdf', language: 'de' },
SCC_FULL_TEXT: { filename: 'SCC_FULL_TEXT_DE.pdf', language: 'de' },
AIACT: { filename: 'AIACT_DE.pdf', language: 'de', totalPages: 144 },
CRA: { filename: 'CRA_DE.pdf', language: 'de' },
NIS2: { filename: 'NIS2_DE.pdf', language: 'de' },
DGA: { filename: 'DGA_DE.pdf', language: 'de' },
DSA: { filename: 'DSA_DE.pdf', language: 'de' },
PLD: { filename: 'PLD_DE.pdf', language: 'de' },
E_COMMERCE_RL: { filename: 'E_COMMERCE_RL_DE.pdf', language: 'de' },
VERBRAUCHERRECHTE_RL: { filename: 'VERBRAUCHERRECHTE_RL_DE.pdf', language: 'de' },
DIGITALE_INHALTE_RL: { filename: 'DIGITALE_INHALTE_RL_DE.pdf', language: 'de' },
DMA: { filename: 'DMA_DE.pdf', language: 'de' },
DPF: { filename: 'DPF_DE.pdf', language: 'de' },
EUCSA: { filename: 'EUCSA_DE.pdf', language: 'de' },
DATAACT: { filename: 'DATAACT_DE.pdf', language: 'de' },
DORA: { filename: 'DORA_DE.pdf', language: 'de' },
PSD2: { filename: 'PSD2_DE.pdf', language: 'de' },
AMLR: { filename: 'AMLR_DE.pdf', language: 'de' },
MiCA: { filename: 'MiCA_DE.pdf', language: 'de' },
EHDS: { filename: 'EHDS_DE.pdf', language: 'de' },
EAA: { filename: 'EAA_DE.pdf', language: 'de' },
DSM: { filename: 'DSM_DE.pdf', language: 'de' },
GPSR: { filename: 'GPSR_DE.pdf', language: 'de' },
MACHINERY_REG: { filename: 'MACHINERY_REG_DE.pdf', language: 'de' },
BLUE_GUIDE: { filename: 'BLUE_GUIDE_DE.pdf', language: 'de' },
// German laws
TDDDG: { filename: 'TDDDG_DE.pdf', language: 'de' },
BDSG_FULL: { filename: 'BDSG_FULL_DE.pdf', language: 'de' },
DE_DDG: { filename: 'DE_DDG.pdf', language: 'de' },
DE_BGB_AGB: { filename: 'DE_BGB_AGB.pdf', language: 'de' },
DE_EGBGB: { filename: 'DE_EGBGB.pdf', language: 'de' },
DE_HGB_RET: { filename: 'DE_HGB_RET.pdf', language: 'de' },
DE_AO_RET: { filename: 'DE_AO_RET.pdf', language: 'de' },
DE_UWG: { filename: 'DE_UWG.pdf', language: 'de' },
DE_TKG: { filename: 'DE_TKG.pdf', language: 'de' },
DE_PANGV: { filename: 'DE_PANGV.pdf', language: 'de' },
DE_DLINFOV: { filename: 'DE_DLINFOV.pdf', language: 'de' },
DE_BETRVG: { filename: 'DE_BETRVG.pdf', language: 'de' },
DE_GESCHGEHG: { filename: 'DE_GESCHGEHG.pdf', language: 'de' },
DE_BSIG: { filename: 'DE_BSIG.pdf', language: 'de' },
DE_USTG_RET: { filename: 'DE_USTG_RET.pdf', language: 'de' },
// BSI standards
'BSI-TR-03161-1': { filename: 'BSI-TR-03161-1.pdf', language: 'de' },
'BSI-TR-03161-2': { filename: 'BSI-TR-03161-2.pdf', language: 'de' },
'BSI-TR-03161-3': { filename: 'BSI-TR-03161-3.pdf', language: 'de' },
// Austrian laws
AT_DSG: { filename: 'AT_DSG.pdf', language: 'de' },
AT_DSG_FULL: { filename: 'AT_DSG_FULL.pdf', language: 'de' },
AT_ECG: { filename: 'AT_ECG.pdf', language: 'de' },
AT_TKG: { filename: 'AT_TKG.pdf', language: 'de' },
AT_KSCHG: { filename: 'AT_KSCHG.pdf', language: 'de' },
AT_FAGG: { filename: 'AT_FAGG.pdf', language: 'de' },
AT_UGB_RET: { filename: 'AT_UGB_RET.pdf', language: 'de' },
AT_BAO_RET: { filename: 'AT_BAO_RET.pdf', language: 'de' },
AT_MEDIENG: { filename: 'AT_MEDIENG.pdf', language: 'de' },
AT_ABGB_AGB: { filename: 'AT_ABGB_AGB.pdf', language: 'de' },
AT_UWG: { filename: 'AT_UWG.pdf', language: 'de' },
// Swiss laws
CH_DSG: { filename: 'CH_DSG.pdf', language: 'de' },
CH_DSV: { filename: 'CH_DSV.pdf', language: 'de' },
CH_OR_AGB: { filename: 'CH_OR_AGB.pdf', language: 'de' },
CH_UWG: { filename: 'CH_UWG.pdf', language: 'de' },
CH_FMG: { filename: 'CH_FMG.pdf', language: 'de' },
CH_GEBUV: { filename: 'CH_GEBUV.pdf', language: 'de' },
CH_ZERTES: { filename: 'CH_ZERTES.pdf', language: 'de' },
CH_ZGB_PERS: { filename: 'CH_ZGB_PERS.pdf', language: 'de' },
// Liechtenstein
LI_DSG: { filename: 'LI_DSG.pdf', language: 'de' },
// National data protection laws (other EU)
ES_LOPDGDD: { filename: 'ES_LOPDGDD.pdf', language: 'es' },
IT_CODICE_PRIVACY: { filename: 'IT_CODICE_PRIVACY.pdf', language: 'it' },
NL_UAVG: { filename: 'NL_UAVG.pdf', language: 'nl' },
FR_CNIL_GUIDE: { filename: 'FR_CNIL_GUIDE.pdf', language: 'fr' },
IE_DPA_2018: { filename: 'IE_DPA_2018.pdf', language: 'en' },
UK_DPA_2018: { filename: 'UK_DPA_2018.pdf', language: 'en' },
UK_GDPR: { filename: 'UK_GDPR.pdf', language: 'en' },
NO_PERSONOPPLYSNINGSLOVEN: { filename: 'NO_PERSONOPPLYSNINGSLOVEN.pdf', language: 'no' },
SE_DATASKYDDSLAG: { filename: 'SE_DATASKYDDSLAG.pdf', language: 'sv' },
PL_UODO: { filename: 'PL_UODO.pdf', language: 'pl' },
CZ_ZOU: { filename: 'CZ_ZOU.pdf', language: 'cs' },
HU_INFOTV: { filename: 'HU_INFOTV.pdf', language: 'hu' },
BE_DPA_LAW: { filename: 'BE_DPA_LAW.pdf', language: 'nl' },
FI_TIETOSUOJALAKI: { filename: 'FI_TIETOSUOJALAKI.pdf', language: 'fi' },
DK_DATABESKYTTELSESLOVEN: { filename: 'DK_DATABESKYTTELSESLOVEN.pdf', language: 'da' },
LU_DPA_LAW: { filename: 'LU_DPA_LAW.pdf', language: 'fr' },
// German laws (additional)
TMG_KOMPLETT: { filename: 'TMG_KOMPLETT.pdf', language: 'de' },
DE_URHG: { filename: 'DE_URHG.pdf', language: 'de' },
// EDPB guidelines
EDPB_GUIDELINES_5_2020: { filename: 'EDPB_GUIDELINES_5_2020.pdf', language: 'en' },
EDPB_GUIDELINES_7_2020: { filename: 'EDPB_GUIDELINES_7_2020.pdf', language: 'en' },
EDPB_GUIDELINES_1_2020: { filename: 'EDPB_GUIDELINES_1_2020.pdf', language: 'en' },
EDPB_GUIDELINES_1_2022: { filename: 'EDPB_GUIDELINES_1_2022.pdf', language: 'en' },
EDPB_GUIDELINES_2_2023: { filename: 'EDPB_GUIDELINES_2_2023.pdf', language: 'en' },
EDPB_GUIDELINES_2_2024: { filename: 'EDPB_GUIDELINES_2_2024.pdf', language: 'en' },
EDPB_GUIDELINES_4_2019: { filename: 'EDPB_GUIDELINES_4_2019.pdf', language: 'en' },
EDPB_GUIDELINES_9_2022: { filename: 'EDPB_GUIDELINES_9_2022.pdf', language: 'en' },
EDPB_DPIA_LIST: { filename: 'EDPB_DPIA_LIST.pdf', language: 'en' },
EDPB_LEGITIMATE_INTEREST: { filename: 'EDPB_LEGITIMATE_INTEREST.pdf', language: 'en' },
// EDPS
EDPS_DPIA_LIST: { filename: 'EDPS_DPIA_LIST.pdf', language: 'en' },
// Frameworks
ENISA_SECURE_BY_DESIGN: { filename: 'ENISA_SECURE_BY_DESIGN.pdf', language: 'en' },
ENISA_SUPPLY_CHAIN: { filename: 'ENISA_SUPPLY_CHAIN.pdf', language: 'en' },
ENISA_THREAT_LANDSCAPE: { filename: 'ENISA_THREAT_LANDSCAPE.pdf', language: 'en' },
ENISA_ICS_SCADA: { filename: 'ENISA_ICS_SCADA.pdf', language: 'en' },
ENISA_CYBERSECURITY_2024: { filename: 'ENISA_CYBERSECURITY_2024.pdf', language: 'en' },
NIST_SSDF: { filename: 'NIST_SSDF.pdf', language: 'en' },
NIST_CSF_2: { filename: 'NIST_CSF_2.pdf', language: 'en' },
OECD_AI_PRINCIPLES: { filename: 'OECD_AI_PRINCIPLES.pdf', language: 'en' },
// EU-IFRS / EFRAG
// NOTE(review): only the language-specific codes EU_IFRS_DE / EU_IFRS_EN are
// mapped; a plain 'EU_IFRS' code would find no entry here — confirm which code
// callers look up.
EU_IFRS_DE: { filename: 'EU_IFRS_DE.pdf', language: 'de' },
EU_IFRS_EN: { filename: 'EU_IFRS_EN.pdf', language: 'en' },
EFRAG_ENDORSEMENT: { filename: 'EFRAG_ENDORSEMENT.pdf', language: 'en' },
}

View File

@@ -11,8 +11,6 @@ import React, { useState, useEffect, useCallback } from 'react'
import Link from 'next/link'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIModuleSidebarResponsive } from '@/components/ai/AIModuleSidebar'
import { REGULATIONS_IN_RAG } from './rag-constants'
import { ChunkBrowserQA } from './components/ChunkBrowserQA'
// Base path of the local proxy route; API calls go through it to reach klausur-service
// instead of addressing the service directly.
const API_PROXY = '/api/legal-corpus'
@@ -75,7 +73,7 @@ interface DsfaCorpusStatus {
type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates'
// Tab definitions
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'chunks' | 'data' | 'ingestion' | 'pipeline'
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'data' | 'ingestion' | 'pipeline'
// Custom document type
interface CustomDocument {
@@ -1013,264 +1011,8 @@ const REGULATIONS = [
keyTopics: ['Bussgeldberechnung', 'Schweregrad', 'Milderungsgruende', 'Bussgeldrahmen'],
effectiveDate: '2022'
},
// =====================================================================
// Neu ingestierte EU-Richtlinien (Februar 2026)
// =====================================================================
{
code: 'E_COMMERCE_RL',
name: 'E-Commerce-Richtlinie',
fullName: 'Richtlinie 2000/31/EG ueber den elektronischen Geschaeftsverkehr',
type: 'eu_directive',
expected: 30,
description: 'EU-Richtlinie ueber den elektronischen Geschaeftsverkehr (E-Commerce). Regelt Herkunftslandprinzip, Informationspflichten, Haftungsprivilegien fuer Vermittler (Mere Conduit, Caching, Hosting).',
relevantFor: ['Online-Dienste', 'E-Commerce', 'Hosting-Anbieter', 'Plattformen'],
keyTopics: ['Herkunftslandprinzip', 'Haftungsprivileg', 'Informationspflichten', 'Spam-Verbot', 'Vermittlerhaftung'],
effectiveDate: '17. Juli 2000'
},
{
code: 'VERBRAUCHERRECHTE_RL',
name: 'Verbraucherrechte-Richtlinie',
fullName: 'Richtlinie 2011/83/EU ueber die Rechte der Verbraucher',
type: 'eu_directive',
expected: 25,
description: 'EU-weite Harmonisierung der Verbraucherrechte bei Fernabsatz und aussergeschaeftlichen Vertraegen. 14-Tage-Widerrufsrecht, Informationspflichten, digitale Inhalte.',
relevantFor: ['Online-Shops', 'E-Commerce', 'Fernabsatz', 'Dienstleister'],
keyTopics: ['Widerrufsrecht 14 Tage', 'Informationspflichten', 'Fernabsatzvertraege', 'Digitale Inhalte'],
effectiveDate: '13. Juni 2014'
},
{
code: 'DIGITALE_INHALTE_RL',
name: 'Digitale-Inhalte-Richtlinie',
fullName: 'Richtlinie (EU) 2019/770 ueber digitale Inhalte und Dienstleistungen',
type: 'eu_directive',
expected: 20,
description: 'Gewaehrleistungsrecht fuer digitale Inhalte und Dienstleistungen. Regelt Maengelhaftung, Updates, Vertragsmaessigkeit und Kuendigungsrechte bei digitalen Produkten.',
relevantFor: ['SaaS-Anbieter', 'App-Entwickler', 'Cloud-Dienste', 'Streaming-Anbieter', 'Software-Hersteller'],
keyTopics: ['Digitale Gewaehrleistung', 'Update-Pflicht', 'Vertragsmaessigkeit', 'Kuendigungsrecht', 'Datenportabilitaet'],
effectiveDate: '1. Januar 2022'
},
{
code: 'DMA',
name: 'Digital Markets Act',
fullName: 'Verordnung (EU) 2022/1925 - Digital Markets Act',
type: 'eu_regulation',
expected: 50,
description: 'Reguliert digitale Gatekeeper-Plattformen. Stellt Verhaltensregeln fuer grosse Plattformen auf (Apple, Google, Meta, Amazon, Microsoft). Verbietet Selbstbevorzugung und erzwingt Interoperabilitaet.',
relevantFor: ['Grosse Plattformen', 'App-Stores', 'Suchmaschinen', 'Social Media', 'Messenger-Dienste'],
keyTopics: ['Gatekeeper-Pflichten', 'Interoperabilitaet', 'Selbstbevorzugung', 'App-Store-Regeln', 'Datenportabilitaet'],
effectiveDate: '2. Mai 2023'
},
// === Industrie-Compliance (2026-02-28) ===
{
code: 'MACHINERY_REG',
name: 'Maschinenverordnung',
fullName: 'Verordnung (EU) 2023/1230 ueber Maschinen (Machinery Regulation)',
type: 'eu_regulation',
expected: 100,
description: 'Loest die alte Maschinenrichtlinie 2006/42/EG ab. Regelt Sicherheitsanforderungen fuer Maschinen und zugehoerige Produkte, CE-Kennzeichnung, Konformitaetsbewertung und Marktaufsicht. Neu: Cybersecurity-Anforderungen fuer vernetzte Maschinen.',
relevantFor: ['Maschinenbau', 'Industrie 4.0', 'Automatisierung', 'Hersteller', 'Importeure'],
keyTopics: ['CE-Kennzeichnung', 'Konformitaetsbewertung', 'Risikobeurteilung', 'Cybersecurity', 'Betriebsanleitung'],
effectiveDate: '20. Januar 2027'
},
{
code: 'BLUE_GUIDE',
name: 'Blue Guide',
fullName: 'Leitfaden fuer die Umsetzung der EU-Produktvorschriften (Blue Guide 2022)',
type: 'eu_guideline',
expected: 200,
description: 'Umfassender Leitfaden der EU-Kommission zur Umsetzung von Produktvorschriften. Erklaert CE-Kennzeichnung, Konformitaetsbewertungsverfahren, notifizierte Stellen, Marktaufsicht und den New Legislative Framework.',
relevantFor: ['Hersteller', 'Importeure', 'Haendler', 'Notifizierte Stellen', 'Marktaufsichtsbehoerden'],
keyTopics: ['CE-Kennzeichnung', 'Konformitaetserklaerung', 'Notifizierte Stellen', 'Marktaufsicht', 'New Legislative Framework'],
effectiveDate: '29. Juni 2022'
},
{
code: 'ENISA_SECURE_BY_DESIGN',
name: 'ENISA Secure by Design',
fullName: 'ENISA Secure Software Development Best Practices',
type: 'eu_guideline',
expected: 50,
description: 'ENISA-Leitfaden fuer sichere Softwareentwicklung. Beschreibt Best Practices fuer Security by Design, sichere Entwicklungsprozesse und Schwachstellenmanagement.',
relevantFor: ['Softwareentwickler', 'DevOps', 'IT-Sicherheit', 'Produktmanagement'],
keyTopics: ['Security by Design', 'SDLC', 'Schwachstellenmanagement', 'Secure Coding', 'Threat Modeling'],
effectiveDate: '2023'
},
{
code: 'ENISA_SUPPLY_CHAIN',
name: 'ENISA Supply Chain Security',
fullName: 'ENISA Threat Landscape for Supply Chain Attacks',
type: 'eu_guideline',
expected: 50,
description: 'ENISA-Analyse der Bedrohungslandschaft fuer Supply-Chain-Angriffe. Beschreibt Angriffsvektoren, Taxonomie und Empfehlungen zur Absicherung von Software-Lieferketten.',
relevantFor: ['IT-Sicherheit', 'Beschaffung', 'Softwareentwickler', 'CISO'],
keyTopics: ['Supply Chain Security', 'SolarWinds', 'SBOM', 'Lieferantenrisiko', 'Third-Party Risk'],
effectiveDate: '2021'
},
{
code: 'NIST_SSDF',
name: 'NIST SSDF',
fullName: 'NIST SP 800-218 — Secure Software Development Framework (SSDF)',
type: 'international_standard',
expected: 40,
description: 'NIST-Framework fuer sichere Softwareentwicklung. Definiert Praktiken und Aufgaben in vier Gruppen: Prepare, Protect, Produce, Respond. Weit verbreitet als Referenz fuer Software Supply Chain Security.',
relevantFor: ['Softwareentwickler', 'DevSecOps', 'IT-Sicherheit', 'Compliance-Manager'],
keyTopics: ['SSDF', 'Secure SDLC', 'Software Supply Chain', 'Vulnerability Management', 'Code Review'],
effectiveDate: '3. Februar 2022'
},
{
code: 'NIST_CSF_2',
name: 'NIST CSF 2.0',
fullName: 'NIST Cybersecurity Framework (CSF) 2.0',
type: 'international_standard',
expected: 50,
description: 'Version 2.0 des NIST Cybersecurity Framework. Neue Kernfunktion "Govern" ergaenzt Identify, Protect, Detect, Respond, Recover. Erweitert den Anwendungsbereich ueber kritische Infrastruktur hinaus auf alle Organisationen.',
relevantFor: ['CISO', 'IT-Sicherheit', 'Risikomanagement', 'Geschaeftsfuehrung', 'Alle Branchen'],
keyTopics: ['Govern', 'Identify', 'Protect', 'Detect', 'Respond', 'Recover', 'Cybersecurity'],
effectiveDate: '26. Februar 2024'
},
{
code: 'OECD_AI_PRINCIPLES',
name: 'OECD AI Principles',
fullName: 'OECD Recommendation on Artificial Intelligence (AI Principles)',
type: 'international_standard',
expected: 20,
description: 'OECD-Empfehlung zu Kuenstlicher Intelligenz. Definiert fuenf Prinzipien fuer verantwortungsvolle KI: Inklusives Wachstum, Menschenzentrierte Werte, Transparenz, Robustheit und Rechenschaftspflicht. Von 46 Laendern angenommen.',
relevantFor: ['KI-Entwickler', 'Policy-Maker', 'Ethik-Kommissionen', 'Geschaeftsfuehrung'],
keyTopics: ['AI Ethics', 'Transparenz', 'Accountability', 'Trustworthy AI', 'Human-Centered AI'],
effectiveDate: '22. Mai 2019'
},
{
code: 'EU_IFRS',
name: 'EU-IFRS',
fullName: 'Verordnung (EU) 2023/1803 — International Financial Reporting Standards',
type: 'eu_regulation',
expected: 500,
description: 'Konsolidierte Fassung der von der EU uebernommenen IFRS/IAS/IFRIC/SIC. Rechtsverbindlich fuer boersennotierte EU-Unternehmen. Enthalt IFRS 1-17, IAS 1-41, IFRIC 1-23 und SIC 7-32 in der EU-endorsed Fassung (Stand Okt 2023). ACHTUNG: Neuere IASB-Standards sind moeglicherweise noch nicht EU-endorsed.',
relevantFor: ['Rechnungswesen', 'Wirtschaftspruefer', 'boersennotierte Unternehmen', 'Finanzberichterstattung', 'CFO'],
keyTopics: ['IFRS 16 Leasing', 'IFRS 9 Finanzinstrumente', 'IAS 1 Darstellung', 'IFRS 15 Erloese', 'IFRS 17 Versicherungsvertraege', 'Konsolidierung'],
effectiveDate: '16. Oktober 2023'
},
{
code: 'EFRAG_ENDORSEMENT',
name: 'EFRAG Endorsement Status',
fullName: 'EFRAG EU Endorsement Status Report (Dezember 2025)',
type: 'eu_guideline',
expected: 30,
description: 'Uebersicht des European Financial Reporting Advisory Group (EFRAG) ueber den EU-Endorsement-Stand aller IFRS/IAS-Standards. Zeigt welche Standards von der EU uebernommen wurden und welche noch ausstehend sind. Relevant fuer internationale Ausschreibungen und Compliance-Pruefung.',
relevantFor: ['Rechnungswesen', 'Wirtschaftspruefer', 'Compliance Officer', 'internationale Ausschreibungen'],
keyTopics: ['EU Endorsement', 'IFRS 18', 'IFRS S1/S2 Sustainability', 'Endorsement Status', 'IASB Updates'],
effectiveDate: '18. Dezember 2025'
},
]
// Source URLs for original documents (click to view original).
// Lookup table: regulation code -> URL of the official/original publication.
const REGULATION_SOURCES: Record<string, string> = {
// EU regulations/directives (EUR-Lex)
GDPR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32016R0679',
EPRIVACY: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32002L0058',
SCC: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32021D0914',
DPF: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023D1795',
AIACT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R1689',
CRA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R2847',
NIS2: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022L2555',
EUCSA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019R0881',
DATAACT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R2854',
DGA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R0868',
DSA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R2065',
EAA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0882',
DSM: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0790',
PLD: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024L2853',
GPSR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R0988',
DORA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R2554',
PSD2: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32015L2366',
AMLR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R1624',
MiCA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1114',
EHDS: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32025R0327',
SCC_FULL_TEXT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32021D0914',
E_COMMERCE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32000L0031',
VERBRAUCHERRECHTE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32011L0083',
DIGITALE_INHALTE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0770',
DMA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R1925',
MACHINERY_REG: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1230',
BLUE_GUIDE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:52022XC0629(04)',
EU_IFRS: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
// EDPB guidelines
EDPB_GUIDELINES_2_2019: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-22019-processing-personal-data-under-article-61b_en',
EDPB_GUIDELINES_3_2019: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-32019-processing-personal-data-through-video_en',
EDPB_GUIDELINES_5_2020: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-052020-consent-under-regulation-2016679_en',
EDPB_GUIDELINES_7_2020: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-072020-concepts-controller-and-processor-gdpr_en',
// NOTE(review): the key says 1/2022 but the URL slug is "guidelines-042022"
// (calculation of administrative fines) — confirm the key naming is intended.
EDPB_GUIDELINES_1_2022: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-042022-calculation-administrative-fines-under-gdpr_en',
// BSI technical guidelines
'BSI-TR-03161-1': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-1.html',
'BSI-TR-03161-2': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-2.html',
'BSI-TR-03161-3': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-3.html',
// National data protection laws
AT_DSG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597',
BDSG_FULL: 'https://www.gesetze-im-internet.de/bdsg_2018/',
CH_DSG: 'https://www.fedlex.admin.ch/eli/cc/2022/491/de',
LI_DSG: 'https://www.gesetze.li/konso/2018.272',
BE_DPA_LAW: 'https://www.autoriteprotectiondonnees.be/citoyen/la-loi-du-30-juillet-2018',
NL_UAVG: 'https://wetten.overheid.nl/BWBR0040940/',
FR_CNIL_GUIDE: 'https://www.cnil.fr/fr/rgpd-par-ou-commencer',
ES_LOPDGDD: 'https://www.boe.es/buscar/act.php?id=BOE-A-2018-16673',
IT_CODICE_PRIVACY: 'https://www.garanteprivacy.it/home/docweb/-/docweb-display/docweb/9042678',
IE_DPA_2018: 'https://www.irishstatutebook.ie/eli/2018/act/7/enacted/en/html',
UK_DPA_2018: 'https://www.legislation.gov.uk/ukpga/2018/12/contents',
UK_GDPR: 'https://www.legislation.gov.uk/eur/2016/679/contents',
NO_PERSONOPPLYSNINGSLOVEN: 'https://lovdata.no/dokument/NL/lov/2018-06-15-38',
SE_DATASKYDDSLAG: 'https://www.riksdagen.se/sv/dokument-och-lagar/dokument/svensk-forfattningssamling/lag-2018218-med-kompletterande-bestammelser_sfs-2018-218/',
FI_TIETOSUOJALAKI: 'https://www.finlex.fi/fi/laki/ajantasa/2018/20181050',
PL_UODO: 'https://isap.sejm.gov.pl/isap.nsf/DocDetails.xsp?id=WDU20180001000',
CZ_ZOU: 'https://www.zakonyprolidi.cz/cs/2019-110',
HU_INFOTV: 'https://net.jogtar.hu/jogszabaly?docid=a1100112.tv',
LU_DPA_LAW: 'https://legilux.public.lu/eli/etat/leg/loi/2018/08/01/a686/jo',
DK_DATABESKYTTELSESLOVEN: 'https://www.retsinformation.dk/eli/lta/2018/502',
// Germany — further laws
TDDDG: 'https://www.gesetze-im-internet.de/tdddg/',
DE_DDG: 'https://www.gesetze-im-internet.de/ddg/',
DE_BGB_AGB: 'https://www.gesetze-im-internet.de/bgb/__305.html',
DE_EGBGB: 'https://www.gesetze-im-internet.de/bgbeg/art_246.html',
DE_UWG: 'https://www.gesetze-im-internet.de/uwg_2004/',
DE_HGB_RET: 'https://www.gesetze-im-internet.de/hgb/__257.html',
DE_AO_RET: 'https://www.gesetze-im-internet.de/ao_1977/__147.html',
DE_TKG: 'https://www.gesetze-im-internet.de/tkg_2021/',
DE_PANGV: 'https://www.gesetze-im-internet.de/pangv_2022/',
DE_DLINFOV: 'https://www.gesetze-im-internet.de/dlinfov/',
DE_BETRVG: 'https://www.gesetze-im-internet.de/betrvg/__87.html',
DE_GESCHGEHG: 'https://www.gesetze-im-internet.de/geschgehg/',
DE_BSIG: 'https://www.gesetze-im-internet.de/bsig_2009/',
DE_USTG_RET: 'https://www.gesetze-im-internet.de/ustg_1980/__14b.html',
// Austria — further laws
AT_ECG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20001703',
AT_TKG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20007898',
AT_KSCHG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10002462',
AT_FAGG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20008783',
AT_UGB_RET: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001702',
AT_BAO_RET: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10003940',
AT_MEDIENG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10000719',
AT_ABGB_AGB: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001622',
AT_UWG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10002665',
// Switzerland
CH_DSV: 'https://www.fedlex.admin.ch/eli/cc/2022/568/de',
CH_OR_AGB: 'https://www.fedlex.admin.ch/eli/cc/27/317_321_377/de',
CH_UWG: 'https://www.fedlex.admin.ch/eli/cc/1988/223_223_223/de',
CH_FMG: 'https://www.fedlex.admin.ch/eli/cc/1997/2187_2187_2187/de',
CH_GEBUV: 'https://www.fedlex.admin.ch/eli/cc/2002/249/de',
CH_ZERTES: 'https://www.fedlex.admin.ch/eli/cc/2016/752/de',
CH_ZGB_PERS: 'https://www.fedlex.admin.ch/eli/cc/24/233_245_233/de',
// Industry compliance
ENISA_SECURE_BY_DESIGN: 'https://www.enisa.europa.eu/publications/secure-development-best-practices',
ENISA_SUPPLY_CHAIN: 'https://www.enisa.europa.eu/publications/threat-landscape-for-supply-chain-attacks',
NIST_SSDF: 'https://csrc.nist.gov/pubs/sp/800/218/final',
NIST_CSF_2: 'https://www.nist.gov/cyberframework',
OECD_AI_PRINCIPLES: 'https://legalinstruments.oecd.org/en/instruments/OECD-LEGAL-0449',
// IFRS / EFRAG (language-specific codes; both point at the same CELEX number)
EU_IFRS_DE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
EU_IFRS_EN: 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32023R1803',
EFRAG_ENDORSEMENT: 'https://www.efrag.org/activities/endorsement-status-report',
// Full-text Austrian Data Protection Act (same URL as AT_DSG)
AT_DSG_FULL: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597',
}
// License info for each regulation
const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string }> = {
GDPR: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk der EU — frei verwendbar' },
@@ -1321,18 +1063,6 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
EDPB_GUIDELINES_3_2019: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
EDPB_GUIDELINES_5_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
EDPB_GUIDELINES_7_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
// Industrie-Compliance (2026-02-28)
MACHINERY_REG: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
BLUE_GUIDE: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Leitfaden — amtliches Werk der Kommission' },
ENISA_SECURE_BY_DESIGN: { license: 'CC-BY-4.0', licenseNote: 'ENISA Publication — CC BY 4.0' },
ENISA_SUPPLY_CHAIN: { license: 'CC-BY-4.0', licenseNote: 'ENISA Publication — CC BY 4.0' },
NIST_SSDF: { license: 'PUBLIC_DOMAIN', licenseNote: 'US Government Work — Public Domain' },
NIST_CSF_2: { license: 'PUBLIC_DOMAIN', licenseNote: 'US Government Work — Public Domain' },
OECD_AI_PRINCIPLES: { license: 'PUBLIC_DOMAIN', licenseNote: 'OECD Legal Instrument — Reuse Notice' },
// EU-IFRS / EFRAG (2026-02-28)
EU_IFRS_DE: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
EU_IFRS_EN: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
EFRAG_ENDORSEMENT: { license: 'PUBLIC_DOMAIN', licenseNote: 'EFRAG — oeffentliches Dokument' },
// DACH National Laws — Deutschland
DE_DDG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
DE_BGB_AGB: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
@@ -1369,35 +1099,6 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
LU_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Luxemburg — frei verwendbar' },
DK_DATABESKYTTELSESLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Daenemark — frei verwendbar' },
EDPB_GUIDELINES_1_2022: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
// Neue EU-Richtlinien (Februar 2026 ingestiert)
E_COMMERCE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
VERBRAUCHERRECHTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
DIGITALE_INHALTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
}
// REGULATIONS_IN_RAG is imported from ./rag-constants.ts
// Helper: Check if regulation is in RAG.
// Returns true only when `code` is an own key of REGULATIONS_IN_RAG. The `in`
// operator is deliberately avoided: it also walks the prototype chain, so codes
// such as 'toString' or 'constructor' would wrongly count as ingested.
const isInRag = (code: string): boolean =>
  Object.prototype.hasOwnProperty.call(REGULATIONS_IN_RAG, code)
// Helper: known chunk count for a regulation code.
// Yields 0 when the code has no entry in REGULATIONS_IN_RAG or when the
// entry's `chunks` value is missing/falsy.
const getKnownChunks = (code: string): number => {
  const ragEntry = REGULATIONS_IN_RAG[code]
  return ragEntry?.chunks || 0
}
// Known collection totals (updated: 2026-03-12)
// Note: bp_compliance_datenschutz expanded via edpb-crawler.py (55 EDPB/WP29/EDPS documents).
// bp_dsfa_corpus expanded with 20 DSFA mandatory lists ("Muss-Listen": BfDI + DSK + 16 federal states).
// bp_compliance_gesetze: +5263 chunks from Phase H consumer protection (run #701, incl. BDSG/DDG/TKG/HGB/AO duplicates)
//
// The aggregate fields (total_legal, total_all) are computed from the
// per-collection counts so they cannot drift when a single count is updated.
// The IIFE keeps the intermediate object out of module scope.
const COLLECTION_TOTALS = (() => {
  // Chunk count per collection — the only numbers that need manual upkeep.
  const perCollection = {
    bp_compliance_gesetze: 63567, // 58304 + 5263 (Phase H)
    bp_compliance_ce: 18183,
    bp_legal_templates: 7689,
    bp_compliance_datenschutz: 17459,
    bp_dsfa_corpus: 8666,
    bp_compliance_recht: 1425,
    bp_nibis_eh: 7996,
  }

  return {
    ...perCollection,
    // gesetze + ce (currently 81750)
    total_legal: perCollection.bp_compliance_gesetze + perCollection.bp_compliance_ce,
    // sum over every collection above (currently 124985)
    total_all: Object.values(perCollection).reduce((sum, count) => sum + count, 0),
  }
})()
// License display labels
@@ -1451,10 +1152,7 @@ const INDUSTRY_REGULATION_MAP: Record<string, string[]> = {
all: ['GDPR', 'EPRIVACY', 'TDDDG'],
health: ['GDPR', 'TDDDG', 'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3', 'NIS2', 'AIACT', 'PLD', 'EHDS'],
finance: ['GDPR', 'TDDDG', 'NIS2', 'EUCSA', 'DSA', 'AIACT', 'DPF', 'DORA', 'PSD2', 'AMLR', 'MiCA'],
ecommerce: ['GDPR', 'TDDDG', 'DSA', 'GPSR', 'EAA', 'PLD', 'DPF', 'PSD2',
'DE_PANGV', 'DE_VSBG', 'DE_PRODHAFTG', 'DE_VERPACKG', 'DE_ELEKTROG', 'DE_BFSG', 'DE_UWG',
'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'WARENKAUF_RL', 'KLAUSEL_RL', 'UNLAUTERE_PRAKTIKEN_RL',
'PREISANGABEN_RL', 'OMNIBUS_RL', 'DIGITALE_INHALTE_RL'],
ecommerce: ['GDPR', 'TDDDG', 'DSA', 'GPSR', 'EAA', 'PLD', 'DPF', 'PSD2'],
tech: ['GDPR', 'TDDDG', 'CRA', 'AIACT', 'DPF', 'SCC', 'DATAACT', 'DSM', 'MiCA'],
iot: ['GDPR', 'CRA', 'GPSR', 'PLD', 'DATAACT', 'AIACT'],
ai: ['GDPR', 'AIACT', 'PLD', 'DSM', 'DATAACT'],
@@ -1514,15 +1212,6 @@ const THEMATIC_GROUPS = [
regulations: ['EHDS', 'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3'],
description: 'Gesundheitsdatenraum, DiGA-Sicherheit, Patientenrechte'
},
{
id: 'verbraucherschutz',
name: 'Verbraucherschutz & E-Commerce',
color: 'bg-amber-500',
regulations: ['DE_PANGV', 'DE_VSBG', 'DE_PRODHAFTG', 'DE_UWG', 'DE_BFSG',
'WARENKAUF_RL', 'KLAUSEL_RL', 'UNLAUTERE_PRAKTIKEN_RL', 'PREISANGABEN_RL',
'OMNIBUS_RL', 'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'DIGITALE_INHALTE_RL'],
description: 'Widerrufsrecht, Preisangaben, Fernabsatz, AGB-Recht, Barrierefreiheit'
},
]
// Key overlaps and intersections
@@ -1755,8 +1444,6 @@ export default function RAGPage() {
const [autoRefresh, setAutoRefresh] = useState(true)
const [elapsedTime, setElapsedTime] = useState<string>('')
// Chunk browser state is now in ChunkBrowserQA component
// DSFA corpus state
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
const [dsfaStatus, setDsfaStatus] = useState<DsfaCorpusStatus | null>(null)
@@ -2002,8 +1689,6 @@ export default function RAGPage() {
return () => clearInterval(interval)
}, [pipelineState?.started_at, pipelineState?.status])
// Chunk browser functions are now in ChunkBrowserQA component
const handleSearch = async () => {
if (!searchQuery.trim()) return
@@ -2089,7 +1774,6 @@ export default function RAGPage() {
{ id: 'regulations' as TabId, name: 'Regulierungen', icon: '📜' },
{ id: 'map' as TabId, name: 'Landkarte', icon: '🗺️' },
{ id: 'search' as TabId, name: 'Suche', icon: '🔍' },
{ id: 'chunks' as TabId, name: 'Chunk-Browser', icon: '🧩' },
{ id: 'data' as TabId, name: 'Daten', icon: '📁' },
{ id: 'ingestion' as TabId, name: 'Ingestion', icon: '⚙️' },
{ id: 'pipeline' as TabId, name: 'Pipeline', icon: '🔄' },
@@ -2120,7 +1804,7 @@ export default function RAGPage() {
{/* Page Purpose */}
<PagePurpose
title="Daten & RAG"
purpose={`Verwalten und durchsuchen Sie 7 RAG-Collections mit ${REGULATIONS.length} Regulierungen (${Object.keys(REGULATIONS_IN_RAG).length} im RAG). Legal Corpus, DSFA Corpus (70+ Quellen), NiBiS EH (Bildungsinhalte) und Legal Templates. Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur.`}
purpose="Verwalten und durchsuchen Sie 4 RAG-Collections: Legal Corpus (24 Regulierungen), DSFA Corpus (70+ Quellen inkl. internationaler Datenschutzgesetze), NiBiS EH (Bildungsinhalte) und Legal Templates (Dokumentvorlagen). Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur."
audience={['DSB', 'Compliance Officer', 'Entwickler']}
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
architecture={{
@@ -2142,8 +1826,8 @@ export default function RAGPage() {
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
<div className="bg-white rounded-xl p-4 border border-slate-200">
<p className="text-xs font-medium text-blue-600 uppercase mb-1">Legal Corpus</p>
<p className="text-2xl font-bold text-slate-900">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
<p className="text-xs text-slate-500">Chunks &middot; {Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
<p className="text-2xl font-bold text-slate-900">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
<p className="text-xs text-slate-500">Chunks &middot; {REGULATIONS.length} Regulierungen</p>
</div>
<div className="bg-white rounded-xl p-4 border border-slate-200">
<p className="text-xs font-medium text-purple-600 uppercase mb-1">DSFA Corpus</p>
@@ -2152,12 +1836,12 @@ export default function RAGPage() {
</div>
<div className="bg-white rounded-xl p-4 border border-slate-200">
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
<p className="text-2xl font-bold text-slate-900">7.996</p>
<p className="text-2xl font-bold text-slate-900">28.662</p>
<p className="text-xs text-slate-500">Chunks &middot; Bildungs-Erwartungshorizonte</p>
</div>
<div className="bg-white rounded-xl p-4 border border-slate-200">
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
<p className="text-2xl font-bold text-slate-900">7.689</p>
<p className="text-2xl font-bold text-slate-900">824</p>
<p className="text-xs text-slate-500">Chunks &middot; Dokumentvorlagen</p>
</div>
</div>
@@ -2192,8 +1876,8 @@ export default function RAGPage() {
className="p-4 rounded-lg border border-blue-200 bg-blue-50 hover:bg-blue-100 transition-colors text-left"
>
<p className="text-xs font-medium text-blue-600 uppercase">Gesetze & Regulierungen</p>
<p className="text-2xl font-bold text-slate-900 mt-1">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
<p className="text-xs text-slate-500 mt-1">{Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
<p className="text-2xl font-bold text-slate-900 mt-1">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
<p className="text-xs text-slate-500 mt-1">{REGULATIONS.length} Regulierungen (EU, DE, BSI)</p>
</button>
<button
onClick={() => { setRegulationCategory('dsfa'); setActiveTab('regulations') }}
@@ -2205,12 +1889,12 @@ export default function RAGPage() {
</button>
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
<p className="text-2xl font-bold text-slate-900 mt-1">7.996</p>
<p className="text-2xl font-bold text-slate-900 mt-1">28.662</p>
<p className="text-xs text-slate-500 mt-1">Chunks &middot; Bildungs-Erwartungshorizonte</p>
</div>
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
<p className="text-2xl font-bold text-slate-900 mt-1">7.689</p>
<p className="text-2xl font-bold text-slate-900 mt-1">824</p>
<p className="text-xs text-slate-500 mt-1">Chunks &middot; Dokumentvorlagen (VVT, TOM, DSFA)</p>
</div>
</div>
@@ -2220,13 +1904,12 @@ export default function RAGPage() {
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
{Object.entries(TYPE_LABELS).map(([type, label]) => {
const regs = REGULATIONS.filter((r) => r.type === type)
const inRagCount = regs.filter((r) => isInRag(r.code)).length
const totalChunks = regs.reduce((sum, r) => sum + getKnownChunks(r.code), 0)
const totalChunks = regs.reduce((sum, r) => sum + getRegulationChunks(r.code), 0)
return (
<div key={type} className="bg-white rounded-xl p-4 border border-slate-200">
<div className="flex items-center gap-2 mb-2">
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[type]}`}>{label}</span>
<span className="text-slate-500 text-sm">{inRagCount}/{regs.length} im RAG</span>
<span className="text-slate-500 text-sm">{regs.length} Dok.</span>
</div>
<p className="text-xl font-bold text-slate-900">{totalChunks.toLocaleString()} Chunks</p>
</div>
@@ -2240,25 +1923,20 @@ export default function RAGPage() {
<h3 className="font-semibold text-slate-900">Top Regulierungen (nach Chunks)</h3>
</div>
<div className="divide-y">
{[...REGULATIONS].sort((a, b) => getKnownChunks(b.code) - getKnownChunks(a.code))
.slice(0, 10)
{REGULATIONS.sort((a, b) => getRegulationChunks(b.code) - getRegulationChunks(a.code))
.slice(0, 5)
.map((reg) => {
const chunks = getKnownChunks(reg.code)
const chunks = getRegulationChunks(reg.code)
return (
<div key={reg.code} className="px-4 py-3 flex items-center justify-between">
<div className="flex items-center gap-3">
{isInRag(reg.code) ? (
<span className="text-green-500 text-sm"></span>
) : (
<span className="text-red-400 text-sm"></span>
)}
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
{TYPE_LABELS[reg.type]}
</span>
<span className="font-medium text-slate-900">{reg.name}</span>
<span className="text-slate-500 text-sm">({reg.code})</span>
</div>
<span className={`font-bold ${chunks > 0 ? 'text-teal-600' : 'text-slate-300'}`}>{chunks > 0 ? chunks.toLocaleString() + ' Chunks' : '—'}</span>
<span className="font-bold text-teal-600">{chunks.toLocaleString()} Chunks</span>
</div>
)
})}
@@ -2317,13 +1995,7 @@ export default function RAGPage() {
{regulationCategory === 'regulations' && (
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
<h3 className="font-semibold text-slate-900">
Alle {REGULATIONS.length} Regulierungen
<span className="ml-2 text-sm font-normal text-slate-500">
({REGULATIONS.filter(r => isInRag(r.code)).length} im RAG,{' '}
{REGULATIONS.filter(r => !isInRag(r.code)).length} ausstehend)
</span>
</h3>
<h3 className="font-semibold text-slate-900">Alle {REGULATIONS.length} Regulierungen</h3>
<button
onClick={fetchStatus}
className="text-sm text-teal-600 hover:text-teal-700"
@@ -2335,7 +2007,6 @@ export default function RAGPage() {
<table className="w-full">
<thead className="bg-slate-50 border-b">
<tr>
<th className="px-4 py-3 text-center text-xs font-medium text-slate-500 uppercase w-12">RAG</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Code</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Typ</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Name</th>
@@ -2346,10 +2017,17 @@ export default function RAGPage() {
</thead>
<tbody className="divide-y">
{REGULATIONS.map((reg) => {
const chunks = getKnownChunks(reg.code)
const inRag = isInRag(reg.code)
let statusColor = inRag ? 'text-green-500' : 'text-red-500'
let statusIcon = inRag ? '✓' : '❌'
const chunks = getRegulationChunks(reg.code)
const ratio = chunks / (reg.expected * 10) // Rough estimate: 10 chunks per requirement
let statusColor = 'text-red-500'
let statusIcon = '❌'
if (ratio > 0.5) {
statusColor = 'text-green-500'
statusIcon = '✓'
} else if (ratio > 0.1) {
statusColor = 'text-yellow-500'
statusIcon = '⚠'
}
const isExpanded = expandedRegulation === reg.code
return (
@@ -2358,13 +2036,6 @@ export default function RAGPage() {
onClick={() => setExpandedRegulation(isExpanded ? null : reg.code)}
className="hover:bg-slate-50 cursor-pointer transition-colors"
>
<td className="px-4 py-3 text-center">
{isInRag(reg.code) ? (
<span className="inline-flex items-center justify-center w-6 h-6 bg-green-100 text-green-600 rounded-full text-xs font-bold" title="Im RAG vorhanden"></span>
) : (
<span className="inline-flex items-center justify-center w-6 h-6 bg-red-50 text-red-400 rounded-full text-xs font-bold" title="Nicht im RAG"></span>
)}
</td>
<td className="px-4 py-3 font-mono font-medium text-teal-600">
<span className="inline-flex items-center gap-2">
<span className={`transform transition-transform ${isExpanded ? 'rotate-90' : ''}`}></span>
@@ -2377,20 +2048,13 @@ export default function RAGPage() {
</span>
</td>
<td className="px-4 py-3 text-slate-900">{reg.name}</td>
<td className="px-4 py-3 text-right font-bold">
<span className={chunks > 0 && chunks < 10 && reg.expected >= 10 ? 'text-amber-600' : ''}>
{chunks.toLocaleString()}
{chunks > 0 && chunks < 10 && reg.expected >= 10 && (
<span className="ml-1 inline-block w-4 h-4 text-[10px] leading-4 text-center bg-amber-100 text-amber-700 rounded-full" title="Verdaechtig niedrig — Ingestion pruefen"></span>
)}
</span>
</td>
<td className="px-4 py-3 text-right font-bold">{chunks.toLocaleString()}</td>
<td className="px-4 py-3 text-right text-slate-500">{reg.expected}</td>
<td className={`px-4 py-3 text-center ${statusColor}`}>{statusIcon}</td>
</tr>
{isExpanded && (
<tr key={`${reg.code}-detail`} className="bg-slate-50">
<td colSpan={7} className="px-4 py-4">
<td colSpan={6} className="px-4 py-4">
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
<div>
<h4 className="font-semibold text-slate-900 mb-1">{reg.fullName}</h4>
@@ -2430,28 +2094,16 @@ export default function RAGPage() {
</span>
)}
</div>
<div className="flex items-center gap-3">
{REGULATION_SOURCES[reg.code] && (
<a
href={REGULATION_SOURCES[reg.code]}
target="_blank"
rel="noopener noreferrer"
onClick={(e) => e.stopPropagation()}
className="text-blue-600 hover:text-blue-700 font-medium"
>
Originalquelle
</a>
)}
<button
onClick={(e) => {
e.stopPropagation()
setActiveTab('chunks')
}}
className="text-teal-600 hover:text-teal-700 font-medium"
>
In Chunks suchen
</button>
</div>
<button
onClick={(e) => {
e.stopPropagation()
setSearchQuery(reg.name)
setActiveTab('search')
}}
className="text-teal-600 hover:text-teal-700 font-medium"
>
In Chunks suchen
</button>
</div>
</div>
</td>
@@ -2580,7 +2232,7 @@ export default function RAGPage() {
<div className="grid grid-cols-3 gap-4 mb-4">
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
<p className="text-2xl font-bold text-slate-900">7.996</p>
<p className="text-2xl font-bold text-slate-900">28.662</p>
</div>
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
@@ -2612,7 +2264,7 @@ export default function RAGPage() {
<div className="grid grid-cols-3 gap-4 mb-4">
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
<p className="text-sm text-orange-600 font-medium">Chunks</p>
<p className="text-2xl font-bold text-slate-900">7.689</p>
<p className="text-2xl font-bold text-slate-900">824</p>
</div>
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
@@ -2680,28 +2332,20 @@ export default function RAGPage() {
</div>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
{regs.map((reg) => {
const regInRag = isInRag(reg.code)
return (
{regs.map((reg) => (
<div
key={reg.code}
className={`bg-white p-3 rounded-lg border ${regInRag ? 'border-green-200' : 'border-slate-200'}`}
className="bg-white p-3 rounded-lg border border-slate-200"
>
<div className="flex items-center gap-2 mb-1">
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
{reg.code}
</span>
{regInRag ? (
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-green-100 text-green-600 rounded">RAG</span>
) : (
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-red-50 text-red-400 rounded"></span>
)}
</div>
<div className="font-medium text-sm text-slate-900">{reg.name}</div>
<div className="text-xs text-slate-500 mt-1 line-clamp-2">{reg.description}</div>
</div>
)
})}
))}
</div>
</>
)
@@ -2728,22 +2372,17 @@ export default function RAGPage() {
<div className="flex flex-wrap gap-2">
{group.regulations.map((code) => {
const reg = REGULATIONS.find(r => r.code === code)
const codeInRag = isInRag(code)
return (
<span
key={code}
className={`px-3 py-1.5 rounded-full text-sm font-medium cursor-pointer ${
codeInRag
? 'bg-green-100 text-green-700 hover:bg-green-200'
: 'bg-slate-100 text-slate-700 hover:bg-slate-200'
}`}
className="px-3 py-1.5 bg-slate-100 rounded-full text-sm font-medium text-slate-700 hover:bg-slate-200 cursor-pointer"
onClick={() => {
setActiveTab('regulations')
setExpandedRegulation(code)
}}
title={`${reg?.fullName || code}${codeInRag ? ' (im RAG)' : ' (nicht im RAG)'}`}
title={reg?.fullName || code}
>
{codeInRag ? '✓ ' : '✗ '}{code}
{code}
</span>
)
})}
@@ -2767,13 +2406,9 @@ export default function RAGPage() {
{intersection.regulations.map((code) => (
<span
key={code}
className={`px-2 py-0.5 text-xs font-medium rounded ${
isInRag(code)
? 'bg-green-100 text-green-700'
: 'bg-red-50 text-red-500'
}`}
className="px-2 py-0.5 text-xs font-medium bg-teal-100 text-teal-700 rounded"
>
{isInRag(code) ? '✓ ' : '✗ '}{code}
{code}
</span>
))}
</div>
@@ -2808,15 +2443,8 @@ export default function RAGPage() {
<tbody className="divide-y">
{REGULATIONS.map((reg) => (
<tr key={reg.code} className="hover:bg-slate-50">
<td className="px-2 py-2 font-medium sticky left-0 bg-white">
<span className="flex items-center gap-1">
{isInRag(reg.code) ? (
<span className="text-green-500 text-[10px]"></span>
) : (
<span className="text-red-300 text-[10px]"></span>
)}
<span className="text-teal-600">{reg.code}</span>
</span>
<td className="px-2 py-2 font-medium text-teal-600 sticky left-0 bg-white">
{reg.code}
</td>
{INDUSTRIES.filter(i => i.id !== 'all').map((industry) => {
const applies = INDUSTRY_REGULATION_MAP[industry.id]?.includes(reg.code)
@@ -2903,32 +2531,26 @@ export default function RAGPage() {
</div>
</div>
{/* RAG Coverage Overview */}
{/* Integrated Regulations */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<div className="flex items-center gap-3 mb-4">
<span className="text-2xl"></span>
<div>
<h3 className="font-semibold text-slate-900">RAG-Abdeckung ({Object.keys(REGULATIONS_IN_RAG).length} von {REGULATIONS.length} Regulierungen)</h3>
<p className="text-sm text-slate-500">Stand: Maerz 2026 Alle im RAG-System verfuegbaren Regulierungen (inkl. Verbraucherschutz Phase H)</p>
<h3 className="font-semibold text-slate-900">Neu integrierte Regulierungen</h3>
<p className="text-sm text-slate-500">Jetzt im RAG-System verfuegbar (Stand: Januar 2025)</p>
</div>
</div>
<div className="flex flex-wrap gap-2">
{REGULATIONS.filter(r => isInRag(r.code)).map((reg) => (
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-green-100 text-green-700 rounded-full border border-green-200">
{reg.code}
</span>
))}
</div>
<div className="mt-4 pt-4 border-t border-slate-100">
<p className="text-xs font-medium text-slate-500 mb-2">Noch nicht im RAG:</p>
<div className="flex flex-wrap gap-2">
{REGULATIONS.filter(r => !isInRag(r.code)).map((reg) => (
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-red-50 text-red-400 rounded-full border border-red-100">
{reg.code}
<div className="grid grid-cols-2 md:grid-cols-5 gap-3">
{INTEGRATED_REGULATIONS.map((reg) => (
<div key={reg.code} className="rounded-lg border border-green-200 bg-green-50 p-3 text-center">
<span className="px-2 py-1 text-sm font-bold bg-green-100 text-green-700 rounded">
{reg.code}
</span>
))}
</div>
<p className="text-xs text-slate-600 mt-2">{reg.name}</p>
<p className="text-xs text-green-600 mt-1">Im RAG</p>
</div>
))}
</div>
</div>
@@ -3092,10 +2714,6 @@ export default function RAGPage() {
</div>
)}
{activeTab === 'chunks' && (
<ChunkBrowserQA apiProxy={API_PROXY} />
)}
{activeTab === 'data' && (
<div className="space-y-6">
{/* Upload Document */}
@@ -3281,7 +2899,7 @@ export default function RAGPage() {
<span className="flex items-center gap-2 text-teal-600">
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Ingestion laeuft...
</span>
@@ -3351,7 +2969,7 @@ export default function RAGPage() {
{pipelineStarting ? (
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
) : (
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -3370,7 +2988,7 @@ export default function RAGPage() {
{pipelineLoading ? (
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
) : (
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -3403,7 +3021,7 @@ export default function RAGPage() {
<>
<svg className="animate-spin h-5 w-5" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Startet...
</>
@@ -3440,7 +3058,7 @@ export default function RAGPage() {
{pipelineState.status === 'running' && (
<svg className="w-6 h-6 text-blue-600 animate-spin" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
)}
{pipelineState.status === 'failed' && (

View File

@@ -1,414 +0,0 @@
/**
* Shared RAG constants used by both page.tsx and ChunkBrowserQA.
* REGULATIONS_IN_RAG maps regulation codes to their Qdrant collection, chunk count, and qdrant_id.
* The qdrant_id is the actual `regulation_id` value stored in Qdrant payloads.
* REGULATION_INFO provides minimal metadata (code, name, type) for all regulations.
*/
/** One entry of REGULATIONS_IN_RAG: where a regulation is stored and how many chunks it has. */
export interface RagRegulationEntry {
// Name of the Qdrant collection the regulation is stored in
collection: string
// Chunk count recorded for the regulation
chunks: number
qdrant_id: string // The actual regulation_id value in Qdrant payload
}
export const REGULATIONS_IN_RAG: Record<string, RagRegulationEntry> = {
// === EU Verordnungen/Richtlinien (bp_compliance_ce) ===
GDPR: { collection: 'bp_compliance_ce', chunks: 423, qdrant_id: 'eu_2016_679' },
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 134, qdrant_id: 'eu_2002_58' },
SCC: { collection: 'bp_compliance_ce', chunks: 330, qdrant_id: 'eu_2021_914' },
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 330, qdrant_id: 'eu_2021_914' },
AIACT: { collection: 'bp_compliance_ce', chunks: 726, qdrant_id: 'eu_2024_1689' },
CRA: { collection: 'bp_compliance_ce', chunks: 429, qdrant_id: 'eu_2024_2847' },
NIS2: { collection: 'bp_compliance_ce', chunks: 342, qdrant_id: 'eu_2022_2555' },
DGA: { collection: 'bp_compliance_ce', chunks: 508, qdrant_id: 'eu_2022_868' },
DSA: { collection: 'bp_compliance_ce', chunks: 1106, qdrant_id: 'eu_2022_2065' },
PLD: { collection: 'bp_compliance_ce', chunks: 44, qdrant_id: 'eu_1985_374' },
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 197, qdrant_id: 'eu_2000_31' },
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 266, qdrant_id: 'eu_2011_83' },
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 321, qdrant_id: 'eu_2019_770' },
// Verbraucherschutz EU-Richtlinien (Phase H2 Ingestion)
WARENKAUF_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'sgd' },
KLAUSEL_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'uctd' },
UNLAUTERE_PRAKTIKEN_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'ucpd' },
PREISANGABEN_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'pid' },
OMNIBUS_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'omn' },
BATTERIE_VO: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'battvo' },
DMA: { collection: 'bp_compliance_ce', chunks: 701, qdrant_id: 'eu_2022_1925' },
DPF: { collection: 'bp_compliance_ce', chunks: 2464, qdrant_id: 'dpf' },
EUCSA: { collection: 'bp_compliance_ce', chunks: 558, qdrant_id: 'eucsa' },
DATAACT: { collection: 'bp_compliance_ce', chunks: 809, qdrant_id: 'dataact' },
DORA: { collection: 'bp_compliance_ce', chunks: 823, qdrant_id: 'dora' },
PSD2: { collection: 'bp_compliance_ce', chunks: 796, qdrant_id: 'psd2' },
AMLR: { collection: 'bp_compliance_ce', chunks: 1182, qdrant_id: 'amlr' },
MiCA: { collection: 'bp_compliance_ce', chunks: 1640, qdrant_id: 'mica' },
EHDS: { collection: 'bp_compliance_ce', chunks: 1212, qdrant_id: 'ehds' },
EAA: { collection: 'bp_compliance_ce', chunks: 433, qdrant_id: 'eaa' },
DSM: { collection: 'bp_compliance_ce', chunks: 416, qdrant_id: 'dsm' },
GPSR: { collection: 'bp_compliance_ce', chunks: 509, qdrant_id: 'gpsr' },
MACHINERY_REG: { collection: 'bp_compliance_ce', chunks: 1271, qdrant_id: 'eu_2023_1230' },
BLUE_GUIDE: { collection: 'bp_compliance_ce', chunks: 2271, qdrant_id: 'eu_blue_guide_2022' },
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 34388, qdrant_id: 'eu_2023_1803' },
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 34388, qdrant_id: 'eu_2023_1803' },
// International standards in bp_compliance_ce
NIST_SSDF: { collection: 'bp_compliance_ce', chunks: 111, qdrant_id: 'nist_sp_800_218' },
NIST_CSF_2: { collection: 'bp_compliance_ce', chunks: 67, qdrant_id: 'nist_csf_2_0' },
OECD_AI_PRINCIPLES: { collection: 'bp_compliance_ce', chunks: 34, qdrant_id: 'oecd_ai_principles' },
ENISA_SECURE_BY_DESIGN: { collection: 'bp_compliance_ce', chunks: 97, qdrant_id: 'cisa_secure_by_design' },
ENISA_SUPPLY_CHAIN: { collection: 'bp_compliance_ce', chunks: 110, qdrant_id: 'enisa_supply_chain_good_practices' },
ENISA_THREAT_LANDSCAPE: { collection: 'bp_compliance_ce', chunks: 118, qdrant_id: 'enisa_threat_landscape_supply_chain' },
ENISA_ICS_SCADA: { collection: 'bp_compliance_ce', chunks: 195, qdrant_id: 'enisa_ics_scada_dependencies' },
ENISA_CYBERSECURITY_2024: { collection: 'bp_compliance_ce', chunks: 22, qdrant_id: 'enisa_cybersecurity_state_2024' },
// === DE Gesetze (bp_compliance_gesetze) ===
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'tdddg_25' },
TMG_KOMPLETT: { collection: 'bp_compliance_gesetze', chunks: 108, qdrant_id: 'tmg_komplett' },
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 1056, qdrant_id: 'bdsg_2018_komplett' },
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 40, qdrant_id: 'ddg_5' },
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4024, qdrant_id: 'bgb_komplett' },
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 36, qdrant_id: 'egbgb_widerruf' },
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 11363, qdrant_id: 'hgb_komplett' },
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 9669, qdrant_id: 'ao_komplett' },
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631, qdrant_id: 'de_tkg' },
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21, qdrant_id: 'de_dlinfov' },
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498, qdrant_id: 'de_betrvg' },
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63, qdrant_id: 'de_geschgehg' },
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071, qdrant_id: 'de_ustg_ret' },
DE_URHG: { collection: 'bp_compliance_gesetze', chunks: 626, qdrant_id: 'urhg_komplett' },
// === DE Verbraucherschutz-Gesetze (bp_compliance_gesetze) — Phase H1 (Run #701) ===
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 99, qdrant_id: 'pangv' },
DE_VSBG: { collection: 'bp_compliance_gesetze', chunks: 113, qdrant_id: 'vsbg' },
DE_PRODHAFTG: { collection: 'bp_compliance_gesetze', chunks: 26, qdrant_id: 'prodhaftg' },
DE_VERPACKG: { collection: 'bp_compliance_gesetze', chunks: 338, qdrant_id: 'verpackg' },
DE_ELEKTROG: { collection: 'bp_compliance_gesetze', chunks: 344, qdrant_id: 'elektrog' },
DE_BATTDG: { collection: 'bp_compliance_gesetze', chunks: 307, qdrant_id: 'battdg' },
DE_BFSG: { collection: 'bp_compliance_gesetze', chunks: 221, qdrant_id: 'bfsg' },
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 157, qdrant_id: 'uwg' },
DE_GEWO: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'gewo' }, // Pending: Re-run noetig (Timeout)
// BGB in Teilen (statt 2.7MB komplett)
DE_BGB_AGB_305: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_agb' }, // §§ 305-310
DE_BGB_FERNABSATZ: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_fernabsatz' }, // §§ 312-312k
DE_BGB_KAUFRECHT: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_kaufrecht' }, // §§ 433-480
DE_BGB_WIDERRUF: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_widerruf' }, // §§ 355-361
DE_BGB_DIGITAL: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_digital' }, // §§ 327-327u
DE_EGBGB_WIDERRUF: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'egbgb' }, // Muster-Widerrufsbelehrung
// === BSI Standards (bp_compliance_gesetze) ===
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 138, qdrant_id: 'bsi_tr_03161_1' },
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 124, qdrant_id: 'bsi_tr_03161_2' },
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 121, qdrant_id: 'bsi_tr_03161_3' },
// === AT Gesetze (bp_compliance_gesetze) ===
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 805, qdrant_id: 'at_dsg' },
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6, qdrant_id: 'at_dsg_full' },
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120, qdrant_id: 'at_ecg' },
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 4348, qdrant_id: 'at_tkg' },
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402, qdrant_id: 'at_kschg' },
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'at_fagg' },
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828, qdrant_id: 'at_ugb_ret' },
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246, qdrant_id: 'at_bao_ret' },
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571, qdrant_id: 'at_medieng' },
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521, qdrant_id: 'at_abgb_agb' },
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403, qdrant_id: 'at_uwg' },
// === CH Gesetze (bp_compliance_gesetze) ===
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 180, qdrant_id: 'ch_revdsg' },
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_dsv' },
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_or_agb' },
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_gebuv' },
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_zertes' },
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_zgb_pers' },
// === Nationale Gesetze (andere EU) in bp_compliance_gesetze ===
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 782, qdrant_id: 'es_lopdgdd' },
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 59, qdrant_id: 'it_codice_privacy' },
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 523, qdrant_id: 'nl_uavg' },
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 562, qdrant_id: 'fr_loi_informatique' },
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 64, qdrant_id: 'ie_dpa_2018' },
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 156, qdrant_id: 'uk_dpa_2018' },
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 45, qdrant_id: 'uk_gdpr' },
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 41, qdrant_id: 'no_pol' },
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 56, qdrant_id: 'se_dataskyddslag' },
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 39, qdrant_id: 'pl_ustawa' },
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 238, qdrant_id: 'cz_zakon' },
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 747, qdrant_id: 'hu_info_tv' },
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'lu_dpa_law' },
// === EDPB Guidelines (bp_compliance_datenschutz) — alt (ingest-legal-corpus.sh) ===
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 236, qdrant_id: 'edpb_05_2020' },
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347, qdrant_id: 'edpb_guidelines_7_2020' },
EDPB_GUIDELINES_1_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_01_2020' },
EDPB_GUIDELINES_1_2022: { collection: 'bp_compliance_datenschutz', chunks: 510, qdrant_id: 'edpb_01_2022' },
EDPB_GUIDELINES_2_2023: { collection: 'bp_compliance_datenschutz', chunks: 94, qdrant_id: 'edpb_02_2023' },
EDPB_GUIDELINES_2_2024: { collection: 'bp_compliance_datenschutz', chunks: 79, qdrant_id: 'edpb_02_2024' },
EDPB_GUIDELINES_4_2019: { collection: 'bp_compliance_datenschutz', chunks: 202, qdrant_id: 'edpb_04_2019' },
EDPB_GUIDELINES_9_2022: { collection: 'bp_compliance_datenschutz', chunks: 243, qdrant_id: 'edpb_09_2022' },
EDPB_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 29, qdrant_id: 'edpb_dpia_list' },
EDPB_LEGITIMATE_INTEREST: { collection: 'bp_compliance_datenschutz', chunks: 672, qdrant_id: 'edpb_legitimate_interest' },
EDPS_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 73, qdrant_id: 'edps_dpia_list' },
// === EDPB Guidelines (bp_compliance_datenschutz) — neu (edpb-crawler.py) ===
EDPB_ACCESS_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 1020, qdrant_id: 'edpb_access_01_2022' },
EDPB_ARTICLE48_02_2024: { collection: 'bp_compliance_datenschutz', chunks: 158, qdrant_id: 'edpb_article48_02_2024' },
EDPB_BCR_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 384, qdrant_id: 'edpb_bcr_01_2022' },
EDPB_BREACH_09_2022: { collection: 'bp_compliance_datenschutz', chunks: 486, qdrant_id: 'edpb_breach_09_2022' },
EDPB_CERTIFICATION_01_2018: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2018' },
EDPB_CERTIFICATION_01_2019: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2019' },
EDPB_CONNECTED_VEHICLES_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 482, qdrant_id: 'edpb_connected_vehicles_01_2020' },
EDPB_CONSENT_05_2020: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'edpb_consent_05_2020' },
EDPB_CONTROLLER_PROCESSOR_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 694, qdrant_id: 'edpb_controller_processor_07_2020' },
EDPB_COOKIE_TASKFORCE_2023: { collection: 'bp_compliance_datenschutz', chunks: 78, qdrant_id: 'edpb_cookie_taskforce_2023' },
EDPB_DARK_PATTERNS_03_2022: { collection: 'bp_compliance_datenschutz', chunks: 413, qdrant_id: 'edpb_dark_patterns_03_2022' },
EDPB_DPBD_04_2019: { collection: 'bp_compliance_datenschutz', chunks: 216, qdrant_id: 'edpb_dpbd_04_2019' },
EDPB_DPIA_LIST_RECOMMENDATION: { collection: 'bp_compliance_datenschutz', chunks: 31, qdrant_id: 'edpb_dpia_list_recommendation' },
EDPB_EPRIVACY_02_2023: { collection: 'bp_compliance_datenschutz', chunks: 188, qdrant_id: 'edpb_eprivacy_02_2023' },
EDPB_FACIAL_RECOGNITION_05_2022: { collection: 'bp_compliance_datenschutz', chunks: 396, qdrant_id: 'edpb_facial_recognition_05_2022' },
EDPB_FINES_04_2022: { collection: 'bp_compliance_datenschutz', chunks: 346, qdrant_id: 'edpb_fines_04_2022' },
EDPB_GEOLOCATION_04_2020: { collection: 'bp_compliance_datenschutz', chunks: 108, qdrant_id: 'edpb_geolocation_04_2020' },
EDPB_GL_2_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_gl_2_2019' },
EDPB_HEALTH_DATA_03_2020: { collection: 'bp_compliance_datenschutz', chunks: 182, qdrant_id: 'edpb_health_data_03_2020' },
EDPB_LEGAL_BASIS_02_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_legal_basis_02_2019' },
EDPB_LEGITIMATE_INTEREST_01_2024: { collection: 'bp_compliance_datenschutz', chunks: 336, qdrant_id: 'edpb_legitimate_interest_01_2024' },
EDPB_RTBF_05_2019: { collection: 'bp_compliance_datenschutz', chunks: 111, qdrant_id: 'edpb_rtbf_05_2019' },
EDPB_RRO_09_2020: { collection: 'bp_compliance_datenschutz', chunks: 82, qdrant_id: 'edpb_rro_09_2020' },
EDPB_SOCIAL_MEDIA_08_2020: { collection: 'bp_compliance_datenschutz', chunks: 333, qdrant_id: 'edpb_social_media_08_2020' },
EDPB_TRANSFERS_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_01_2020' },
EDPB_TRANSFERS_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_07_2020' },
EDPB_VIDEO_03_2019: { collection: 'bp_compliance_datenschutz', chunks: 204, qdrant_id: 'edpb_video_03_2019' },
EDPB_VVA_02_2021: { collection: 'bp_compliance_datenschutz', chunks: 273, qdrant_id: 'edpb_vva_02_2021' },
// === EDPS Guidance (bp_compliance_datenschutz) ===
EDPS_DIGITAL_ETHICS_2018: { collection: 'bp_compliance_datenschutz', chunks: 404, qdrant_id: 'edps_digital_ethics_2018' },
EDPS_GENAI_ORIENTATIONS_2024: { collection: 'bp_compliance_datenschutz', chunks: 274, qdrant_id: 'edps_genai_orientations_2024' },
// === WP29 Endorsed (bp_compliance_datenschutz) ===
WP242_PORTABILITY: { collection: 'bp_compliance_datenschutz', chunks: 141, qdrant_id: 'wp242_portability' },
WP243_DPO: { collection: 'bp_compliance_datenschutz', chunks: 54, qdrant_id: 'wp243_dpo' },
WP244_PROFILING: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'wp244_profiling' },
WP248_DPIA: { collection: 'bp_compliance_datenschutz', chunks: 288, qdrant_id: 'wp248_dpia' },
WP250_BREACH: { collection: 'bp_compliance_datenschutz', chunks: 201, qdrant_id: 'wp250_breach' },
WP259_CONSENT: { collection: 'bp_compliance_datenschutz', chunks: 496, qdrant_id: 'wp259_consent' },
WP260_TRANSPARENCY: { collection: 'bp_compliance_datenschutz', chunks: 558, qdrant_id: 'wp260_transparency' },
// === DSFA Muss-Listen (bp_dsfa_corpus) ===
DSFA_BFDI_BUND: { collection: 'bp_dsfa_corpus', chunks: 17, qdrant_id: 'dsfa_bfdi_bund' },
DSFA_DSK_GEMEINSAM: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_dsk_gemeinsam' },
DSFA_BW: { collection: 'bp_dsfa_corpus', chunks: 41, qdrant_id: 'dsfa_bw' },
DSFA_BY: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_by' },
DSFA_BE_OE: { collection: 'bp_dsfa_corpus', chunks: 31, qdrant_id: 'dsfa_be_oe' },
DSFA_BE_NOE: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_be_noe' },
DSFA_BB_OE: { collection: 'bp_dsfa_corpus', chunks: 43, qdrant_id: 'dsfa_bb_oe' },
DSFA_BB_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_bb_noe' },
DSFA_HB: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_hb' },
DSFA_HH_OE: { collection: 'bp_dsfa_corpus', chunks: 58, qdrant_id: 'dsfa_hh_oe' },
DSFA_HH_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_hh_noe' },
DSFA_MV: { collection: 'bp_dsfa_corpus', chunks: 32, qdrant_id: 'dsfa_mv' },
DSFA_NI: { collection: 'bp_dsfa_corpus', chunks: 47, qdrant_id: 'dsfa_ni' },
DSFA_RP: { collection: 'bp_dsfa_corpus', chunks: 25, qdrant_id: 'dsfa_rp' },
DSFA_SL: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_sl' },
DSFA_SN: { collection: 'bp_dsfa_corpus', chunks: 18, qdrant_id: 'dsfa_sn' },
DSFA_ST_OE: { collection: 'bp_dsfa_corpus', chunks: 57, qdrant_id: 'dsfa_st_oe' },
DSFA_ST_NOE: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_st_noe' },
DSFA_SH: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_sh' },
DSFA_TH: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_th' },
}
/**
 * Minimal regulation info for sidebar display.
 * Full REGULATIONS array with descriptions remains in page.tsx.
 *
 * Each entry pairs a regulation code with a short display label and a
 * category tag; REGULATION_INFO holds the concrete values.
 */
export interface RegulationInfo {
/** Regulation identifier, e.g. 'GDPR' or 'DE_TKG'. */
code: string
/** Short label for the regulation, e.g. 'DSGVO'. */
name: string
/**
 * Category tag, e.g. 'eu_regulation', 'eu_directive' or 'de_law'.
 * Typed as a plain string, so the compiler does not restrict the values.
 */
type: string
}
/**
 * Flat list of code / label / category triples for every regulation, law,
 * guideline and standard listed in this file.
 *
 * Entries are grouped by the section comments below. Labels (`name`) are not
 * unique: 'EDPB GL Art. 48' appears in both EDPB sections.
 */
export const REGULATION_INFO: RegulationInfo[] = [
// EU regulations and directives
{ code: 'GDPR', name: 'DSGVO', type: 'eu_regulation' },
{ code: 'EPRIVACY', name: 'ePrivacy-Richtlinie', type: 'eu_directive' },
{ code: 'SCC', name: 'Standardvertragsklauseln', type: 'eu_regulation' },
{ code: 'SCC_FULL_TEXT', name: 'SCC Volltext', type: 'eu_regulation' },
{ code: 'DPF', name: 'EU-US Data Privacy Framework', type: 'eu_regulation' },
{ code: 'AIACT', name: 'EU AI Act', type: 'eu_regulation' },
{ code: 'CRA', name: 'Cyber Resilience Act', type: 'eu_regulation' },
{ code: 'NIS2', name: 'NIS2-Richtlinie', type: 'eu_directive' },
{ code: 'EUCSA', name: 'EU Cybersecurity Act', type: 'eu_regulation' },
{ code: 'DATAACT', name: 'Data Act', type: 'eu_regulation' },
{ code: 'DGA', name: 'Data Governance Act', type: 'eu_regulation' },
{ code: 'DSA', name: 'Digital Services Act', type: 'eu_regulation' },
{ code: 'DMA', name: 'Digital Markets Act', type: 'eu_regulation' },
{ code: 'EAA', name: 'European Accessibility Act', type: 'eu_directive' },
{ code: 'DSM', name: 'DSM-Urheberrechtsrichtlinie', type: 'eu_directive' },
{ code: 'PLD', name: 'Produkthaftungsrichtlinie', type: 'eu_directive' },
{ code: 'GPSR', name: 'General Product Safety', type: 'eu_regulation' },
{ code: 'WARENKAUF_RL', name: 'Warenkauf-RL', type: 'eu_directive' },
{ code: 'KLAUSEL_RL', name: 'Klausel-RL', type: 'eu_directive' },
{ code: 'UNLAUTERE_PRAKTIKEN_RL', name: 'UGP-RL', type: 'eu_directive' },
{ code: 'PREISANGABEN_RL', name: 'Preisangaben-RL', type: 'eu_directive' },
{ code: 'OMNIBUS_RL', name: 'Omnibus-RL', type: 'eu_directive' },
{ code: 'BATTERIE_VO', name: 'Batterieverordnung', type: 'eu_regulation' },
{ code: 'E_COMMERCE_RL', name: 'E-Commerce-Richtlinie', type: 'eu_directive' },
{ code: 'VERBRAUCHERRECHTE_RL', name: 'Verbraucherrechte-RL', type: 'eu_directive' },
{ code: 'DIGITALE_INHALTE_RL', name: 'Digitale-Inhalte-RL', type: 'eu_directive' },
// Financial sector and further EU acts (EHDS, machinery, Blue Guide, IFRS)
{ code: 'DORA', name: 'DORA', type: 'eu_regulation' },
{ code: 'PSD2', name: 'PSD2', type: 'eu_directive' },
{ code: 'AMLR', name: 'AML-Verordnung', type: 'eu_regulation' },
{ code: 'MiCA', name: 'MiCA', type: 'eu_regulation' },
{ code: 'EHDS', name: 'EHDS', type: 'eu_regulation' },
{ code: 'MACHINERY_REG', name: 'Maschinenverordnung', type: 'eu_regulation' },
{ code: 'BLUE_GUIDE', name: 'Blue Guide', type: 'eu_regulation' },
{ code: 'EU_IFRS_DE', name: 'EU-IFRS (DE)', type: 'eu_regulation' },
{ code: 'EU_IFRS_EN', name: 'EU-IFRS (EN)', type: 'eu_regulation' },
// German laws
{ code: 'TDDDG', name: 'TDDDG', type: 'de_law' },
{ code: 'TMG_KOMPLETT', name: 'TMG', type: 'de_law' },
{ code: 'BDSG_FULL', name: 'BDSG', type: 'de_law' },
{ code: 'DE_DDG', name: 'DDG', type: 'de_law' },
{ code: 'DE_BGB_AGB', name: 'BGB/AGB', type: 'de_law' },
{ code: 'DE_EGBGB', name: 'EGBGB', type: 'de_law' },
{ code: 'DE_HGB_RET', name: 'HGB', type: 'de_law' },
{ code: 'DE_AO_RET', name: 'AO', type: 'de_law' },
{ code: 'DE_TKG', name: 'TKG', type: 'de_law' },
{ code: 'DE_DLINFOV', name: 'DL-InfoV', type: 'de_law' },
{ code: 'DE_BETRVG', name: 'BetrVG', type: 'de_law' },
{ code: 'DE_GESCHGEHG', name: 'GeschGehG', type: 'de_law' },
{ code: 'DE_USTG_RET', name: 'UStG', type: 'de_law' },
{ code: 'DE_URHG', name: 'UrhG', type: 'de_law' },
// German consumer protection laws
{ code: 'DE_PANGV', name: 'PAngV', type: 'de_law' },
{ code: 'DE_VSBG', name: 'VSBG', type: 'de_law' },
{ code: 'DE_PRODHAFTG', name: 'ProdHaftG', type: 'de_law' },
{ code: 'DE_VERPACKG', name: 'VerpackG', type: 'de_law' },
{ code: 'DE_ELEKTROG', name: 'ElektroG', type: 'de_law' },
{ code: 'DE_BATTDG', name: 'BattDG', type: 'de_law' },
{ code: 'DE_BFSG', name: 'BFSG', type: 'de_law' },
{ code: 'DE_UWG', name: 'UWG', type: 'de_law' },
{ code: 'DE_GEWO', name: 'GewO', type: 'de_law' },
{ code: 'DE_BGB_AGB_305', name: 'BGB AGB-Recht §§305-310', type: 'de_law' },
{ code: 'DE_BGB_FERNABSATZ', name: 'BGB Fernabsatz §§312-312k', type: 'de_law' },
{ code: 'DE_BGB_KAUFRECHT', name: 'BGB Kaufrecht §§433-480', type: 'de_law' },
{ code: 'DE_BGB_WIDERRUF', name: 'BGB Widerruf §§355-361', type: 'de_law' },
{ code: 'DE_BGB_DIGITAL', name: 'BGB Digital §§327-327u', type: 'de_law' },
{ code: 'DE_EGBGB_WIDERRUF', name: 'EGBGB Widerrufsbelehrung', type: 'de_law' },
// BSI technical guidelines
{ code: 'BSI-TR-03161-1', name: 'BSI-TR Teil 1', type: 'bsi_standard' },
{ code: 'BSI-TR-03161-2', name: 'BSI-TR Teil 2', type: 'bsi_standard' },
{ code: 'BSI-TR-03161-3', name: 'BSI-TR Teil 3', type: 'bsi_standard' },
// Austrian laws
{ code: 'AT_DSG', name: 'DSG Oesterreich', type: 'at_law' },
{ code: 'AT_DSG_FULL', name: 'DSG Volltext', type: 'at_law' },
{ code: 'AT_ECG', name: 'ECG', type: 'at_law' },
{ code: 'AT_TKG', name: 'TKG AT', type: 'at_law' },
{ code: 'AT_KSCHG', name: 'KSchG', type: 'at_law' },
{ code: 'AT_FAGG', name: 'FAGG', type: 'at_law' },
{ code: 'AT_UGB_RET', name: 'UGB', type: 'at_law' },
{ code: 'AT_BAO_RET', name: 'BAO', type: 'at_law' },
{ code: 'AT_MEDIENG', name: 'MedienG', type: 'at_law' },
{ code: 'AT_ABGB_AGB', name: 'ABGB/AGB', type: 'at_law' },
{ code: 'AT_UWG', name: 'UWG AT', type: 'at_law' },
// Swiss laws
{ code: 'CH_DSG', name: 'DSG Schweiz', type: 'ch_law' },
{ code: 'CH_DSV', name: 'DSV', type: 'ch_law' },
{ code: 'CH_OR_AGB', name: 'OR/AGB', type: 'ch_law' },
{ code: 'CH_GEBUV', name: 'GeBuV', type: 'ch_law' },
{ code: 'CH_ZERTES', name: 'ZertES', type: 'ch_law' },
{ code: 'CH_ZGB_PERS', name: 'ZGB', type: 'ch_law' },
// Other national laws (ES, IT, NL, FR, IE, UK, NO, SE, PL, CZ, HU, LU)
{ code: 'ES_LOPDGDD', name: 'LOPDGDD Spanien', type: 'national_law' },
{ code: 'IT_CODICE_PRIVACY', name: 'Codice Privacy Italien', type: 'national_law' },
{ code: 'NL_UAVG', name: 'UAVG Niederlande', type: 'national_law' },
{ code: 'FR_CNIL_GUIDE', name: 'CNIL Guide RGPD', type: 'national_law' },
{ code: 'IE_DPA_2018', name: 'DPA 2018 Ireland', type: 'national_law' },
{ code: 'UK_DPA_2018', name: 'DPA 2018 UK', type: 'national_law' },
{ code: 'UK_GDPR', name: 'UK GDPR', type: 'national_law' },
{ code: 'NO_PERSONOPPLYSNINGSLOVEN', name: 'Personopplysningsloven', type: 'national_law' },
{ code: 'SE_DATASKYDDSLAG', name: 'Dataskyddslag Schweden', type: 'national_law' },
{ code: 'PL_UODO', name: 'UODO Polen', type: 'national_law' },
{ code: 'CZ_ZOU', name: 'Zakon Tschechien', type: 'national_law' },
{ code: 'HU_INFOTV', name: 'Infotv. Ungarn', type: 'national_law' },
{ code: 'LU_DPA_LAW', name: 'Datenschutzgesetz Luxemburg', type: 'national_law' },
// EDPB guidelines (legacy entries)
{ code: 'EDPB_GUIDELINES_5_2020', name: 'EDPB GL Einwilligung', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_7_2020', name: 'EDPB GL C/P Konzepte', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_1_2020', name: 'EDPB GL Fahrzeuge', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_1_2022', name: 'EDPB GL Bussgelder', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_2_2023', name: 'EDPB GL Art. 37 Scope', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_2_2024', name: 'EDPB GL Art. 48', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_4_2019', name: 'EDPB GL Art. 25 DPbD', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_9_2022', name: 'EDPB GL Datenschutzverletzung', type: 'eu_guideline' },
{ code: 'EDPB_DPIA_LIST', name: 'EDPB DPIA-Liste', type: 'eu_guideline' },
{ code: 'EDPB_LEGITIMATE_INTEREST', name: 'EDPB Berecht. Interesse', type: 'eu_guideline' },
{ code: 'EDPS_DPIA_LIST', name: 'EDPS DPIA-Liste', type: 'eu_guideline' },
// EDPB guidelines (new entries, crawler)
{ code: 'EDPB_ACCESS_01_2022', name: 'EDPB GL Auskunftsrecht', type: 'eu_guideline' },
// NOTE(review): same label as EDPB_GUIDELINES_2_2024 in the legacy section — confirm the duplicate label is intended.
{ code: 'EDPB_ARTICLE48_02_2024', name: 'EDPB GL Art. 48', type: 'eu_guideline' },
{ code: 'EDPB_BCR_01_2022', name: 'EDPB GL BCR', type: 'eu_guideline' },
{ code: 'EDPB_BREACH_09_2022', name: 'EDPB GL Datenpannen', type: 'eu_guideline' },
{ code: 'EDPB_CERTIFICATION_01_2018', name: 'EDPB GL Zertifizierung', type: 'eu_guideline' },
{ code: 'EDPB_CERTIFICATION_01_2019', name: 'EDPB GL Zertifizierung 2019', type: 'eu_guideline' },
{ code: 'EDPB_CONNECTED_VEHICLES_01_2020', name: 'EDPB GL Vernetzte Fahrzeuge', type: 'eu_guideline' },
{ code: 'EDPB_CONSENT_05_2020', name: 'EDPB GL Consent', type: 'eu_guideline' },
{ code: 'EDPB_CONTROLLER_PROCESSOR_07_2020', name: 'EDPB GL Verantwortliche/Auftragsverarbeiter', type: 'eu_guideline' },
{ code: 'EDPB_COOKIE_TASKFORCE_2023', name: 'EDPB Cookie-Banner Taskforce', type: 'eu_guideline' },
{ code: 'EDPB_DARK_PATTERNS_03_2022', name: 'EDPB GL Dark Patterns', type: 'eu_guideline' },
{ code: 'EDPB_DPBD_04_2019', name: 'EDPB GL Data Protection by Design', type: 'eu_guideline' },
{ code: 'EDPB_DPIA_LIST_RECOMMENDATION', name: 'EDPB DPIA-Empfehlung', type: 'eu_guideline' },
{ code: 'EDPB_EPRIVACY_02_2023', name: 'EDPB GL ePrivacy', type: 'eu_guideline' },
{ code: 'EDPB_FACIAL_RECOGNITION_05_2022', name: 'EDPB GL Gesichtserkennung', type: 'eu_guideline' },
{ code: 'EDPB_FINES_04_2022', name: 'EDPB GL Bussgeldberechnung', type: 'eu_guideline' },
{ code: 'EDPB_GEOLOCATION_04_2020', name: 'EDPB GL Geolokalisierung', type: 'eu_guideline' },
// NOTE(review): this label is a near-duplicate of EDPB_VIDEO_03_2019 (video surveillance),
// while the code suggests a different guideline (2/2019) — confirm the intended label.
{ code: 'EDPB_GL_2_2019', name: 'EDPB GL Video-Ueberwachung', type: 'eu_guideline' },
{ code: 'EDPB_HEALTH_DATA_03_2020', name: 'EDPB GL Gesundheitsdaten', type: 'eu_guideline' },
{ code: 'EDPB_LEGAL_BASIS_02_2019', name: 'EDPB GL Rechtsgrundlage Art. 6(1)(b)', type: 'eu_guideline' },
{ code: 'EDPB_LEGITIMATE_INTEREST_01_2024', name: 'EDPB GL Berecht. Interesse 2024', type: 'eu_guideline' },
{ code: 'EDPB_RTBF_05_2019', name: 'EDPB GL Recht auf Vergessenwerden', type: 'eu_guideline' },
{ code: 'EDPB_RRO_09_2020', name: 'EDPB GL Relevant & Reasoned Objection', type: 'eu_guideline' },
{ code: 'EDPB_SOCIAL_MEDIA_08_2020', name: 'EDPB GL Social Media Targeting', type: 'eu_guideline' },
{ code: 'EDPB_TRANSFERS_01_2020', name: 'EDPB GL Uebermittlungen Art. 49', type: 'eu_guideline' },
{ code: 'EDPB_TRANSFERS_07_2020', name: 'EDPB GL Drittlandtransfers', type: 'eu_guideline' },
{ code: 'EDPB_VIDEO_03_2019', name: 'EDPB GL Videoueberwachung', type: 'eu_guideline' },
{ code: 'EDPB_VVA_02_2021', name: 'EDPB GL Virtuelle Sprachassistenten', type: 'eu_guideline' },
// EDPS guidance
{ code: 'EDPS_DIGITAL_ETHICS_2018', name: 'EDPS Digitale Ethik', type: 'eu_guideline' },
{ code: 'EDPS_GENAI_ORIENTATIONS_2024', name: 'EDPS GenAI Orientierungen', type: 'eu_guideline' },
// WP29 endorsed documents
{ code: 'WP242_PORTABILITY', name: 'WP242 Datenportabilitaet', type: 'wp29_endorsed' },
{ code: 'WP243_DPO', name: 'WP243 Datenschutzbeauftragter', type: 'wp29_endorsed' },
{ code: 'WP244_PROFILING', name: 'WP244 Profiling', type: 'wp29_endorsed' },
{ code: 'WP248_DPIA', name: 'WP248 DSFA', type: 'wp29_endorsed' },
{ code: 'WP250_BREACH', name: 'WP250 Datenpannen', type: 'wp29_endorsed' },
{ code: 'WP259_CONSENT', name: 'WP259 Einwilligung', type: 'wp29_endorsed' },
{ code: 'WP260_TRANSPARENCY', name: 'WP260 Transparenz', type: 'wp29_endorsed' },
// DSFA (data protection impact assessment) mandatory lists
{ code: 'DSFA_BFDI_BUND', name: 'DSFA BfDI Bund', type: 'dsfa_mussliste' },
{ code: 'DSFA_DSK_GEMEINSAM', name: 'DSFA DSK Gemeinsam', type: 'dsfa_mussliste' },
{ code: 'DSFA_BW', name: 'DSFA Baden-Wuerttemberg', type: 'dsfa_mussliste' },
{ code: 'DSFA_BY', name: 'DSFA Bayern', type: 'dsfa_mussliste' },
{ code: 'DSFA_BE_OE', name: 'DSFA Berlin oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_BE_NOE', name: 'DSFA Berlin nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_BB_OE', name: 'DSFA Brandenburg oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_BB_NOE', name: 'DSFA Brandenburg nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_HB', name: 'DSFA Bremen', type: 'dsfa_mussliste' },
{ code: 'DSFA_HH_OE', name: 'DSFA Hamburg oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_HH_NOE', name: 'DSFA Hamburg nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_MV', name: 'DSFA Mecklenburg-Vorpommern', type: 'dsfa_mussliste' },
{ code: 'DSFA_NI', name: 'DSFA Niedersachsen', type: 'dsfa_mussliste' },
{ code: 'DSFA_RP', name: 'DSFA Rheinland-Pfalz', type: 'dsfa_mussliste' },
{ code: 'DSFA_SL', name: 'DSFA Saarland', type: 'dsfa_mussliste' },
{ code: 'DSFA_SN', name: 'DSFA Sachsen', type: 'dsfa_mussliste' },
{ code: 'DSFA_ST_OE', name: 'DSFA Sachsen-Anhalt oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_ST_NOE', name: 'DSFA Sachsen-Anhalt nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_SH', name: 'DSFA Schleswig-Holstein', type: 'dsfa_mussliste' },
{ code: 'DSFA_TH', name: 'DSFA Thueringen', type: 'dsfa_mussliste' },
// International standards
{ code: 'NIST_SSDF', name: 'NIST SSDF', type: 'international_standard' },
{ code: 'NIST_CSF_2', name: 'NIST CSF 2.0', type: 'international_standard' },
{ code: 'OECD_AI_PRINCIPLES', name: 'OECD AI Principles', type: 'international_standard' },
// NOTE(review): the code prefix says ENISA but the label says CISA — confirm which is intended.
{ code: 'ENISA_SECURE_BY_DESIGN', name: 'CISA Secure by Design', type: 'international_standard' },
{ code: 'ENISA_SUPPLY_CHAIN', name: 'ENISA Supply Chain', type: 'international_standard' },
{ code: 'ENISA_THREAT_LANDSCAPE', name: 'ENISA Threat Landscape', type: 'international_standard' },
{ code: 'ENISA_ICS_SCADA', name: 'ENISA ICS/SCADA', type: 'international_standard' },
{ code: 'ENISA_CYBERSECURITY_2024', name: 'ENISA Cybersecurity 2024', type: 'international_standard' },
]

View File

@@ -1430,6 +1430,7 @@ export default function TestQualityPage() {
databases: ['Qdrant', 'PostgreSQL'],
}}
relatedPages={[
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'Provider-Vergleich' },
{ name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
{ name: 'RAG Management', href: '/ai/rag', description: 'Training Data & RAG Pipelines' },
]}

View File

@@ -141,6 +141,7 @@ export default function VoiceMatrixPage() {
}}
relatedPages={[
{ name: 'Matrix & Jitsi', href: '/communication/matrix', description: 'Kommunikation Monitoring' },
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider vergleichen' },
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice-Service' },
]}
collapsible={true}

View File

@@ -24,6 +24,7 @@ export default function DevelopmentPage() {
}}
relatedPages={[
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice/Game' },
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'LLM fuer Voice/Game' },
]}
collapsible={true}
defaultCollapsed={false}

View File

@@ -149,6 +149,7 @@ const ADMIN_SCREENS: ScreenDefinition[] = [
{ id: 'admin-obligations', name: 'Pflichten', description: 'NIS2, DSGVO, AI Act', category: 'sdk', icon: '⚡', url: '/sdk/obligations' },
// === KI & AUTOMATISIERUNG (Teal #14b8a6) ===
{ id: 'admin-llm-compare', name: 'LLM Vergleich', description: 'KI-Provider Vergleich', category: 'ai', icon: '🤖', url: '/ai/llm-compare' },
{ id: 'admin-rag', name: 'Daten & RAG', description: 'Training Data & RAG', category: 'ai', icon: '🗄️', url: '/ai/rag' },
{ id: 'admin-ocr-labeling', name: 'OCR-Labeling', description: 'Handschrift-Training', category: 'ai', icon: '✍️', url: '/ai/ocr-labeling' },
{ id: 'admin-magic-help', name: 'Magic Help', description: 'TrOCR Handschrift-OCR', category: 'ai', icon: '🪄', url: '/ai/magic-help' },
@@ -195,6 +196,7 @@ const ADMIN_CONNECTIONS: ConnectionDef[] = [
{ source: 'admin-dashboard', target: 'admin-backlog', label: 'Go-Live' },
{ source: 'admin-dashboard', target: 'admin-compliance-hub', label: 'Compliance' },
{ source: 'admin-onboarding', target: 'admin-consent' },
{ source: 'admin-onboarding', target: 'admin-llm-compare' },
{ source: 'admin-rbac', target: 'admin-consent' },
// === DSGVO FLOW ===
@@ -222,6 +224,7 @@ const ADMIN_CONNECTIONS: ConnectionDef[] = [
{ source: 'admin-dsms', target: 'admin-compliance-workflow' },
// === KI & AUTOMATISIERUNG FLOW ===
{ source: 'admin-llm-compare', target: 'admin-rag', label: 'Daten' },
{ source: 'admin-rag', target: 'admin-quality' },
{ source: 'admin-rag', target: 'admin-agents' },
{ source: 'admin-ocr-labeling', target: 'admin-magic-help', label: 'Training' },

View File

@@ -0,0 +1,665 @@
'use client'
import { useState, useEffect } from 'react'
import {
GitBranch,
Terminal,
Server,
Database,
CheckCircle2,
ArrowRight,
Laptop,
HardDrive,
RefreshCw,
Clock,
Shield,
Users,
FileCode,
Play,
Eye,
Download,
AlertTriangle,
Info,
Container
} from 'lucide-react'
/**
 * One entry of the `workflowSteps` list defined in WorkflowPage.
 */
interface WorkflowStep {
/** Step number; the steps defined in this file are numbered 1 to 7. */
id: number
/** Short heading of the step. */
title: string
/** Longer explanatory text for the step. */
description: string
/**
 * Optional command or URL associated with the step, e.g.
 * 'git push origin main' or 'http://macmini:3000'. The automated pipeline
 * step uses the placeholder '(automatisch)'.
 */
command?: string
/** Icon element for the step; the steps in this file pass lucide-react icons. */
icon: React.ReactNode
/** Machine the step is attributed to. */
location: 'macbook' | 'macmini'
}
/**
 * Backup status record held in WorkflowPage component state.
 */
interface BackupInfo {
/**
 * Last backup run; the initial state uses null.
 * NOTE(review): presumably null means no run is known yet — confirm.
 */
lastRun: string | null
/** Next run as display text; the initial state uses '02:00 Uhr'. */
nextRun: string
/** Status flag; the initial state uses 'ok'. */
status: 'ok' | 'warning' | 'error'
}
export default function WorkflowPage() {
const [activeStep, setActiveStep] = useState<number>(1)
const [backupInfo, setBackupInfo] = useState<BackupInfo>({
lastRun: null,
nextRun: '02:00 Uhr',
status: 'ok'
})
const workflowSteps: WorkflowStep[] = [
{
id: 1,
title: 'Code bearbeiten',
description: 'Arbeite mit Claude Code im Terminal. Beschreibe was du brauchst und Claude schreibt den Code.',
command: 'claude',
icon: <Terminal className="h-6 w-6" />,
location: 'macbook'
},
{
id: 2,
title: 'Änderungen stagen',
description: 'Füge die geänderten Dateien zum nächsten Commit hinzu.',
command: 'git add <dateien>',
icon: <FileCode className="h-6 w-6" />,
location: 'macbook'
},
{
id: 3,
title: 'Commit erstellen',
description: 'Erstelle einen Commit mit einer aussagekräftigen Nachricht.',
command: 'git commit -m "feat: neue Funktion"',
icon: <GitBranch className="h-6 w-6" />,
location: 'macbook'
},
{
id: 4,
title: 'Push zum Server',
description: 'Sende die Änderungen an den Mac Mini. Dies startet automatisch die CI/CD Pipeline.',
command: 'git push origin main',
icon: <ArrowRight className="h-6 w-6" />,
location: 'macbook'
},
{
id: 5,
title: 'CI/CD Pipeline',
description: 'Woodpecker führt automatisch Tests aus und baut die Container.',
command: '(automatisch)',
icon: <RefreshCw className="h-6 w-6" />,
location: 'macmini'
},
{
id: 6,
title: 'Integration Tests',
description: 'Docker Compose Test-Umgebung mit Backend, DB und Consent-Service fuer vollstaendige E2E-Tests.',
command: 'docker compose -f docker-compose.test.yml up -d',
icon: <Container className="h-6 w-6" />,
location: 'macmini'
},
{
id: 7,
title: 'Frontend testen',
description: 'Teste die Änderungen im Browser auf dem Mac Mini.',
command: 'http://macmini:3000',
icon: <Eye className="h-6 w-6" />,
location: 'macbook'
}
]
const services = [
{ name: 'Website', url: 'http://macmini:3000', port: 3000, status: 'running' },
{ name: 'Admin v2', url: 'http://macmini:3002', port: 3002, status: 'running' },
{ name: 'Studio v2', url: 'http://macmini:3001', port: 3001, status: 'running' },
{ name: 'Backend', url: 'http://macmini:8000', port: 8000, status: 'running' },
{ name: 'Gitea', url: 'http://macmini:3003', port: 3003, status: 'running' },
{ name: 'Klausur-Service', url: 'http://macmini:8086', port: 8086, status: 'running' },
]
// Commit-message prefixes shown in the "Commit-Konventionen" card grid.
// Each entry carries the prefix (`type`), a short German label
// (`description`) and a sample commit message (`example`).
const commitTypes = [
  {
    type: 'feat:',
    description: 'Neue Funktion',
    example: 'feat: add user login',
  },
  {
    type: 'fix:',
    description: 'Bugfix',
    example: 'fix: resolve login timeout',
  },
  {
    type: 'docs:',
    description: 'Dokumentation',
    example: 'docs: update API docs',
  },
  {
    type: 'style:',
    description: 'Formatierung',
    example: 'style: fix indentation',
  },
  {
    type: 'refactor:',
    description: 'Code-Umbau',
    example: 'refactor: extract helper',
  },
  {
    type: 'test:',
    description: 'Tests',
    example: 'test: add unit tests',
  },
  {
    type: 'chore:',
    description: 'Wartung',
    example: 'chore: update deps',
  },
]
return (
<div className="space-y-8">
{/* Header */}
<div className="bg-gradient-to-r from-indigo-600 to-purple-600 rounded-2xl p-8 text-white">
<h1 className="text-3xl font-bold mb-2">Entwicklungs-Workflow</h1>
<p className="text-indigo-100">
Wie wir bei BreakPilot entwickeln - von der Idee bis zum Deployment
</p>
</div>
{/* Architecture Overview */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
<Server className="h-5 w-5 text-indigo-600" />
Systemarchitektur
</h2>
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* MacBook */}
<div className="bg-slate-50 rounded-xl p-5 border-2 border-slate-200">
<div className="flex items-center gap-3 mb-4">
<div className="p-2 bg-blue-100 rounded-lg">
<Laptop className="h-6 w-6 text-blue-600" />
</div>
<div>
<h3 className="font-semibold text-slate-900">MacBook (Entwicklung)</h3>
<p className="text-sm text-slate-500">Dein Arbeitsplatz</p>
</div>
</div>
<ul className="space-y-2 text-sm">
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Terminal + Claude Code</span>
</li>
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Lokales Git Repository</span>
</li>
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Browser für Frontend-Tests</span>
</li>
<li className="flex items-center gap-2">
<AlertTriangle className="h-4 w-4 text-amber-500" />
<span>Backup manuell (MacBook nachts aus)</span>
</li>
</ul>
</div>
{/* Mac Mini */}
<div className="bg-slate-50 rounded-xl p-5 border-2 border-indigo-200">
<div className="flex items-center gap-3 mb-4">
<div className="p-2 bg-indigo-100 rounded-lg">
<HardDrive className="h-6 w-6 text-indigo-600" />
</div>
<div>
<h3 className="font-semibold text-slate-900">Mac Mini (Server)</h3>
<p className="text-sm text-slate-500">192.168.178.100</p>
</div>
</div>
<ul className="space-y-2 text-sm">
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Gitea (Git Server)</span>
</li>
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Woodpecker (CI/CD)</span>
</li>
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Docker Container (alle Services)</span>
</li>
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>PostgreSQL Datenbank</span>
</li>
<li className="flex items-center gap-2">
<CheckCircle2 className="h-4 w-4 text-green-500" />
<span>Automatisches Backup (02:00 Uhr lokal)</span>
</li>
</ul>
</div>
</div>
</div>
{/* Workflow Steps */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-6 flex items-center gap-2">
<Play className="h-5 w-5 text-indigo-600" />
Entwicklungs-Schritte
</h2>
<div className="space-y-4">
{workflowSteps.map((step, index) => (
<div
key={step.id}
className={`relative flex items-start gap-4 p-4 rounded-xl transition-all cursor-pointer ${
activeStep === step.id
? 'bg-indigo-50 border-2 border-indigo-300'
: 'bg-slate-50 border-2 border-transparent hover:border-slate-200'
}`}
onClick={() => setActiveStep(step.id)}
>
{/* Step Number */}
<div className={`flex-shrink-0 w-10 h-10 rounded-full flex items-center justify-center font-bold ${
activeStep === step.id
? 'bg-indigo-600 text-white'
: 'bg-slate-200 text-slate-600'
}`}>
{step.id}
</div>
{/* Content */}
<div className="flex-grow">
<div className="flex items-center gap-2 mb-1">
<h3 className="font-semibold text-slate-900">{step.title}</h3>
<span className={`text-xs px-2 py-0.5 rounded-full ${
step.location === 'macbook'
? 'bg-blue-100 text-blue-700'
: 'bg-purple-100 text-purple-700'
}`}>
{step.location === 'macbook' ? 'MacBook' : 'Mac Mini'}
</span>
</div>
<p className="text-sm text-slate-600 mb-2">{step.description}</p>
{step.command && (
<code className="text-xs bg-slate-800 text-green-400 px-3 py-1.5 rounded-lg font-mono">
{step.command}
</code>
)}
</div>
{/* Icon */}
<div className={`flex-shrink-0 p-2 rounded-lg ${
activeStep === step.id ? 'bg-indigo-100 text-indigo-600' : 'bg-slate-100 text-slate-400'
}`}>
{step.icon}
</div>
{/* Connector Line */}
{index < workflowSteps.length - 1 && (
<div className="absolute left-9 top-14 w-0.5 h-8 bg-slate-200" />
)}
</div>
))}
</div>
</div>
{/* Services & URLs */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
<Eye className="h-5 w-5 text-indigo-600" />
Services & URLs zum Testen
</h2>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
{services.map((service) => (
<a
key={service.name}
href={service.url}
target="_blank"
rel="noopener noreferrer"
className="flex items-center justify-between p-4 bg-slate-50 rounded-lg hover:bg-slate-100 transition-colors border border-slate-200"
>
<div>
<h3 className="font-medium text-slate-900">{service.name}</h3>
<p className="text-sm text-slate-500">Port {service.port}</p>
</div>
<div className="flex items-center gap-2">
<span className="w-2 h-2 bg-green-500 rounded-full animate-pulse" />
<ArrowRight className="h-4 w-4 text-slate-400" />
</div>
</a>
))}
</div>
</div>
{/* Commit Convention */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
<GitBranch className="h-5 w-5 text-indigo-600" />
Commit-Konventionen
</h2>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-3">
{commitTypes.map((item) => (
<div key={item.type} className="bg-slate-50 rounded-lg p-3 border border-slate-200">
<code className="text-sm font-bold text-indigo-600">{item.type}</code>
<p className="text-sm text-slate-600 mt-1">{item.description}</p>
<p className="text-xs text-slate-400 mt-1 font-mono">{item.example}</p>
</div>
))}
</div>
</div>
{/* Backup Info */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
<Shield className="h-5 w-5 text-indigo-600" />
Backup & Sicherheit
</h2>
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
{/* Mac Mini - Automatisches lokales Backup */}
<div className="bg-green-50 rounded-xl p-5 border border-green-200">
<div className="flex items-center gap-3 mb-3">
<Clock className="h-5 w-5 text-green-600" />
<h3 className="font-semibold text-green-900">Mac Mini (Auto)</h3>
</div>
<ul className="space-y-2 text-sm text-green-800">
<li> Automatisch um 02:00 Uhr</li>
<li> PostgreSQL-Dump lokal</li>
<li> Git Repository gesichert</li>
<li> 7 Tage Aufbewahrung</li>
</ul>
<div className="mt-4 p-3 bg-green-100 rounded-lg">
<code className="text-xs text-green-700 font-mono">
~/Projekte/backup-logs/
</code>
</div>
</div>
{/* MacBook - Manuelles Backup */}
<div className="bg-amber-50 rounded-xl p-5 border border-amber-200">
<div className="flex items-center gap-3 mb-3">
<AlertTriangle className="h-5 w-5 text-amber-600" />
<h3 className="font-semibold text-amber-900">MacBook (Manuell)</h3>
</div>
<ul className="space-y-2 text-sm text-amber-800">
<li> MacBook nachts aus (02:00)</li>
<li> Keine Auto-Synchronisation</li>
<li> Backup manuell anstoßen</li>
</ul>
<div className="mt-4 p-3 bg-amber-100 rounded-lg">
<code className="text-xs text-amber-700 font-mono">
rsync -avz macmini:~/Projekte/ ~/Projekte/
</code>
</div>
</div>
{/* Manuelles Backup starten */}
<div className="bg-blue-50 rounded-xl p-5 border border-blue-200">
<div className="flex items-center gap-3 mb-3">
<Download className="h-5 w-5 text-blue-600" />
<h3 className="font-semibold text-blue-900">Backup Script</h3>
</div>
<p className="text-sm text-blue-800 mb-3">
Backup jederzeit manuell starten:
</p>
<code className="block text-xs bg-slate-800 text-green-400 p-3 rounded-lg font-mono">
~/Projekte/breakpilot-pwa/scripts/daily-backup.sh
</code>
</div>
</div>
</div>
{/* Quick Commands */}
<div className="bg-slate-800 rounded-xl p-6 text-white">
<h2 className="text-xl font-semibold mb-4 flex items-center gap-2">
<Terminal className="h-5 w-5 text-green-400" />
Wichtige Befehle
</h2>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4 font-mono text-sm">
<div className="bg-slate-900 rounded-lg p-4">
<p className="text-slate-400 mb-2"># CI/CD Logs ansehen</p>
<code className="text-green-400">ssh macmini &quot;docker logs breakpilot-pwa-backend --tail 50&quot;</code>
</div>
<div className="bg-slate-900 rounded-lg p-4">
<p className="text-slate-400 mb-2"># Container neu starten</p>
<code className="text-green-400">ssh macmini &quot;docker compose restart backend&quot;</code>
</div>
<div className="bg-slate-900 rounded-lg p-4">
<p className="text-slate-400 mb-2"># Alle Container Status</p>
<code className="text-green-400">ssh macmini &quot;docker ps&quot;</code>
</div>
<div className="bg-slate-900 rounded-lg p-4">
<p className="text-slate-400 mb-2"># Pipeline Status (Gitea)</p>
<code className="text-green-400">open http://macmini:3003</code>
</div>
</div>
</div>
{/* Team Workflow with Feature Branches */}
<div className="bg-indigo-50 rounded-xl border border-indigo-200 p-6">
<h2 className="text-xl font-semibold text-indigo-900 mb-4 flex items-center gap-2">
<GitBranch className="h-5 w-5 text-indigo-600" />
Team-Workflow (3+ Entwickler)
</h2>
<div className="bg-white rounded-xl p-5 mb-4">
<h3 className="font-semibold text-slate-900 mb-3">Feature Branch Workflow</h3>
<div className="flex flex-wrap items-center gap-2 text-sm">
<code className="bg-slate-100 px-2 py-1 rounded">main</code>
<ArrowRight className="h-4 w-4 text-slate-400" />
<code className="bg-blue-100 text-blue-700 px-2 py-1 rounded">feature/neue-funktion</code>
<ArrowRight className="h-4 w-4 text-slate-400" />
<span className="text-slate-600">Entwicklung</span>
<ArrowRight className="h-4 w-4 text-slate-400" />
<span className="bg-purple-100 text-purple-700 px-2 py-1 rounded">Pull Request</span>
<ArrowRight className="h-4 w-4 text-slate-400" />
<span className="bg-green-100 text-green-700 px-2 py-1 rounded">Code Review</span>
<ArrowRight className="h-4 w-4 text-slate-400" />
<code className="bg-slate-100 px-2 py-1 rounded">main</code>
</div>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="bg-white rounded-lg p-4 border border-indigo-100">
<h4 className="font-medium text-slate-900 mb-2">1. Feature Branch erstellen</h4>
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
git checkout -b feature/mein-feature
</code>
</div>
<div className="bg-white rounded-lg p-4 border border-indigo-100">
<h4 className="font-medium text-slate-900 mb-2">2. Änderungen committen</h4>
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
git commit -m &quot;feat: beschreibung&quot;
</code>
</div>
<div className="bg-white rounded-lg p-4 border border-indigo-100">
<h4 className="font-medium text-slate-900 mb-2">3. Branch pushen</h4>
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
git push -u origin feature/mein-feature
</code>
</div>
<div className="bg-white rounded-lg p-4 border border-indigo-100">
<h4 className="font-medium text-slate-900 mb-2">4. Pull Request in Gitea</h4>
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
http://macmini:3003 → Pull Request
</code>
</div>
</div>
<div className="mt-4 p-4 bg-indigo-100 rounded-lg">
<h4 className="font-medium text-indigo-900 mb-2">Branch-Namenskonvention</h4>
<div className="grid grid-cols-2 md:grid-cols-4 gap-2 text-sm">
<div><code className="text-indigo-700">feature/</code> Neue Funktion</div>
<div><code className="text-indigo-700">fix/</code> Bugfix</div>
<div><code className="text-indigo-700">hotfix/</code> Dringender Fix</div>
<div><code className="text-indigo-700">refactor/</code> Code-Umbau</div>
</div>
</div>
</div>
{/* Team Rules */}
<div className="bg-amber-50 rounded-xl border border-amber-200 p-6">
<h2 className="text-xl font-semibold text-amber-900 mb-4 flex items-center gap-2">
<Users className="h-5 w-5 text-amber-600" />
Team-Regeln
</h2>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="flex items-start gap-3">
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
<div>
<h3 className="font-medium text-slate-900">Feature Branches nutzen</h3>
<p className="text-sm text-slate-600">Nie direkt auf main pushen - immer über Pull Request</p>
</div>
</div>
<div className="flex items-start gap-3">
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
<div>
<h3 className="font-medium text-slate-900">Code Review erforderlich</h3>
<p className="text-sm text-slate-600">Mindestens 1 Approval vor dem Merge</p>
</div>
</div>
<div className="flex items-start gap-3">
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
<div>
<h3 className="font-medium text-slate-900">Tests müssen grün sein</h3>
<p className="text-sm text-slate-600">CI/CD Pipeline muss erfolgreich durchlaufen</p>
</div>
</div>
<div className="flex items-start gap-3">
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
<div>
<h3 className="font-medium text-slate-900">Aussagekräftige Commits</h3>
<p className="text-sm text-slate-600">Nutze Conventional Commits (feat:, fix:, etc.)</p>
</div>
</div>
<div className="flex items-start gap-3">
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
<div>
<h3 className="font-medium text-slate-900">Branch aktuell halten</h3>
<p className="text-sm text-slate-600">Regelmäßig main in deinen Branch mergen</p>
</div>
</div>
<div className="flex items-start gap-3">
<AlertTriangle className="h-5 w-5 text-amber-600 flex-shrink-0 mt-0.5" />
<div>
<h3 className="font-medium text-slate-900">Nie Force-Push auf main</h3>
<p className="text-sm text-slate-600">Geschichte von main nie überschreiben</p>
</div>
</div>
</div>
</div>
{/* CI/CD Infrastruktur - Automatisierte OAuth Integration */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
<Shield className="h-5 w-5 text-indigo-600" />
CI/CD Infrastruktur (Automatisiert)
</h2>
<div className="bg-blue-50 rounded-xl p-4 mb-6 border border-blue-200">
<div className="flex items-start gap-3">
<Info className="h-5 w-5 text-blue-600 flex-shrink-0 mt-0.5" />
<div>
<h4 className="font-medium text-blue-900">Warum automatisiert?</h4>
<p className="text-sm text-blue-800 mt-1">
Die OAuth-Integration zwischen Woodpecker und Gitea ist vollautomatisiert.
Dies ist eine DevSecOps Best Practice: Credentials werden in HashiCorp Vault gespeichert
und können bei Bedarf automatisch regeneriert werden.
</p>
</div>
</div>
</div>
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* Architektur */}
<div className="bg-slate-50 rounded-xl p-5 border border-slate-200">
<h3 className="font-semibold text-slate-900 mb-3">Architektur</h3>
<div className="space-y-3 text-sm">
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
<div className="w-3 h-3 bg-green-500 rounded-full" />
<span className="font-medium">Gitea</span>
<span className="text-slate-500">Port 3003</span>
<span className="text-xs text-slate-400 ml-auto">Git Server</span>
</div>
<div className="flex items-center justify-center">
<ArrowRight className="h-4 w-4 text-slate-400 rotate-90" />
<span className="text-xs text-slate-500 ml-2">OAuth 2.0</span>
</div>
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
<div className="w-3 h-3 bg-blue-500 rounded-full" />
<span className="font-medium">Woodpecker</span>
<span className="text-slate-500">Port 8090</span>
<span className="text-xs text-slate-400 ml-auto">CI/CD Server</span>
</div>
<div className="flex items-center justify-center">
<ArrowRight className="h-4 w-4 text-slate-400 rotate-90" />
<span className="text-xs text-slate-500 ml-2">Credentials</span>
</div>
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
<div className="w-3 h-3 bg-purple-500 rounded-full" />
<span className="font-medium">Vault</span>
<span className="text-slate-500">Port 8200</span>
<span className="text-xs text-slate-400 ml-auto">Secrets Manager</span>
</div>
</div>
</div>
{/* Credentials Speicherort */}
<div className="bg-slate-50 rounded-xl p-5 border border-slate-200">
<h3 className="font-semibold text-slate-900 mb-3">Credentials Speicherorte</h3>
<div className="space-y-3 text-sm">
<div className="p-3 bg-white rounded-lg border">
<div className="flex items-center gap-2 mb-1">
<Database className="h-4 w-4 text-purple-500" />
<span className="font-medium">HashiCorp Vault</span>
</div>
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
secret/cicd/woodpecker
</code>
<p className="text-xs text-slate-500 mt-1">Client ID + Secret (Quelle der Wahrheit)</p>
</div>
<div className="p-3 bg-white rounded-lg border">
<div className="flex items-center gap-2 mb-1">
<FileCode className="h-4 w-4 text-blue-500" />
<span className="font-medium">.env Datei</span>
</div>
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
WOODPECKER_GITEA_CLIENT/SECRET
</code>
<p className="text-xs text-slate-500 mt-1">Für Docker Compose (aus Vault geladen)</p>
</div>
<div className="p-3 bg-white rounded-lg border">
<div className="flex items-center gap-2 mb-1">
<Database className="h-4 w-4 text-green-500" />
<span className="font-medium">Gitea PostgreSQL</span>
</div>
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
oauth2_application
</code>
<p className="text-xs text-slate-500 mt-1">OAuth App Registration (gehashtes Secret)</p>
</div>
</div>
</div>
</div>
{/* Troubleshooting */}
<div className="mt-6 bg-amber-50 rounded-xl p-5 border border-amber-200">
<h3 className="font-semibold text-amber-900 mb-3 flex items-center gap-2">
<AlertTriangle className="h-5 w-5 text-amber-600" />
Troubleshooting: OAuth Fehler beheben
</h3>
<p className="text-sm text-amber-800 mb-3">
Falls der Fehler &quot;Client ID not registered&quot; oder &quot;user does not exist&quot; auftritt:
</p>
<div className="bg-slate-800 rounded-lg p-4 font-mono text-sm">
<p className="text-slate-400"># Credentials automatisch regenerieren</p>
<p className="text-green-400">./scripts/sync-woodpecker-credentials.sh --regenerate</p>
<p className="text-slate-400 mt-2"># Oder manuell: Vault Gitea .env Restart</p>
<p className="text-green-400">rsync .env macmini:~/Projekte/breakpilot-pwa/</p>
<p className="text-green-400">ssh macmini &quot;cd ~/Projekte/breakpilot-pwa && docker compose up -d --force-recreate woodpecker-server&quot;</p>
</div>
</div>
</div>
{/* Team Members Info */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
<Users className="h-5 w-5 text-indigo-600" />
Team-Kommunikation
</h2>
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
<div className="bg-slate-50 rounded-lg p-4 text-center">
<div className="text-3xl mb-2">💬</div>
<h3 className="font-medium text-slate-900">Pull Request Kommentare</h3>
<p className="text-sm text-slate-600 mt-1">Code-Diskussionen im PR</p>
</div>
<div className="bg-slate-50 rounded-lg p-4 text-center">
<div className="text-3xl mb-2">📋</div>
<h3 className="font-medium text-slate-900">Issues in Gitea</h3>
<p className="text-sm text-slate-600 mt-1">Bugs & Features tracken</p>
</div>
<div className="bg-slate-50 rounded-lg p-4 text-center">
<div className="text-3xl mb-2">🔔</div>
<h3 className="font-medium text-slate-900">CI/CD Notifications</h3>
<p className="text-sm text-slate-600 mt-1">Pipeline-Status per Mail</p>
</div>
</div>
</div>
</div>
)
}

View File

@@ -177,6 +177,7 @@ export default function GPUInfrastructurePage() {
databases: ['PostgreSQL (Logs)'],
}}
relatedPages={[
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
{ name: 'Security', href: '/infrastructure/security', description: 'DevSecOps Dashboard' },
{ name: 'Builds', href: '/infrastructure/builds', description: 'CI/CD Pipeline' },
]}

View File

@@ -51,9 +51,13 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
// ===== DATABASES =====
{ type: 'service', name: 'PostgreSQL', version: '16-alpine', category: 'database', port: '5432', description: 'Hauptdatenbank', license: 'PostgreSQL', sourceUrl: 'https://github.com/postgres/postgres' },
{ type: 'service', name: 'Synapse PostgreSQL', version: '16-alpine', category: 'database', port: '-', description: 'Matrix Datenbank', license: 'PostgreSQL', sourceUrl: 'https://github.com/postgres/postgres' },
{ type: 'service', name: 'ERPNext MariaDB', version: '10.6', category: 'database', port: '-', description: 'ERPNext Datenbank', license: 'GPL-2.0', sourceUrl: 'https://github.com/MariaDB/server' },
{ type: 'service', name: 'MongoDB', version: '7.0', category: 'database', port: '27017', description: 'LibreChat Datenbank', license: 'SSPL-1.0', sourceUrl: 'https://github.com/mongodb/mongo' },
// ===== CACHE & QUEUE =====
{ type: 'service', name: 'Valkey', version: '8-alpine', category: 'cache', port: '6379', description: 'In-Memory Cache & Sessions (Redis OSS Fork)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
{ type: 'service', name: 'ERPNext Valkey Queue', version: 'alpine', category: 'cache', port: '-', description: 'Job Queue', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
{ type: 'service', name: 'ERPNext Valkey Cache', version: 'alpine', category: 'cache', port: '-', description: 'Cache Layer', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
// ===== SEARCH ENGINES =====
{ type: 'service', name: 'Qdrant', version: '1.7.4', category: 'search', port: '6333', description: 'Vector Database (RAG/Embeddings)', license: 'Apache-2.0', sourceUrl: 'https://github.com/qdrant/qdrant' },
@@ -62,6 +66,8 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
// ===== OBJECT STORAGE =====
{ type: 'service', name: 'MinIO', version: 'latest', category: 'storage', port: '9000/9001', description: 'S3-kompatibel Object Storage', license: 'AGPL-3.0', sourceUrl: 'https://github.com/minio/minio' },
{ type: 'service', name: 'IPFS (Kubo)', version: '0.24', category: 'storage', port: '5001', description: 'Dezentrales Speichersystem', license: 'MIT/Apache-2.0', sourceUrl: 'https://github.com/ipfs/kubo' },
{ type: 'service', name: 'DSMS Gateway', version: '1.0', category: 'storage', port: '8082', description: 'IPFS REST API', license: 'Proprietary', sourceUrl: '-' },
// ===== SECURITY =====
{ type: 'service', name: 'HashiCorp Vault', version: '1.15', category: 'security', port: '8200', description: 'Secrets Management', license: 'BUSL-1.1', sourceUrl: 'https://github.com/hashicorp/vault' },
@@ -77,19 +83,36 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
{ type: 'service', name: 'Jibri', version: 'stable-9823', category: 'communication', port: '-', description: 'Recording & Streaming Service', license: 'Apache-2.0', sourceUrl: 'https://github.com/jitsi/jibri' },
// ===== APPLICATION SERVICES (Python) =====
{ type: 'service', name: 'Python Backend (FastAPI)', version: '3.12', category: 'application', port: '8000', description: 'Lehrer Backend API (Klausuren, E-Mail, Alerts)', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Python Backend (FastAPI)', version: '3.12', category: 'application', port: '8000', description: 'Haupt-Backend API, Studio & Alerts Agent', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Klausur Service', version: '1.0', category: 'application', port: '8086', description: 'Abitur-Klausurkorrektur (BYOEH)', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Compliance Module', version: '2.0', category: 'application', port: '8000', description: 'GRC Framework (19 Regulations, 558 Requirements, AI)', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Transcription Worker', version: '1.0', category: 'application', port: '-', description: 'Whisper + pyannote Transkription', license: 'Proprietary', sourceUrl: '-' },
// ===== APPLICATION SERVICES (Go) =====
{ type: 'service', name: 'Go Consent Service', version: '1.21', category: 'application', port: '8081', description: 'DSGVO Consent Management', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Go School Service', version: '1.21', category: 'application', port: '8084', description: 'Klausuren, Noten, Zeugnisse', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Go Billing Service', version: '1.21', category: 'application', port: '8083', description: 'Stripe Billing Integration', license: 'Proprietary', sourceUrl: '-' },
// ===== APPLICATION SERVICES (Node.js) =====
{ type: 'service', name: 'Next.js Admin Frontend', version: '15.1', category: 'application', port: '3002', description: 'Admin Lehrer Dashboard (React)', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Next.js Admin Frontend', version: '15.1', category: 'application', port: '3000', description: 'Admin Dashboard (React)', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'H5P Content Service', version: 'latest', category: 'application', port: '8085', description: 'Interaktive Inhalte', license: 'MIT', sourceUrl: 'https://github.com/h5p/h5p-server' },
{ type: 'service', name: 'Policy Vault (NestJS)', version: '1.0', category: 'application', port: '3001', description: 'Richtlinien-Verwaltung API', license: 'Proprietary', sourceUrl: '-' },
{ type: 'service', name: 'Policy Vault (Angular)', version: '17', category: 'application', port: '4200', description: 'Richtlinien-Verwaltung UI', license: 'Proprietary', sourceUrl: '-' },
// ===== APPLICATION SERVICES (Vue) =====
{ type: 'service', name: 'Creator Studio (Vue 3)', version: '3.4', category: 'application', port: '-', description: 'Content Creation UI', license: 'Proprietary', sourceUrl: '-' },
// ===== AI/LLM SERVICES =====
{ type: 'service', name: 'LibreChat', version: 'latest', category: 'ai', port: '3080', description: 'Multi-LLM Chat Interface', license: 'MIT', sourceUrl: 'https://github.com/danny-avila/LibreChat' },
{ type: 'service', name: 'RAGFlow', version: 'latest', category: 'ai', port: '9380', description: 'RAG Pipeline Service', license: 'Apache-2.0', sourceUrl: 'https://github.com/infiniflow/ragflow' },
// ===== ERP =====
{ type: 'service', name: 'ERPNext', version: 'v15', category: 'erp', port: '8090', description: 'Open Source ERP System', license: 'GPL-3.0', sourceUrl: 'https://github.com/frappe/erpnext' },
// ===== CI/CD & VERSION CONTROL =====
{ type: 'service', name: 'Woodpecker CI', version: '2.x', category: 'cicd', port: '8082', description: 'Self-hosted CI/CD Pipeline (Drone Fork)', license: 'Apache-2.0', sourceUrl: 'https://github.com/woodpecker-ci/woodpecker' },
{ type: 'service', name: 'Gitea', version: '1.21', category: 'cicd', port: '3003', description: 'Self-hosted Git Service', license: 'MIT', sourceUrl: 'https://github.com/go-gitea/gitea' },
{ type: 'service', name: 'Dokploy', version: '0.26.7', category: 'cicd', port: '3000', description: 'Self-hosted PaaS (Vercel/Heroku Alternative)', license: 'Apache-2.0', sourceUrl: 'https://github.com/Dokploy/dokploy' },
// ===== DEVELOPMENT =====
{ type: 'service', name: 'Mailpit', version: 'latest', category: 'development', port: '8025/1025', description: 'E-Mail Testing (SMTP Catch-All)', license: 'MIT', sourceUrl: 'https://github.com/axllent/mailpit' },
@@ -161,7 +184,10 @@ const PYTHON_PACKAGES: Component[] = [
{ type: 'library', name: 'structlog', version: '24.x', category: 'python', description: 'Structured Logging', license: 'Apache-2.0', sourceUrl: 'https://github.com/hynek/structlog' },
{ type: 'library', name: 'feedparser', version: '6.x', category: 'python', description: 'RSS/Atom Feed Parser (Alerts Agent)', license: 'BSD-2-Clause', sourceUrl: 'https://github.com/kurtmckee/feedparser' },
{ type: 'library', name: 'APScheduler', version: '3.x', category: 'python', description: 'AsyncIO Job Scheduler (Alerts Agent)', license: 'MIT', sourceUrl: 'https://github.com/agronholm/apscheduler' },
{ type: 'library', name: 'beautifulsoup4', version: '4.x', category: 'python', description: 'HTML Parser (Email Parsing)', license: 'MIT', sourceUrl: 'https://code.launchpad.net/beautifulsoup' },
{ type: 'library', name: 'beautifulsoup4', version: '4.x', category: 'python', description: 'HTML Parser (Email Parsing, Compliance Scraper)', license: 'MIT', sourceUrl: 'https://code.launchpad.net/beautifulsoup' },
{ type: 'library', name: 'lxml', version: '5.x', category: 'python', description: 'XML/HTML Parser (EUR-Lex Scraping)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/lxml/lxml' },
{ type: 'library', name: 'PyMuPDF', version: '1.24+', category: 'python', description: 'PDF Parser (BSI-TR Extraction)', license: 'AGPL-3.0', sourceUrl: 'https://github.com/pymupdf/PyMuPDF' },
{ type: 'library', name: 'pdfplumber', version: '0.11+', category: 'python', description: 'PDF Table Extraction (Compliance Docs)', license: 'MIT', sourceUrl: 'https://github.com/jsvine/pdfplumber' },
{ type: 'library', name: 'websockets', version: '14.x', category: 'python', description: 'WebSocket Support (Voice Streaming)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/python-websockets/websockets' },
{ type: 'library', name: 'soundfile', version: '0.13+', category: 'python', description: 'Audio File Processing (Voice Service)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/bastibe/python-soundfile' },
{ type: 'library', name: 'scipy', version: '1.14+', category: 'python', description: 'Signal Processing (Audio)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/scipy/scipy' },
@@ -174,8 +200,7 @@ const GO_MODULES: Component[] = [
{ type: 'library', name: 'gin-gonic/gin', version: '1.9+', category: 'go', description: 'Web Framework', license: 'MIT', sourceUrl: 'https://github.com/gin-gonic/gin' },
{ type: 'library', name: 'gorm.io/gorm', version: '1.25+', category: 'go', description: 'ORM', license: 'MIT', sourceUrl: 'https://github.com/go-gorm/gorm' },
{ type: 'library', name: 'golang-jwt/jwt', version: 'v5', category: 'go', description: 'JWT Library', license: 'MIT', sourceUrl: 'https://github.com/golang-jwt/jwt' },
{ type: 'library', name: 'opensearch-project/opensearch-go', version: '4.x', category: 'go', description: 'OpenSearch Client (edu-search-service)', license: 'Apache-2.0', sourceUrl: 'https://github.com/opensearch-project/opensearch-go' },
{ type: 'library', name: 'lib/pq', version: '1.10+', category: 'go', description: 'PostgreSQL Driver (school-service)', license: 'MIT', sourceUrl: 'https://github.com/lib/pq' },
{ type: 'library', name: 'stripe/stripe-go', version: 'v76', category: 'go', description: 'Stripe SDK', license: 'MIT', sourceUrl: 'https://github.com/stripe/stripe-go' },
{ type: 'library', name: 'spf13/viper', version: 'latest', category: 'go', description: 'Configuration', license: 'MIT', sourceUrl: 'https://github.com/spf13/viper' },
{ type: 'library', name: 'uber-go/zap', version: 'latest', category: 'go', description: 'Structured Logging', license: 'MIT', sourceUrl: 'https://github.com/uber-go/zap' },
{ type: 'library', name: 'swaggo/swag', version: 'latest', category: 'go', description: 'Swagger Docs', license: 'MIT', sourceUrl: 'https://github.com/swaggo/swag' },
@@ -185,10 +210,15 @@ const GO_MODULES: Component[] = [
const NODE_PACKAGES: Component[] = [
{ type: 'library', name: 'Next.js', version: '15.1', category: 'nodejs', description: 'React Framework', license: 'MIT', sourceUrl: 'https://github.com/vercel/next.js' },
{ type: 'library', name: 'React', version: '19', category: 'nodejs', description: 'UI Library', license: 'MIT', sourceUrl: 'https://github.com/facebook/react' },
{ type: 'library', name: 'Vue.js', version: '3.4', category: 'nodejs', description: 'UI Framework (Creator Studio)', license: 'MIT', sourceUrl: 'https://github.com/vuejs/core' },
{ type: 'library', name: 'Angular', version: '17', category: 'nodejs', description: 'UI Framework (Policy Vault)', license: 'MIT', sourceUrl: 'https://github.com/angular/angular' },
{ type: 'library', name: 'NestJS', version: '10', category: 'nodejs', description: 'Node.js Framework', license: 'MIT', sourceUrl: 'https://github.com/nestjs/nest' },
{ type: 'library', name: 'TypeScript', version: '5.x', category: 'nodejs', description: 'Type System', license: 'Apache-2.0', sourceUrl: 'https://github.com/microsoft/TypeScript' },
{ type: 'library', name: 'Tailwind CSS', version: '3.4', category: 'nodejs', description: 'Utility CSS', license: 'MIT', sourceUrl: 'https://github.com/tailwindlabs/tailwindcss' },
{ type: 'library', name: 'Prisma', version: '5.x', category: 'nodejs', description: 'ORM (Policy Vault)', license: 'Apache-2.0', sourceUrl: 'https://github.com/prisma/prisma' },
{ type: 'library', name: 'Material Design Icons', version: 'latest', category: 'nodejs', description: 'Icon-System (Companion UI, Studio)', license: 'Apache-2.0', sourceUrl: 'https://github.com/google/material-design-icons' },
{ type: 'library', name: 'Recharts', version: '2.12', category: 'nodejs', description: 'React Charts (Admin Dashboard)', license: 'MIT', sourceUrl: 'https://github.com/recharts/recharts' },
{ type: 'library', name: 'Recharts', version: '2.12', category: 'nodejs', description: 'React Charts (Compliance Dashboard)', license: 'MIT', sourceUrl: 'https://github.com/recharts/recharts' },
{ type: 'library', name: 'React Flow', version: '11.x', category: 'nodejs', description: 'Node-basierte Flow-Diagramme (Screen Flow)', license: 'MIT', sourceUrl: 'https://github.com/xyflow/xyflow' },
{ type: 'library', name: 'Playwright', version: '1.50', category: 'nodejs', description: 'E2E Testing Framework (SDK Tests)', license: 'Apache-2.0', sourceUrl: 'https://github.com/microsoft/playwright' },
{ type: 'library', name: 'Vitest', version: '4.x', category: 'nodejs', description: 'Unit Testing Framework', license: 'MIT', sourceUrl: 'https://github.com/vitest-dev/vitest' },
{ type: 'library', name: 'jsPDF', version: '4.x', category: 'nodejs', description: 'PDF Generation (SDK Export)', license: 'MIT', sourceUrl: 'https://github.com/parallax/jsPDF' },
@@ -327,7 +357,9 @@ export default function SBOMPage() {
case 'communication': return 'bg-yellow-100 text-yellow-800'
case 'storage': return 'bg-orange-100 text-orange-800'
case 'search': return 'bg-pink-100 text-pink-800'
case 'erp': return 'bg-indigo-100 text-indigo-800'
case 'cache': return 'bg-cyan-100 text-cyan-800'
case 'ai': return 'bg-violet-100 text-violet-800'
case 'development': return 'bg-gray-100 text-gray-800'
case 'cicd': return 'bg-orange-100 text-orange-800'
case 'python': return 'bg-emerald-100 text-emerald-800'
@@ -383,7 +415,7 @@ export default function SBOMPage() {
<div>
<PagePurpose
title="SBOM"
purpose="Software Bill of Materials - Alle Komponenten & Abhaengigkeiten der Breakpilot Lehrer-Plattform. Wichtig fuer Supply-Chain-Security, Compliance-Audits und Lizenz-Pruefung."
purpose="Software Bill of Materials - Alle Komponenten & Abhaengigkeiten der Breakpilot-Plattform. Wichtig fuer Supply-Chain-Security, Compliance-Audits und Lizenz-Pruefung."
audience={['DevOps', 'Compliance', 'Security', 'Auditoren']}
gdprArticles={['Art. 32 (Sicherheit der Verarbeitung)']}
architecture={{
@@ -622,7 +654,7 @@ export default function SBOMPage() {
const url = URL.createObjectURL(blob)
const a = document.createElement('a')
a.href = url
a.download = `breakpilot-lehrer-sbom-${new Date().toISOString().split('T')[0]}.json`
a.download = `breakpilot-sbom-${new Date().toISOString().split('T')[0]}.json`
a.click()
}}
className="px-4 py-2 bg-orange-600 text-white rounded-lg hover:bg-orange-700 transition-colors flex items-center gap-2"

View File

@@ -335,6 +335,7 @@ export default function RBACPage() {
}}
relatedPages={[
{ name: 'Audit Trail', href: '/sdk/audit-report', description: 'LLM-Operationen protokollieren' },
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
]}
/>

View File

@@ -1,163 +0,0 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
/**
* Tests for Chunk-Browser logic:
* - Collection dropdown has all 10 collections
* - COLLECTION_TOTALS has expected keys
* - Text search highlighting logic
* - Pagination state management
*/
// Local replica of the collection names offered in the Chunk-Browser dropdown.
// It is declared here rather than imported, so the assertions below only guard
// this copy. The first entry is asserted below to be the default collection.
const COMPLIANCE_COLLECTIONS = [
'bp_compliance_gesetze',
'bp_compliance_ce',
'bp_compliance_datenschutz',
'bp_dsfa_corpus',
'bp_compliance_recht',
'bp_legal_templates',
'bp_compliance_gdpr',
'bp_compliance_schulrecht',
'bp_dsfa_templates',
'bp_dsfa_risks',
] as const
// Local replica of COLLECTION_TOTALS from page.tsx: one numeric total per collection,
// plus two aggregate entries at the end.
// NOTE(review): these are hand-copied values — presumably snapshots of the
// ingested corpus size; confirm against page.tsx before relying on them.
const COLLECTION_TOTALS: Record<string, number> = {
bp_compliance_gesetze: 58304,
bp_compliance_ce: 18183,
bp_legal_templates: 7689,
bp_compliance_datenschutz: 2448,
bp_dsfa_corpus: 7867,
bp_compliance_recht: 1425,
bp_nibis_eh: 7996,
// Aggregate: equals bp_compliance_gesetze + bp_compliance_ce (58304 + 18183).
total_legal: 76487,
// Aggregate: equals the sum of the seven per-collection entries above.
total_all: 103912,
}
// Suite covering the Chunk-Browser's replicated constants and the pure
// filtering / pagination logic it relies on.
describe('Chunk-Browser Logic', () => {
  describe('COMPLIANCE_COLLECTIONS', () => {
    it('should have exactly 10 collections', () => {
      expect(COMPLIANCE_COLLECTIONS.length).toBe(10)
    })

    it('should include bp_compliance_ce for IFRS documents', () => {
      expect(COMPLIANCE_COLLECTIONS).toContain('bp_compliance_ce')
    })

    it('should include bp_compliance_datenschutz for EFRAG/ENISA', () => {
      expect(COMPLIANCE_COLLECTIONS).toContain('bp_compliance_datenschutz')
    })

    it('should include bp_compliance_gesetze as default', () => {
      const [defaultCollection] = COMPLIANCE_COLLECTIONS
      expect(defaultCollection).toBe('bp_compliance_gesetze')
    })

    it('should have all collection names starting with bp_', () => {
      for (const name of COMPLIANCE_COLLECTIONS) {
        expect(name).toMatch(/^bp_/)
      }
    })
  })

  describe('COLLECTION_TOTALS', () => {
    it('should have bp_compliance_ce key', () => {
      expect(COLLECTION_TOTALS).toHaveProperty('bp_compliance_ce')
    })

    it('should have bp_compliance_datenschutz key', () => {
      expect(COLLECTION_TOTALS).toHaveProperty('bp_compliance_datenschutz')
    })

    it('should have positive counts for all collections', () => {
      for (const total of Object.values(COLLECTION_TOTALS)) {
        expect(total).toBeGreaterThan(0)
      }
    })

    it('total_all should be greater than total_legal', () => {
      const { total_all: all, total_legal: legal } = COLLECTION_TOTALS
      expect(all).toBeGreaterThan(legal)
    })
  })

  describe('Text search filtering logic', () => {
    const mockChunks = [
      { id: '1', text: 'DSGVO Artikel 1 Datenschutz', regulation_code: 'GDPR' },
      { id: '2', text: 'IFRS 16 Leasing Standard', regulation_code: 'EU_IFRS' },
      { id: '3', text: 'Datenschutz Grundverordnung', regulation_code: 'GDPR' },
      { id: '4', text: 'ENISA Supply Chain Security', regulation_code: 'ENISA' },
    ]

    // Case-insensitive substring match; an empty term leaves the list untouched.
    const searchChunks = (term: string) => {
      if (!term) return mockChunks
      const needle = term.toLowerCase()
      return mockChunks.filter((chunk) => chunk.text.toLowerCase().includes(needle))
    }

    it('should filter chunks by text search (case insensitive)', () => {
      expect(searchChunks('datenschutz')).toHaveLength(2)
    })

    it('should return all chunks when search is empty', () => {
      expect(searchChunks('')).toHaveLength(4)
    })

    it('should return 0 chunks when no match', () => {
      expect(searchChunks('blockchain')).toHaveLength(0)
    })

    it('should match IFRS chunks', () => {
      const hits = searchChunks('IFRS')
      expect(hits).toHaveLength(1)
      expect(hits[0].regulation_code).toBe('EU_IFRS')
    })
  })

  describe('Pagination state', () => {
    it('should start at page 0', () => {
      const initialPage = 0
      expect(initialPage).toBe(0)
    })

    it('should increment page on next', () => {
      let page = 0
      page = page + 1
      expect(page).toBe(1)
    })

    it('should maintain offset history for back navigation', () => {
      // Offsets recorded for pages 0, 1 and 2, in visiting order.
      const offsets: (string | null)[] = [null, 'uuid-20', 'uuid-40']
      // Going back one page means reading the second-to-last recorded offset.
      const previous = offsets[offsets.length - 2]
      expect(previous).toBe('uuid-20')
    })

    it('should reset state on collection change', () => {
      const state: { offset: string | null; history: (string | null)[]; page: number } = {
        offset: 'some-offset',
        history: [null, 'uuid-1'],
        page: 3,
      }

      // Simulate collection change
      state.offset = null
      state.history = []
      state.page = 0

      expect(state.offset).toBeNull()
      expect(state.history).toHaveLength(0)
      expect(state.page).toBe(0)
    })
  })
})

View File

@@ -1,90 +0,0 @@
import { describe, it, expect } from 'vitest'
/**
* Tests for RAG page constants - REGULATIONS_IN_RAG, REGULATION_SOURCES, REGULATION_LICENSES
*
* These are defined inline in page.tsx and are not imported here. Instead, this
* file declares hand-copied subsets of the expected values and asserts against
* those copies, so the tests do not detect drift in page.tsx itself.
*/
// Expected IFRS entries in REGULATIONS_IN_RAG
// Maps each regulation key to the collection it is expected to be stored in.
// NOTE(review): `chunks` is 0 for every entry and is never asserted below —
// presumably a placeholder; confirm against page.tsx.
const EXPECTED_IFRS_ENTRIES = {
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 0 },
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 0 },
EFRAG_ENDORSEMENT: { collection: 'bp_compliance_datenschutz', chunks: 0 },
}
// Expected REGULATION_SOURCES URLs
// Keyed by regulation code. The two IFRS entries share the same CELEX number
// (32023R1803) and differ only in the language path segment (/DE/ vs /EN/).
const EXPECTED_SOURCES = {
GDPR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32016R0679',
EU_IFRS_DE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
EU_IFRS_EN: 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32023R1803',
EFRAG_ENDORSEMENT: 'https://www.efrag.org/activities/endorsement-status-report',
ENISA_SECURE_DEV: 'https://www.enisa.europa.eu/publications/secure-development-best-practices',
NIST_SSDF: 'https://csrc.nist.gov/pubs/sp/800/218/final',
NIST_CSF: 'https://www.nist.gov/cyberframework',
OECD_AI: 'https://oecd.ai/en/ai-principles',
}
// Checks the locally declared expectation tables: which collection each IFRS
// entry belongs to, and the shape of every regulation source URL.
describe('RAG Page Constants', () => {
  const ifrs = EXPECTED_IFRS_ENTRIES
  const sources = EXPECTED_SOURCES

  describe('IFRS entries in REGULATIONS_IN_RAG', () => {
    it('should have EU_IFRS_DE entry with bp_compliance_ce collection', () => {
      const { collection } = ifrs.EU_IFRS_DE
      expect(collection).toBe('bp_compliance_ce')
    })

    it('should have EU_IFRS_EN entry with bp_compliance_ce collection', () => {
      const { collection } = ifrs.EU_IFRS_EN
      expect(collection).toBe('bp_compliance_ce')
    })

    it('should have EFRAG_ENDORSEMENT entry with bp_compliance_datenschutz collection', () => {
      const { collection } = ifrs.EFRAG_ENDORSEMENT
      expect(collection).toBe('bp_compliance_datenschutz')
    })
  })

  describe('REGULATION_SOURCES URLs', () => {
    it('should have valid EUR-Lex URLs for EU regulations', () => {
      const eurLexUrls = [sources.GDPR, sources.EU_IFRS_DE, sources.EU_IFRS_EN]
      for (const url of eurLexUrls) {
        expect(url).toMatch(/^https:\/\/eur-lex\.europa\.eu/)
      }
    })

    it('should have correct CELEX for IFRS DE (32023R1803)', () => {
      expect(sources.EU_IFRS_DE).toContain('32023R1803')
    })

    it('should have correct CELEX for IFRS EN (32023R1803)', () => {
      expect(sources.EU_IFRS_EN).toContain('32023R1803')
    })

    it('should have DE language for IFRS DE', () => {
      expect(sources.EU_IFRS_DE).toContain('/DE/')
    })

    it('should have EN language for IFRS EN', () => {
      expect(sources.EU_IFRS_EN).toContain('/EN/')
    })

    it('should have EFRAG URL for endorsement status', () => {
      expect(sources.EFRAG_ENDORSEMENT).toMatch(/^https:\/\/www\.efrag\.org/)
    })

    it('should have ENISA URL for secure development', () => {
      expect(sources.ENISA_SECURE_DEV).toMatch(/^https:\/\/www\.enisa\.europa\.eu/)
    })

    it('should have NIST URLs for SSDF and CSF', () => {
      for (const url of [sources.NIST_SSDF, sources.NIST_CSF]) {
        expect(url).toMatch(/nist\.gov/)
      }
    })

    it('should have OECD URL for AI principles', () => {
      expect(sources.OECD_AI).toMatch(/oecd\.ai/)
    })

    it('should all be valid HTTPS URLs', () => {
      for (const url of Object.values(sources)) {
        expect(url).toMatch(/^https:\/\//)
      }
    })
  })
})

View File

@@ -1,249 +0,0 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
// Mock fetch globally
// The global fetch is replaced by this mock so tests can queue upstream
// responses and inspect the URL and request body that were sent.
const mockFetch = vi.fn()
global.fetch = mockFetch
// Mock NextRequest and NextResponse
// NextResponse.json is replaced by a function returning a plain
// `{ data, status }` object (status falls back to 200 when none is given);
// the assertions below read these two fields directly.
vi.mock('next/server', () => ({
NextRequest: class MockNextRequest {
url: string
constructor(url: string) {
this.url = url
}
},
NextResponse: {
json: (data: unknown, init?: { status?: number }) => ({
data,
status: init?.status || 200,
}),
},
}))
// Suite for the legal-corpus route handler's `scroll` and `collection-count`
// actions. Upstream HTTP traffic is served by the module-level `mockFetch`.
describe('Legal Corpus API Proxy', () => {
  const BASE_URL = 'http://localhost/api/legal-corpus'

  /** Builds a fresh, empty scroll page as returned by the upstream service. */
  const emptyPage = () => ({ result: { points: [], next_page_offset: null } })

  /** Queues one successful upstream response whose JSON body is `payload`. */
  const queueOk = (payload: unknown) =>
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: () => Promise.resolve(payload),
    })

  /** Queues one failed upstream response with the given HTTP status. */
  const queueFailure = (status: number) =>
    mockFetch.mockResolvedValueOnce({
      ok: false,
      status,
    })

  /** Invokes the route's GET handler with the given query string (without the leading `?`). */
  const callRoute = async (query: string) => {
    const { GET } = await import('../route')
    const request = { url: `${BASE_URL}?${query}` }
    return (await GET(request as any)) as any
  }

  /** URL of the first upstream request made during the current test. */
  const sentUrl = () => mockFetch.mock.calls[0][0]

  /** Parsed JSON body of the first upstream request made during the current test. */
  const sentBody = () => JSON.parse(mockFetch.mock.calls[0][1].body)

  beforeEach(() => {
    // mockReset (not mockClear) also drops queued `mockResolvedValueOnce`
    // entries, so a response left unconsumed by a failing test cannot leak
    // into the next one.
    mockFetch.mockReset()
  })

  describe('scroll action', () => {
    it('should call Qdrant scroll endpoint with correct collection', async () => {
      queueOk({
        result: {
          points: [
            { id: 'uuid-1', payload: { text: 'DSGVO Artikel 1', regulation_code: 'GDPR' } },
            { id: 'uuid-2', payload: { text: 'DSGVO Artikel 2', regulation_code: 'GDPR' } },
          ],
          next_page_offset: 'uuid-3',
        },
      })

      await callRoute('action=scroll&collection=bp_compliance_ce&limit=20')

      expect(mockFetch).toHaveBeenCalledTimes(1)
      expect(sentUrl()).toContain('/collections/bp_compliance_ce/points/scroll')
      const body = sentBody()
      expect(body.limit).toBe(20)
      expect(body.with_payload).toBe(true)
      expect(body.with_vector).toBe(false)
    })

    it('should pass offset parameter to Qdrant', async () => {
      queueOk(emptyPage())

      await callRoute('action=scroll&collection=bp_compliance_gesetze&offset=some-uuid')

      expect(sentBody().offset).toBe('some-uuid')
    })

    it('should limit chunks to max 100', async () => {
      queueOk(emptyPage())

      await callRoute('action=scroll&collection=bp_compliance_ce&limit=500')

      expect(sentBody().limit).toBe(100)
    })

    it('should apply text_search filter client-side', async () => {
      queueOk({
        result: {
          points: [
            { id: 'uuid-1', payload: { text: 'DSGVO Artikel 1 Datenschutz' } },
            { id: 'uuid-2', payload: { text: 'IFRS Standard 16 Leasing' } },
            { id: 'uuid-3', payload: { text: 'Datenschutz Grundverordnung' } },
          ],
          next_page_offset: null,
        },
      })

      const response = await callRoute(
        'action=scroll&collection=bp_compliance_ce&text_search=Datenschutz',
      )

      // Should filter to only chunks containing "Datenschutz"
      expect(response.data.chunks).toHaveLength(2)
      expect(response.data.chunks[0].text).toContain('Datenschutz')
    })

    it('should flatten payload into chunk objects', async () => {
      queueOk({
        result: {
          points: [
            {
              id: 'uuid-1',
              payload: {
                text: 'IFRS 16 Leasing',
                regulation_code: 'EU_IFRS',
                language: 'de',
                celex: '32023R1803',
              },
            },
          ],
          next_page_offset: null,
        },
      })

      const response = await callRoute('action=scroll&collection=bp_compliance_ce')

      const chunk = response.data.chunks[0]
      expect(chunk.id).toBe('uuid-1')
      expect(chunk.text).toBe('IFRS 16 Leasing')
      expect(chunk.regulation_code).toBe('EU_IFRS')
      expect(chunk.language).toBe('de')
    })

    it('should return next_offset from Qdrant response', async () => {
      queueOk({
        result: { points: [], next_page_offset: 'next-uuid' },
      })

      const response = await callRoute('action=scroll&collection=bp_compliance_ce')

      expect(response.data.next_offset).toBe('next-uuid')
    })

    it('should handle Qdrant scroll failure', async () => {
      queueFailure(404)

      const response = await callRoute('action=scroll&collection=nonexistent')

      expect(response.status).toBe(404)
    })

    it('should apply filter when filter_key and filter_value provided', async () => {
      queueOk(emptyPage())

      await callRoute(
        'action=scroll&collection=bp_compliance_ce&filter_key=language&filter_value=de',
      )

      expect(sentBody().filter).toEqual({
        must: [{ key: 'language', match: { value: 'de' } }],
      })
    })

    it('should default collection to bp_compliance_gesetze', async () => {
      queueOk(emptyPage())

      await callRoute('action=scroll')

      expect(sentUrl()).toContain('/collections/bp_compliance_gesetze/')
    })
  })

  describe('collection-count action', () => {
    it('should return points_count from Qdrant collection info', async () => {
      queueOk({
        result: { points_count: 55053 },
      })

      const response = await callRoute('action=collection-count&collection=bp_compliance_ce')

      expect(response.data.count).toBe(55053)
    })

    it('should return 0 when Qdrant is unavailable', async () => {
      queueFailure(500)

      const response = await callRoute('action=collection-count&collection=bp_compliance_ce')

      expect(response.data.count).toBe(0)
    })

    it('should default to bp_compliance_gesetze collection', async () => {
      queueOk({ result: { points_count: 1234 } })

      await callRoute('action=collection-count')

      expect(sentUrl()).toContain('/collections/bp_compliance_gesetze')
    })
  })
})

View File

@@ -66,99 +66,6 @@ export async function GET(request: NextRequest) {
url += `/traceability?chunk_id=${encodeURIComponent(chunkId || '')}&regulation=${encodeURIComponent(regulation || '')}`
break
}
case 'scroll': {
  // Pages through the points of one Qdrant collection and returns them as
  // flat chunk objects ({ id, ...payload }) together with the next-page cursor.
  //
  // Query parameters:
  //   collection               target collection (default 'bp_compliance_gesetze')
  //   limit                    page size, default 20, capped at 100
  //   offset                   cursor taken from a previous response's next_offset
  //   filter_key/filter_value  optional exact-match payload filter (both required)
  //   text_search              optional case-insensitive substring filter
  const DEFAULT_LIMIT = 20
  const MAX_LIMIT = 100

  const collection = searchParams.get('collection') || 'bp_compliance_gesetze'
  const offsetParam = searchParams.get('offset')
  const filterKey = searchParams.get('filter_key')
  const filterValue = searchParams.get('filter_value')
  const textSearch = searchParams.get('text_search')

  // parseInt yields NaN for non-numeric input; NaN passes through Math.min
  // and JSON.stringify would emit it as `null`. Anything that is not a
  // positive integer therefore falls back to the default page size.
  const requestedLimit = parseInt(searchParams.get('limit') || String(DEFAULT_LIMIT), 10)
  const limit = Number.isFinite(requestedLimit) && requestedLimit > 0
    ? Math.min(requestedLimit, MAX_LIMIT)
    : DEFAULT_LIMIT

  const scrollBody: Record<string, unknown> = {
    limit,
    with_payload: true,
    with_vector: false,
  }
  if (offsetParam) {
    scrollBody.offset = offsetParam
  }
  if (filterKey && filterValue) {
    scrollBody.filter = {
      must: [{ key: filterKey, match: { value: filterValue } }],
    }
  }

  const scrollRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(collection)}/points/scroll`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(scrollBody),
    cache: 'no-store',
  })
  if (!scrollRes.ok) {
    // Propagate the upstream status so callers can tell e.g. 404 from 500.
    return NextResponse.json({ error: 'Qdrant scroll failed' }, { status: scrollRes.status })
  }

  const scrollData = await scrollRes.json()
  const points = (scrollData.result?.points || []).map((p: { id: string; payload?: Record<string, unknown> }) => ({
    id: p.id,
    ...p.payload,
  }))

  // Text search runs here on the already-fetched page, not inside Qdrant, so
  // a page may come back with fewer than `limit` chunks. The term is trimmed
  // once and reused so surrounding whitespace cannot prevent a match.
  const term = (textSearch || '').trim().toLowerCase()
  const filtered = term
    ? points.filter((p: Record<string, unknown>) => {
        const text = String(p.text || p.content || p.chunk_text || '')
        return text.toLowerCase().includes(term)
      })
    : points

  return NextResponse.json({
    chunks: filtered,
    next_offset: scrollData.result?.next_page_offset || null,
    // Size of the page before text filtering.
    total_in_page: points.length,
  })
}
// Returns, for each id in the comma-separated `qdrant_ids` parameter, the exact
// number of points in the collection whose `regulation_id` payload field equals
// that id. Response shape: { counts: { [id]: number } }.
case 'regulation-counts-batch': {
const col = searchParams.get('collection') || 'bp_compliance_gesetze'
// Accept qdrant_ids (actual regulation_id values in Qdrant payload)
const qdrantIds = (searchParams.get('qdrant_ids') || '').split(',').filter(Boolean)
const results: Record<string, number> = {}
// Process the ids in slices of 10; the requests within a slice run in parallel
// and the next slice starts only after the current one has settled.
for (let i = 0; i < qdrantIds.length; i += 10) {
const batch = qdrantIds.slice(i, i + 10)
await Promise.all(batch.map(async (qid) => {
try {
const res = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}/points/count`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
filter: { must: [{ key: 'regulation_id', match: { value: qid } }] },
exact: true,
}),
cache: 'no-store',
})
// A non-OK response leaves the id out of `results` entirely (it is not set to 0).
if (res.ok) {
const data = await res.json()
results[qid] = data.result?.count || 0
}
} catch { /* skip failed counts */ }
}))
}
return NextResponse.json({ counts: results })
}
// Returns the number of points in a collection, read from the `points_count`
// field of the collection-info response. Response shape: { count: number }.
case 'collection-count': {
const col = searchParams.get('collection') || 'bp_compliance_gesetze'
const countRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}`, {
cache: 'no-store',
})
// An upstream failure is reported as a count of 0 rather than as an error response.
if (!countRes.ok) {
return NextResponse.json({ count: 0 })
}
const countData = await countRes.json()
return NextResponse.json({
count: countData.result?.points_count || 0,
})
}
default:
return NextResponse.json({ error: 'Unknown action' }, { status: 400 })
}

View File

@@ -1,19 +1,8 @@
import type { Metadata } from 'next'
import localFont from 'next/font/local'
import { Noto_Sans } from 'next/font/google'
import { Inter } from 'next/font/google'
import './globals.css'
const inter = localFont({
src: '../public/fonts/Inter-VariableFont.woff2',
variable: '--font-inter',
display: 'swap',
})
const notoSans = Noto_Sans({
subsets: ['latin', 'latin-ext'],
variable: '--font-noto-sans',
display: 'swap',
})
const inter = Inter({ subsets: ['latin'] })
export const metadata: Metadata = {
title: 'BreakPilot Admin Lehrer KI',
@@ -27,7 +16,7 @@ export default function RootLayout({
}) {
return (
<html lang="de">
<body className={`${inter.className} ${notoSans.variable}`}>{children}</body>
<body className={inter.className}>{children}</body>
</html>
)
}

View File

@@ -14,7 +14,7 @@
import Link from 'next/link'
import { useState, useEffect } from 'react'
export type AIToolId = 'test-quality' | 'gpu' | 'ocr-compare' | 'ocr-labeling' | 'rag-pipeline' | 'magic-help'
export type AIToolId = 'llm-compare' | 'test-quality' | 'gpu' | 'ocr-compare' | 'ocr-labeling' | 'rag-pipeline' | 'magic-help'
export interface AIToolModule {
id: AIToolId
@@ -25,6 +25,13 @@ export interface AIToolModule {
}
export const AI_TOOLS_MODULES: AIToolModule[] = [
{
id: 'llm-compare',
name: 'LLM Vergleich',
href: '/ai/llm-compare',
description: 'KI-Provider vergleichen',
icon: '⚖️',
},
{
id: 'test-quality',
name: 'Test Quality (BQAS)',
@@ -86,6 +93,13 @@ export interface AIToolsSidebarResponsiveProps extends AIToolsSidebarProps {
// Icons für die Tools
const ToolIcon = ({ id }: { id: string }) => {
switch (id) {
case 'llm-compare':
return (
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
d="M3 6l3 1m0 0l-3 9a5.002 5.002 0 006.001 0M6 7l3 9M6 7l6-2m6 2l3-1m-3 1l-3 9a5.002 5.002 0 006.001 0M18 7l3 9m-3-9l-6-2m0-2v2m0 16V5m0 16H9m3 0h3" />
</svg>
)
case 'test-quality':
return (
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -214,6 +228,8 @@ export function AIToolsSidebar({
<div className="flex items-center gap-2 text-xs">
<span title="GPU Infrastruktur">🖥</span>
<span className="text-slate-400"></span>
<span title="LLM Vergleich"></span>
<span className="text-slate-400"></span>
<span title="Test Quality">🧪</span>
</div>
</div>
@@ -225,6 +241,9 @@ export function AIToolsSidebar({
{/* Quick Info zum aktuellen Tool */}
<div className="pt-2 border-t border-slate-200 dark:border-gray-700">
<div className="text-xs text-slate-500 dark:text-slate-400 px-1">
{currentTool === 'llm-compare' && (
<span>Vergleichen Sie LLM-Antworten verschiedener Provider</span>
)}
{currentTool === 'test-quality' && (
<span>Ueberwachen Sie die Qualitaet der KI-Ausgaben</span>
)}
@@ -368,6 +387,11 @@ export function AIToolsSidebarResponsive({
<span className="text-xs text-slate-500 mt-1">GPU</span>
</div>
<span className="text-slate-400"></span>
<div className="flex flex-col items-center">
<span className="text-2xl"></span>
<span className="text-xs text-slate-500 mt-1">LLM</span>
</div>
<span className="text-slate-400"></span>
<div className="flex flex-col items-center">
<span className="text-2xl">🧪</span>
<span className="text-xs text-slate-500 mt-1">BQAS</span>
@@ -381,6 +405,11 @@ export function AIToolsSidebarResponsive({
{/* Quick Info */}
<div className="pt-4 border-t border-slate-200 dark:border-gray-700">
<div className="text-sm text-slate-600 dark:text-slate-400 p-3 bg-slate-50 dark:bg-gray-800 rounded-xl">
{currentTool === 'llm-compare' && (
<>
<strong className="text-slate-700 dark:text-slate-300">Aktuell:</strong> LLM-Antworten verschiedener Provider vergleichen
</>
)}
{currentTool === 'test-quality' && (
<>
<strong className="text-slate-700 dark:text-slate-300">Aktuell:</strong> Qualitaet der KI-Ausgaben ueberwachen

View File

@@ -1,236 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import { useGridEditor } from './useGridEditor'
import { GridToolbar } from './GridToolbar'
import { GridTable } from './GridTable'
import { GridImageOverlay } from './GridImageOverlay'
// Props for GridEditor.
interface GridEditorProps {
// Session to edit; when null the editor renders a "no session selected" placeholder.
sessionId: string | null
// Optional callback for the final "Fertig" button; the button is only rendered when this is set.
onNext?: () => void
}
/**
 * Editor for the structured grid of one OCR session.
 *
 * Renders, depending on state: a placeholder when no session is selected, a
 * spinner while loading, an error box with a retry button, an empty state with
 * a "build grid" button, or the full editor (summary bar, toolbar, optional
 * image overlay, one table per zone, keyboard hints and an optional "Fertig"
 * button).
 *
 * @param sessionId Session whose grid is loaded and edited; null shows the placeholder.
 * @param onNext    Optional callback invoked by the "Fertig" button after pending
 *                  changes have been saved.
 */
export function GridEditor({ sessionId, onNext }: GridEditorProps) {
  const {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    setSelectedCell,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  } = useGridEditor(sessionId)

  const [showOverlay, setShowOverlay] = useState(false)

  // Load grid on mount
  useEffect(() => {
    if (sessionId) {
      loadGrid()
    }
  }, [sessionId, loadGrid])

  // Keyboard shortcuts: Ctrl/Cmd+Z undo, Ctrl/Cmd+Shift+Z or Ctrl/Cmd+Y redo,
  // Ctrl/Cmd+S save.
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (!(e.metaKey || e.ctrlKey)) return
      // With Shift (or Caps Lock) active, `e.key` is reported as an uppercase
      // letter, so a literal comparison against 'z' would never see
      // Ctrl+Shift+Z. Compare case-insensitively instead.
      const key = (e.key || '').toLowerCase()
      if (key === 'z' && !e.shiftKey) {
        e.preventDefault()
        undo()
      } else if ((key === 'z' && e.shiftKey) || key === 'y') {
        e.preventDefault()
        redo()
      } else if (key === 's' && !e.shiftKey) {
        e.preventDefault()
        saveGrid()
      }
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [undo, redo, saveGrid])

  // Moves the selection to the neighbouring cell and focuses its input.
  const handleNavigate = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
      const target = getAdjacentCell(cellId, direction)
      if (target) {
        setSelectedCell(target)
        // Focus the input; deferred so it runs after the current event has
        // been processed.
        setTimeout(() => {
          const el = document.getElementById(`cell-${target}`)
          if (el) {
            el.focus()
            if (el instanceof HTMLInputElement) el.select()
          }
        }, 0)
      }
    },
    [getAdjacentCell, setSelectedCell],
  )

  if (!sessionId) {
    return (
      <div className="text-center py-12 text-gray-400">
        Keine Session ausgewaehlt.
      </div>
    )
  }

  if (loading) {
    return (
      <div className="flex items-center justify-center py-16">
        <div className="flex items-center gap-3 text-gray-500 dark:text-gray-400">
          <svg className="w-5 h-5 animate-spin" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          Grid wird aufgebaut...
        </div>
      </div>
    )
  }

  if (error) {
    return (
      <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-4">
        <p className="text-sm text-red-700 dark:text-red-300">
          Fehler: {error}
        </p>
        <button
          onClick={buildGrid}
          className="mt-2 text-xs px-3 py-1.5 bg-red-600 text-white rounded hover:bg-red-700"
        >
          Erneut versuchen
        </button>
      </div>
    )
  }

  if (!grid || !grid.zones.length) {
    return (
      <div className="text-center py-12">
        <p className="text-gray-400 mb-4">Kein Grid vorhanden.</p>
        <button
          onClick={buildGrid}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm"
        >
          Grid aus OCR-Ergebnissen erstellen
        </button>
      </div>
    )
  }

  // Swatch colors for the per-color statistics; unknown names fall back to gray.
  const colorSwatches: Record<string, string> = {
    red: '#dc2626', blue: '#2563eb', green: '#16a34a',
    orange: '#ea580c', purple: '#9333ea', yellow: '#ca8a04',
  }

  return (
    <div className="space-y-4">
      {/* Summary bar */}
      <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
        <span>{grid.summary.total_zones} Zone(n)</span>
        <span>{grid.summary.total_columns} Spalten</span>
        <span>{grid.summary.total_rows} Zeilen</span>
        <span>{grid.summary.total_cells} Zellen</span>
        {grid.boxes_detected > 0 && (
          <span className="text-amber-600 dark:text-amber-400">
            {grid.boxes_detected} Box(en) erkannt
          </span>
        )}
        {/* Per-color counts; the 'black' entry is not listed */}
        {grid.summary.color_stats && Object.entries(grid.summary.color_stats)
          .filter(([name]) => name !== 'black')
          .map(([name, count]) => (
            <span key={name} className="inline-flex items-center gap-1">
              <span
                className="w-2 h-2 rounded-full"
                style={{ backgroundColor: colorSwatches[name] || '#6b7280' }}
              />
              <span>{count} {name}</span>
            </span>
          ))
        }
        {(grid.summary.recovered_colored ?? 0) > 0 && (
          <span className="text-purple-600 dark:text-purple-400">
            +{grid.summary.recovered_colored} recovered
          </span>
        )}
        <span className="text-gray-400">
          {grid.duration_seconds.toFixed(1)}s
        </span>
      </div>

      {/* Toolbar */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
        <GridToolbar
          dirty={dirty}
          saving={saving}
          canUndo={canUndo}
          canRedo={canRedo}
          showOverlay={showOverlay}
          onSave={saveGrid}
          onUndo={undo}
          onRedo={redo}
          onRebuild={buildGrid}
          onToggleOverlay={() => setShowOverlay(!showOverlay)}
        />
      </div>

      {/* Image overlay */}
      {showOverlay && (
        <GridImageOverlay sessionId={sessionId} grid={grid} />
      )}

      {/* Zone tables */}
      <div className="space-y-4">
        {grid.zones.map((zone) => (
          <div
            key={zone.zone_index}
            className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
          >
            <GridTable
              zone={zone}
              layoutMetrics={grid.layout_metrics}
              selectedCell={selectedCell}
              onSelectCell={setSelectedCell}
              onCellTextChange={updateCellText}
              onToggleColumnBold={toggleColumnBold}
              onToggleRowHeader={toggleRowHeader}
              onNavigate={handleNavigate}
            />
          </div>
        ))}
      </div>

      {/* Tip */}
      <div className="text-[11px] text-gray-400 dark:text-gray-500 flex items-center gap-4">
        <span>Tab: naechste Zelle</span>
        <span>Enter: Zeile runter</span>
        <span>Spalte fett: Klick auf Spaltenkopf</span>
        <span>Header: Klick auf Zeilennummer</span>
        <span>Ctrl+Z/Y: Undo/Redo</span>
        <span>Ctrl+S: Speichern</span>
      </div>

      {/* Next step button */}
      {onNext && (
        <div className="flex justify-end">
          <button
            onClick={async () => {
              // Persist pending edits before handing control to the caller.
              if (dirty) await saveGrid()
              onNext()
            }}
            className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 transition-colors"
          >
            Fertig
          </button>
        </div>
      )}
    </div>
  )
}

View File

@@ -1,98 +0,0 @@
'use client'
import type { StructuredGrid } from './types'
const KLAUSUR_API = '/klausur-api'
/** Props accepted by GridImageOverlay. */
interface GridImageOverlayProps {
/** Session id; interpolated into the URL of the cropped scan image. */
sessionId: string
/** Grid whose zones, column separators and row separators are drawn over the image. */
grid: StructuredGrid
}
/**
 * Border/fill color pairs used to tint zones in the overlay.
 * A zone picks its entry by `zone_index` modulo the palette length, so the
 * colors repeat once there are more zones than entries.
 */
const ZONE_COLORS = [
{ border: 'rgba(20,184,166,0.7)', fill: 'rgba(20,184,166,0.05)' }, // teal
{ border: 'rgba(245,158,11,0.7)', fill: 'rgba(245,158,11,0.05)' }, // amber
{ border: 'rgba(99,102,241,0.7)', fill: 'rgba(99,102,241,0.05)' }, // indigo
{ border: 'rgba(236,72,153,0.7)', fill: 'rgba(236,72,153,0.05)' }, // pink
]
/**
 * Renders the cropped scan of a session with the detected grid drawn on top.
 *
 * The SVG overlay uses the grid's image dimensions as its viewBox, so every
 * zone, column and row coordinate below is passed through unchanged from the
 * grid's pixel values. The overlay ignores pointer events.
 *
 * @param props.sessionId Session whose cropped image is displayed.
 * @param props.grid      Grid providing image dimensions and zones.
 */
export function GridImageOverlay(props: GridImageOverlayProps) {
  const { sessionId, grid } = props
  const croppedImageSrc =
    KLAUSUR_API + '/api/v1/ocr-pipeline/sessions/' + sessionId + '/image/cropped'
  const overlayViewBox = `0 0 ${grid.image_width} ${grid.image_height}`

  // One <g> per zone: outline, column separators, row separators, label.
  const zoneShapes = grid.zones.map((zone) => {
    const palette = ZONE_COLORS[zone.zone_index % ZONE_COLORS.length]
    const box = zone.bbox_px
    const isBox = zone.zone_type === 'box'
    const rightEdge = box.x + box.w
    const bottomEdge = box.y + box.h

    // The first column/row starts at the zone edge, so it gets no separator.
    const columnSeparators = zone.columns.slice(1).map((col) => (
      <line
        key={`col-${col.index}`}
        x1={col.x_min_px}
        y1={box.y}
        x2={col.x_min_px}
        y2={bottomEdge}
        stroke={palette.border}
        strokeWidth={1}
        strokeDasharray="4 2"
      />
    ))

    const rowSeparators = zone.rows.slice(1).map((row) => (
      <line
        key={`row-${row.index}`}
        x1={box.x}
        y1={row.y_min_px}
        x2={rightEdge}
        y2={row.y_min_px}
        stroke={palette.border}
        strokeWidth={0.5}
        strokeDasharray="3 3"
        opacity={0.5}
      />
    ))

    return (
      <g key={zone.zone_index}>
        {/* Zone outline: solid and thicker for boxes, dashed for content */}
        <rect
          x={box.x}
          y={box.y}
          width={box.w}
          height={box.h}
          fill={palette.fill}
          stroke={palette.border}
          strokeWidth={isBox ? 3 : 1.5}
          strokeDasharray={isBox ? undefined : '6 3'}
        />
        {columnSeparators}
        {rowSeparators}
        {/* Zone label */}
        <text
          x={box.x + 4}
          y={box.y + 14}
          fill={palette.border}
          fontSize={12}
          fontWeight="bold"
          fontFamily="monospace"
        >
          {isBox ? 'BOX' : 'CONTENT'} Z{zone.zone_index}
          {' '}({zone.columns.length}x{zone.rows.length})
        </text>
      </g>
    )
  })

  return (
    <div className="relative w-full overflow-auto border border-gray-200 dark:border-gray-700 rounded-lg bg-gray-100 dark:bg-gray-900">
      <div className="relative inline-block">
        {/* Source image */}
        {/* eslint-disable-next-line @next/next/no-img-element */}
        <img
          src={croppedImageSrc}
          alt="OCR Scan"
          className="block max-w-full"
          style={{ imageRendering: 'auto' }}
        />
        {/* SVG overlay */}
        <svg
          className="absolute inset-0 w-full h-full pointer-events-none"
          viewBox={overlayViewBox}
          preserveAspectRatio="xMinYMin meet"
        >
          {zoneShapes}
        </svg>
      </div>
    </div>
  )
}

View File

@@ -1,431 +0,0 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridZone, LayoutMetrics } from './types'
/** Props accepted by GridTable. */
interface GridTableProps {
/** Zone whose columns, rows and cells are rendered. */
zone: GridZone
/** Optional page metrics used for width scaling, content-row height and font size. */
layoutMetrics?: LayoutMetrics
/** Id of the currently selected cell, or null when nothing is selected. */
selectedCell: string | null
/** Called with a cell id when that cell is focused or clicked. */
onSelectCell: (cellId: string) => void
/** Called with a cell id and the new input value when a cell's text is edited. */
onCellTextChange: (cellId: string, text: string) => void
/** Called with zone and column index when a column header is clicked. */
onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
/** Called with zone and row index when a row-number cell is clicked. */
onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
/** Called for keyboard navigation (Tab, Shift+Tab, Enter, Alt+ArrowUp/Down) from a cell. */
onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
}
/** Gutter width for row numbers (px). */
const ROW_NUM_WIDTH = 36
/** Minimum column width in px so columns remain usable. */
const MIN_COL_WIDTH = 40
/** Minimum row height in px. */
const MIN_ROW_HEIGHT = 26
/**
 * Editable table for a single grid zone, laid out with CSS Grid.
 *
 * Column widths are derived from each column's measured pixel bounds and
 * scaled to the observed container width. Content rows use the average row
 * height from `layoutMetrics`; header rows use their own measured height.
 * Both can be overridden by dragging the resize handles; those overrides are
 * kept in local component state only.
 *
 * @param zone               Zone whose columns, rows and cells are rendered.
 * @param layoutMetrics      Optional metrics for scaling, row height and font size.
 * @param selectedCell       Id of the selected cell, or null.
 * @param onSelectCell       Called with the cell id on focus/click.
 * @param onCellTextChange   Called with the cell id and new text on edit.
 * @param onToggleColumnBold Called when a column header is clicked.
 * @param onToggleRowHeader  Called when a row-number cell is clicked.
 * @param onNavigate         Called for Tab / Enter / Alt+Arrow navigation.
 */
export function GridTable({
  zone,
  layoutMetrics,
  selectedCell,
  onSelectCell,
  onCellTextChange,
  onToggleColumnBold,
  onToggleRowHeader,
  onNavigate,
}: GridTableProps) {
  type ZoneCell = (typeof zone.cells)[0]

  const containerRef = useRef<HTMLDivElement>(null)
  const [containerWidth, setContainerWidth] = useState(0)

  // ----------------------------------------------------------------
  // Observe container width for scaling
  // ----------------------------------------------------------------
  useEffect(() => {
    const el = containerRef.current
    if (!el) return
    const ro = new ResizeObserver(([entry]) => {
      setContainerWidth(entry.contentRect.width)
    })
    ro.observe(el)
    return () => ro.disconnect()
  }, [])

  // ----------------------------------------------------------------
  // Compute column widths from OCR measurements
  // ----------------------------------------------------------------
  // Falls back to the page width, then to 1, so `scale` never divides by zero.
  const zoneWidthPx = zone.bbox_px.w || layoutMetrics?.page_width_px || 1
  // Until the container has been measured, render unscaled.
  const scale = containerWidth > 0 ? (containerWidth - ROW_NUM_WIDTH) / zoneWidthPx : 1

  // Column widths in original pixels, then scaled to container
  const [colWidthOverrides, setColWidthOverrides] = useState<number[] | null>(null)
  const columnWidthsPx = zone.columns.map((col) => col.x_max_px - col.x_min_px)
  const effectiveColWidths = (colWidthOverrides ?? columnWidthsPx).map(
    (w) => Math.max(MIN_COL_WIDTH, w * scale),
  )

  // ----------------------------------------------------------------
  // Compute row heights from OCR measurements
  // ----------------------------------------------------------------
  const avgRowHeightPx = layoutMetrics?.avg_row_height_px ?? 30
  const [rowHeightOverrides, setRowHeightOverrides] = useState<Map<number, number>>(new Map())

  /** Height in CSS px for a row: manual override, measured (headers) or average (content). */
  const getRowHeight = (rowIndex: number, isHeader: boolean): number => {
    if (rowHeightOverrides.has(rowIndex)) {
      return rowHeightOverrides.get(rowIndex)!
    }
    const row = zone.rows.find((r) => r.index === rowIndex)
    if (!row) return Math.max(MIN_ROW_HEIGHT, avgRowHeightPx * scale)

    if (isHeader) {
      // Headers keep their measured height
      const measuredH = row.y_max_px - row.y_min_px
      return Math.max(MIN_ROW_HEIGHT, measuredH * scale)
    }
    // Content rows use average for uniformity
    return Math.max(MIN_ROW_HEIGHT, avgRowHeightPx * scale)
  }

  // ----------------------------------------------------------------
  // Font size from layout metrics
  // ----------------------------------------------------------------
  const baseFontSize = layoutMetrics?.font_size_suggestion_px
    ? Math.max(11, layoutMetrics.font_size_suggestion_px * scale)
    : 13

  // ----------------------------------------------------------------
  // Keyboard navigation
  // ----------------------------------------------------------------
  const handleKeyDown = useCallback(
    (e: React.KeyboardEvent, cellId: string) => {
      if (e.key === 'Tab') {
        e.preventDefault()
        onNavigate(cellId, e.shiftKey ? 'left' : 'right')
      } else if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault()
        onNavigate(cellId, 'down')
      } else if (e.key === 'ArrowUp' && e.altKey) {
        e.preventDefault()
        onNavigate(cellId, 'up')
      } else if (e.key === 'ArrowDown' && e.altKey) {
        e.preventDefault()
        onNavigate(cellId, 'down')
      } else if (e.key === 'Escape') {
        ;(e.target as HTMLElement).blur()
      }
    },
    [onNavigate],
  )

  // ----------------------------------------------------------------
  // Cell lookup
  // ----------------------------------------------------------------
  const cellMap = new Map<string, ZoneCell>()
  for (const cell of zone.cells) {
    cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
  }

  /**
   * First word box of a cell that has a non-black color name and a color
   * value, or undefined. Used for both the stripe color and its tooltip so
   * the two always describe the same word box.
   */
  const findColoredWordBox = (cell: ZoneCell | undefined) =>
    cell?.word_boxes?.find((wb) =>
      Boolean(wb.color_name && wb.color_name !== 'black' && wb.color),
    )

  // ----------------------------------------------------------------
  // Column resize (drag)
  // ----------------------------------------------------------------
  const handleColResizeStart = useCallback(
    (colIndex: number, startX: number) => {
      const baseWidths = colWidthOverrides ?? [...columnWidthsPx]

      const handleMouseMove = (e: MouseEvent) => {
        // Convert the screen-space delta back to unscaled pixels.
        const deltaPx = (e.clientX - startX) / scale
        const newWidths = [...baseWidths]
        newWidths[colIndex] = Math.max(20, baseWidths[colIndex] + deltaPx)
        // Steal from next column to keep total constant
        if (colIndex + 1 < newWidths.length) {
          newWidths[colIndex + 1] = Math.max(20, baseWidths[colIndex + 1] - deltaPx)
        }
        setColWidthOverrides(newWidths)
      }

      const handleMouseUp = () => {
        document.removeEventListener('mousemove', handleMouseMove)
        document.removeEventListener('mouseup', handleMouseUp)
        document.body.style.cursor = ''
        document.body.style.userSelect = ''
      }

      document.body.style.cursor = 'col-resize'
      document.body.style.userSelect = 'none'
      document.addEventListener('mousemove', handleMouseMove)
      document.addEventListener('mouseup', handleMouseUp)
    },
    [colWidthOverrides, columnWidthsPx, scale],
  )

  // ----------------------------------------------------------------
  // Row resize (drag)
  // ----------------------------------------------------------------
  const handleRowResizeStart = useCallback(
    (rowIndex: number, startY: number, currentHeight: number) => {
      const handleMouseMove = (e: MouseEvent) => {
        const delta = e.clientY - startY
        const newH = Math.max(MIN_ROW_HEIGHT, currentHeight + delta)
        setRowHeightOverrides((prev) => {
          const next = new Map(prev)
          next.set(rowIndex, newH)
          return next
        })
      }

      const handleMouseUp = () => {
        document.removeEventListener('mousemove', handleMouseMove)
        document.removeEventListener('mouseup', handleMouseUp)
        document.body.style.cursor = ''
        document.body.style.userSelect = ''
      }

      document.body.style.cursor = 'row-resize'
      document.body.style.userSelect = 'none'
      document.addEventListener('mousemove', handleMouseMove)
      document.addEventListener('mouseup', handleMouseUp)
    },
    [],
  )

  const isBoxZone = zone.zone_type === 'box'
  const numCols = zone.columns.length

  // CSS Grid template for columns: row-number gutter + proportional columns
  const gridTemplateCols = `${ROW_NUM_WIDTH}px ${effectiveColWidths.map((w) => `${w.toFixed(1)}px`).join(' ')}`

  return (
    <div
      ref={containerRef}
      className={`overflow-x-auto ${isBoxZone ? 'border-2 border-gray-400 dark:border-gray-500 rounded-lg' : ''}`}
    >
      {/* Zone label */}
      <div className="flex items-center gap-2 px-2 py-1 text-xs text-gray-500 dark:text-gray-400">
        <span
          className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium ${
            isBoxZone
              ? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
              : 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
          }`}
        >
          {isBoxZone ? 'Box' : 'Inhalt'} Zone {zone.zone_index}
        </span>
        <span>
          {zone.columns.length} Spalten, {zone.rows.length} Zeilen, {zone.cells.length} Zellen
        </span>
      </div>

      {/* ============================================================ */}
      {/* CSS Grid — column headers */}
      {/* ============================================================ */}
      <div
        style={{
          display: 'grid',
          gridTemplateColumns: gridTemplateCols,
          fontFamily: "var(--font-noto-sans, 'Noto Sans'), 'Inter', system-ui, sans-serif",
          fontSize: `${baseFontSize}px`,
        }}
      >
        {/* Header: row-number corner */}
        <div className="sticky left-0 z-10 px-1 py-1.5 text-[10px] text-gray-400 dark:text-gray-500 border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50" />

        {/* Header: column labels with resize handles */}
        {zone.columns.map((col, ci) => (
          <div
            key={col.index}
            className={`relative px-2 py-1.5 text-xs font-medium border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
              col.bold ? 'text-teal-700 dark:text-teal-300' : 'text-gray-600 dark:text-gray-400'
            }`}
            onClick={() => onToggleColumnBold(zone.zone_index, col.index)}
            title={`Spalte ${col.index + 1} — Klick fuer Fett-Toggle`}
          >
            <div className="flex items-center gap-1 justify-center truncate">
              <span>{col.label}</span>
              {col.bold && (
                <span className="text-[9px] px-1 py-0 rounded bg-teal-100 dark:bg-teal-900/40 text-teal-600 dark:text-teal-400">
                  B
                </span>
              )}
            </div>
            {/* Right-edge resize handle (the last column has no neighbour to trade width with) */}
            {ci < numCols - 1 && (
              <div
                className="absolute top-0 right-0 w-[5px] h-full cursor-col-resize hover:bg-teal-400/40 z-20"
                onMouseDown={(e) => {
                  // Keep the drag from also triggering the header's bold toggle.
                  e.stopPropagation()
                  handleColResizeStart(ci, e.clientX)
                }}
              />
            )}
          </div>
        ))}

        {/* ============================================================ */}
        {/* Data rows */}
        {/* ============================================================ */}
        {zone.rows.map((row) => {
          const rowH = getRowHeight(row.index, row.is_header)
          // A row with a spanning-header cell renders as one cell across all columns.
          const spanCell = zone.cells.find(
            (c) => c.row_index === row.index && c.col_type === 'spanning_header',
          )

          return (
            <div key={row.index} style={{ display: 'contents' }}>
              {/* Row number cell */}
              <div
                className={`relative sticky left-0 z-10 flex items-center justify-center text-[10px] border-b border-r border-gray-200 dark:border-gray-700 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
                  row.is_header
                    ? 'bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 font-medium'
                    : 'bg-gray-50 dark:bg-gray-800/50 text-gray-400 dark:text-gray-500'
                }`}
                style={{ height: `${rowH}px` }}
                onClick={() => onToggleRowHeader(zone.zone_index, row.index)}
                title={`Zeile ${row.index + 1} — Klick fuer Header-Toggle`}
              >
                {row.index + 1}
                {row.is_header && <span className="block text-[8px]">H</span>}
                {/* Bottom-edge resize handle */}
                <div
                  className="absolute bottom-0 left-0 w-full h-[4px] cursor-row-resize hover:bg-teal-400/40 z-20"
                  onMouseDown={(e) => {
                    // Keep the drag from also triggering the header toggle.
                    e.stopPropagation()
                    handleRowResizeStart(row.index, e.clientY, rowH)
                  }}
                />
              </div>

              {/* Cells — spanning header or normal columns */}
              {spanCell ? (
                <div
                  className="border-b border-r border-gray-200 dark:border-gray-700 bg-blue-50/50 dark:bg-blue-900/10 flex items-center"
                  style={{
                    gridColumn: `2 / ${numCols + 2}`,
                    height: `${rowH}px`,
                  }}
                >
                  {(() => {
                    const cellId = spanCell.cell_id
                    const isSelected = selectedCell === cellId
                    const cellColor = findColoredWordBox(spanCell)?.color ?? null
                    return (
                      <div className="flex items-center w-full">
                        {cellColor && (
                          <span
                            className="flex-shrink-0 w-1.5 self-stretch rounded-l-sm"
                            style={{ backgroundColor: cellColor }}
                          />
                        )}
                        <input
                          id={`cell-${cellId}`}
                          type="text"
                          value={spanCell.text}
                          onChange={(e) => onCellTextChange(cellId, e.target.value)}
                          onFocus={() => onSelectCell(cellId)}
                          onKeyDown={(e) => handleKeyDown(e, cellId)}
                          className={`w-full px-3 py-1 bg-transparent border-0 outline-none text-center ${
                            isSelected ? 'ring-2 ring-teal-500 ring-inset rounded' : ''
                          }`}
                          style={{ color: cellColor || undefined }}
                          spellCheck={false}
                        />
                      </div>
                    )
                  })()}
                </div>
              ) : (
                zone.columns.map((col) => {
                  const cell = cellMap.get(`${row.index}_${col.index}`)
                  // Positions without a cell get a synthetic id for selection/focus.
                  const cellId =
                    cell?.cell_id ??
                    `Z${zone.zone_index}_R${String(row.index).padStart(2, '0')}_C${col.index}`
                  const isSelected = selectedCell === cellId
                  const isBold = col.bold || cell?.is_bold
                  const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60
                  const coloredBox = findColoredWordBox(cell)
                  const cellColor = coloredBox?.color ?? null
                  const hasColoredWords =
                    cell?.word_boxes?.some(
                      (wb) => wb.color_name && wb.color_name !== 'black',
                    ) ?? false

                  return (
                    <div
                      key={col.index}
                      className={`relative border-b border-r border-gray-200 dark:border-gray-700 flex items-center ${
                        isSelected ? 'ring-2 ring-teal-500 ring-inset z-10' : ''
                      } ${isLowConf ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''} ${
                        row.is_header ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''
                      }`}
                      style={{ height: `${rowH}px` }}
                    >
                      {cellColor && (
                        <span
                          className="flex-shrink-0 w-1.5 self-stretch rounded-l-sm"
                          style={{ backgroundColor: cellColor }}
                          // Name the same word box that supplied the stripe color.
                          title={`Farbe: ${coloredBox?.color_name}`}
                        />
                      )}
                      {/* Per-word colored display when not editing */}
                      {hasColoredWords && !isSelected ? (
                        <div
                          className={`w-full px-2 cursor-text truncate ${isBold ? 'font-bold' : 'font-normal'}`}
                          onClick={() => {
                            onSelectCell(cellId)
                            // The input only exists after the re-render that follows selection.
                            setTimeout(() => document.getElementById(`cell-${cellId}`)?.focus(), 0)
                          }}
                        >
                          {cell!.word_boxes!.map((wb, i) => (
                            <span
                              key={i}
                              style={
                                wb.color_name && wb.color_name !== 'black'
                                  ? { color: wb.color }
                                  : undefined
                              }
                            >
                              {wb.text}
                              {i < cell!.word_boxes!.length - 1 ? ' ' : ''}
                            </span>
                          ))}
                        </div>
                      ) : (
                        <input
                          id={`cell-${cellId}`}
                          type="text"
                          value={cell?.text ?? ''}
                          onChange={(e) => {
                            // Positions without a backing cell are not editable.
                            if (cell) onCellTextChange(cellId, e.target.value)
                          }}
                          onFocus={() => onSelectCell(cellId)}
                          onKeyDown={(e) => handleKeyDown(e, cellId)}
                          className={`w-full px-2 bg-transparent border-0 outline-none ${
                            isBold ? 'font-bold' : 'font-normal'
                          }`}
                          spellCheck={false}
                        />
                      )}
                    </div>
                  )
                })
              )}
            </div>
          )
        })}
      </div>
    </div>
  )
}

View File

@@ -1,110 +0,0 @@
'use client'
/** Props accepted by GridToolbar. */
interface GridToolbarProps {
/** True when there are unsaved changes; enables and highlights the save button. */
dirty: boolean
/** True while a save is in progress; disables the save button and shows a spinner. */
saving: boolean
/** Enables the undo button. */
canUndo: boolean
/** Enables the redo button. */
canRedo: boolean
/** Whether the image overlay is currently shown; highlights the overlay toggle. */
showOverlay: boolean
/** Click handler of the save button. */
onSave: () => void
/** Click handler of the undo button. */
onUndo: () => void
/** Click handler of the redo button. */
onRedo: () => void
/** Click handler of the "Neu berechnen" (rebuild) button. */
onRebuild: () => void
/** Click handler of the overlay toggle button. */
onToggleOverlay: () => void
}
/**
 * Toolbar for the grid editor: undo/redo, image-overlay toggle, rebuild and
 * save. Purely presentational; every action is delegated to the callbacks
 * passed in via props.
 */
export function GridToolbar(props: GridToolbarProps) {
  const {
    dirty,
    saving,
    canUndo,
    canRedo,
    showOverlay,
    onSave,
    onUndo,
    onRedo,
    onRebuild,
    onToggleOverlay,
  } = props

  // Undo and redo share the same icon-button styling.
  const historyButtonClass =
    'p-1.5 rounded hover:bg-gray-100 dark:hover:bg-gray-700 disabled:opacity-30 disabled:cursor-not-allowed'

  const overlayButtonClass = [
    'flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border transition-colors',
    showOverlay
      ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-300 dark:border-teal-700 text-teal-700 dark:text-teal-300'
      : 'border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700',
  ].join(' ')

  const saveButtonClass = [
    'flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium rounded-md transition-colors',
    dirty
      ? 'bg-teal-600 text-white hover:bg-teal-700'
      : 'bg-gray-100 dark:bg-gray-800 text-gray-400 cursor-not-allowed',
  ].join(' ')

  // Label: an in-progress save wins over the dirty flag.
  let saveLabel = 'Gespeichert'
  if (saving) {
    saveLabel = 'Speichert...'
  } else if (dirty) {
    saveLabel = 'Speichern'
  }

  // Spinner while saving, otherwise the static save icon.
  const saveIcon = saving ? (
    <svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24">
      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
    </svg>
  ) : (
    <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
      <path strokeLinecap="round" strokeLinejoin="round" d="M8 7H5a2 2 0 00-2 2v9a2 2 0 002 2h14a2 2 0 002-2V9a2 2 0 00-2-2h-3m-1 4l-3 3m0 0l-3-3m3 3V4" />
    </svg>
  )

  return (
    <div className="flex items-center gap-2 flex-wrap">
      {/* Undo / Redo */}
      <div className="flex items-center gap-1 border-r border-gray-200 dark:border-gray-700 pr-2">
        <button
          onClick={onUndo}
          disabled={!canUndo}
          className={historyButtonClass}
          title="Rueckgaengig (Ctrl+Z)"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M3 10h10a5 5 0 015 5v2M3 10l4-4M3 10l4 4" />
          </svg>
        </button>
        <button
          onClick={onRedo}
          disabled={!canRedo}
          className={historyButtonClass}
          title="Wiederholen (Ctrl+Shift+Z)"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M21 10H11a5 5 0 00-5 5v2M21 10l-4-4M21 10l-4 4" />
          </svg>
        </button>
      </div>

      {/* Overlay toggle */}
      <button onClick={onToggleOverlay} className={overlayButtonClass} title="Grid auf Bild anzeigen">
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z" />
        </svg>
        Bild-Overlay
      </button>

      {/* Rebuild */}
      <button
        onClick={onRebuild}
        className="flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors"
        title="Grid neu berechnen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
        </svg>
        Neu berechnen
      </button>

      {/* Spacer pushes the save button to the right */}
      <div className="flex-1" />

      {/* Save */}
      <button
        onClick={onSave}
        disabled={!dirty || saving}
        className={saveButtonClass}
        title="Speichern (Ctrl+S)"
      >
        {saveIcon}
        {saveLabel}
      </button>
    </div>
  )
}

View File

@@ -1,6 +0,0 @@
export { GridEditor } from './GridEditor'
export { GridTable } from './GridTable'
export { GridToolbar } from './GridToolbar'
export { GridImageOverlay } from './GridImageOverlay'
export { useGridEditor } from './useGridEditor'
export type * from './types'

View File

@@ -1,108 +0,0 @@
import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types'
// Re-export for convenience
export type { OcrWordBox }
/** Layout metrics derived from OCR word positions for faithful grid reconstruction. */
export interface LayoutMetrics {
/** Page width in pixels; GridTable falls back to it when a zone's bbox width is 0. */
page_width_px: number
/** Page height in pixels. */
page_height_px: number
/** Average row height in pixels; GridTable uses it (scaled) for all non-header rows. */
avg_row_height_px: number
/** Suggested font size in pixels; GridTable scales it and enforces a minimum of 11px. */
font_size_suggestion_px: number
}
/** A complete structured grid with zones, ready for the Excel-like editor. */
export interface StructuredGrid {
session_id: string
/** Image width; used together with image_height as the overlay SVG viewBox. */
image_width: number
/** Image height; used together with image_width as the overlay SVG viewBox. */
image_height: number
/** Zones of the page; each is rendered as its own table in the editor. */
zones: GridZone[]
boxes_detected: number
summary: GridSummary
formatting: GridFormatting
/** Optional metrics passed to GridTable for scaling and font size. */
layout_metrics?: LayoutMetrics
/** Shown in the editor with one decimal place followed by "s". */
duration_seconds: number
// NOTE(review): not read in the visible editor code — presumably set by the backend after a save; confirm.
edited?: boolean
}
/** Aggregate counts describing a StructuredGrid. */
export interface GridSummary {
total_zones: number
total_columns: number
total_rows: number
total_cells: number
total_words: number
/** Shown in the editor as "+N recovered" when greater than 0. */
recovered_colored?: number
// NOTE(review): presumably maps a color name to a word count — confirm against the backend.
color_stats?: Record<string, number>
}
/**
 * Grid-level formatting information.
 * NOTE(review): neither field is read in the visible editor code; presumably
 * these hold column/row indices — confirm against the backend.
 */
export interface GridFormatting {
bold_columns: number[]
header_rows: number[]
}
/** A horizontal zone of the page — either content or a bordered box. */
export interface GridZone {
/** Identifies the zone; used as React key, for palette selection and in zone labels. */
zone_index: number
/** 'box' zones get a solid, thicker outline in the overlay and a border in the table view. */
zone_type: 'content' | 'box'
/** Zone bounds in image pixels; drawn as the zone rectangle in the overlay. */
bbox_px: BBox
bbox_pct: BBox
border: ZoneBorder | null
word_count: number
columns: GridColumn[]
rows: GridRow[]
/** Cells of this zone; looked up by row_index and col_index when rendering. */
cells: GridEditorCell[]
header_rows: number[]
}
/** Axis-aligned bounding box; the overlay maps x/y/w/h to an SVG rect's x/y/width/height. */
export interface BBox {
x: number
y: number
w: number
h: number
}
/**
 * Border information attached to a zone (GridZone.border).
 * NOTE(review): units and value ranges are not visible in this file — confirm against the backend.
 */
export interface ZoneBorder {
thickness: number
confidence: number
}
/** One column of a zone. */
export interface GridColumn {
/** Column identifier within the zone; matched against GridEditorCell.col_index. */
index: number
/** Text shown in the column header. */
label: string
/** Left edge in image pixels; the overlay draws the column separator here. */
x_min_px: number
/** Right edge in image pixels; x_max_px - x_min_px is the unscaled column width. */
x_max_px: number
x_min_pct: number
x_max_pct: number
/** When true, every cell in the column renders bold. */
bold: boolean
}
/** One row of a zone. */
export interface GridRow {
/** Row identifier within the zone; matched against GridEditorCell.row_index. */
index: number
/** Top edge in image pixels; the overlay draws the row separator here. */
y_min_px: number
/** Bottom edge in image pixels; y_max_px - y_min_px is the measured height used for header rows. */
y_max_px: number
y_min_pct: number
y_max_pct: number
/** Marks the row as a header; toggled by clicking the row number in the table. */
is_header: boolean
}
/** One editable cell of a zone. */
export interface GridEditorCell {
/** Unique id; used for selection, editing callbacks and as DOM id (`cell-<cell_id>`). */
cell_id: string
zone_index: number
row_index: number
col_index: number
/** 'spanning_header' makes the table render the row as one cell across all columns. */
col_type: string
text: string
/** Values above 0 and below 60 are highlighted as low confidence in the table. */
confidence: number
bbox_px: BBox
bbox_pct: BBox
/** Per-word boxes; their color and color_name drive the colored rendering in the table. */
word_boxes: OcrWordBox[]
ocr_engine: string
/** Renders the cell bold; also set on all cells of a column when that column's bold is toggled. */
is_bold: boolean
}
/**
 * Cell formatting applied by the user in the editor.
 * NOTE(review): not referenced by the visible editor code — confirm whether it is still used.
 */
export interface CellFormatting {
bold: boolean
fontSize: 'small' | 'normal' | 'large'
align: 'left' | 'center' | 'right'
}

View File

@@ -1,288 +0,0 @@
import { useCallback, useRef, useState } from 'react'
import type { StructuredGrid, GridZone } from './types'
/** Path prefix for every OCR-pipeline request issued by this hook. */
const KLAUSUR_API = '/klausur-api'
/** Maximum number of snapshots kept on the undo stack; the oldest is dropped beyond this. */
const MAX_UNDO = 50
/** State fields of the grid editor; each corresponds to a same-named state value in useGridEditor. */
export interface GridEditorState {
/** Loaded grid, or null before the first successful load/build. */
grid: StructuredGrid | null
/** True while a load or build request is running. */
loading: boolean
/** True while a save request is running. */
saving: boolean
/** Message of the last failed request, or null. */
error: string | null
/** True when the grid has changes that have not been saved. */
dirty: boolean
/** Id of the selected cell, or null. */
selectedCell: string | null
/** Index of the selected zone, or null. */
selectedZone: number | null
}
/** Builds an Error for a failed response, preferring the JSON body's `detail` field. */
async function gridRequestError(res: Response): Promise<Error> {
  // A non-JSON error body must not mask the HTTP status.
  const body = await res.json().catch(() => ({}))
  return new Error(body.detail || `HTTP ${res.status}`)
}

/**
 * State and actions for the grid editor of one OCR session.
 *
 * Holds the loaded grid, request flags and selection, and exposes actions to
 * load/build/save the grid, edit cells, toggle formatting, undo/redo and find
 * neighbouring cells. Undo/redo snapshots are JSON-serialized zone arrays kept
 * in refs, capped at MAX_UNDO entries.
 *
 * @param sessionId Session to operate on; with null, load/build/save are no-ops.
 * @returns The current state values together with the action callbacks.
 */
export function useGridEditor(sessionId: string | null) {
  const [grid, setGrid] = useState<StructuredGrid | null>(null)
  const [loading, setLoading] = useState(false)
  const [saving, setSaving] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dirty, setDirty] = useState(false)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [selectedZone, setSelectedZone] = useState<number | null>(null)

  // Undo/redo stacks store serialized zone arrays
  const undoStack = useRef<string[]>([])
  const redoStack = useRef<string[]>([])

  /** Snapshots the given zones for undo and invalidates the redo history. */
  const pushUndo = useCallback((zones: GridZone[]) => {
    undoStack.current.push(JSON.stringify(zones))
    if (undoStack.current.length > MAX_UNDO) {
      undoStack.current.shift()
    }
    redoStack.current = []
  }, [])

  // ------------------------------------------------------------------
  // Load / Build
  // ------------------------------------------------------------------

  /** Requests a freshly built grid and replaces the current one, resetting history. */
  const buildGrid = useCallback(async () => {
    if (!sessionId) return
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`,
        { method: 'POST' },
      )
      if (!res.ok) {
        throw await gridRequestError(res)
      }
      const data: StructuredGrid = await res.json()
      setGrid(data)
      setDirty(false)
      undoStack.current = []
      redoStack.current = []
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [sessionId])

  /** Loads the stored grid; on HTTP 404 it builds one instead. */
  const loadGrid = useCallback(async () => {
    if (!sessionId) return
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`,
      )
      if (res.status === 404) {
        // No grid yet — build it
        await buildGrid()
        return
      }
      if (!res.ok) {
        throw await gridRequestError(res)
      }
      const data: StructuredGrid = await res.json()
      setGrid(data)
      setDirty(false)
      undoStack.current = []
      redoStack.current = []
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [sessionId, buildGrid])

  // ------------------------------------------------------------------
  // Save
  // ------------------------------------------------------------------

  /** Posts the current grid; clears `dirty` on success, sets `error` on failure. */
  const saveGrid = useCallback(async () => {
    if (!sessionId || !grid) return
    setSaving(true)
    // Drop any message from an earlier failed request so a successful save
    // does not leave a stale error visible (matches buildGrid/loadGrid).
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/save-grid`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(grid),
        },
      )
      if (!res.ok) {
        throw await gridRequestError(res)
      }
      setDirty(false)
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setSaving(false)
    }
  }, [sessionId, grid])

  // ------------------------------------------------------------------
  // Cell editing
  // ------------------------------------------------------------------

  /** Replaces the text of the cell with the given id (searched across all zones). */
  const updateCellText = useCallback(
    (cellId: string, newText: string) => {
      if (!grid) return
      pushUndo(grid.zones)

      setGrid((prev) => {
        if (!prev) return prev
        return {
          ...prev,
          zones: prev.zones.map((zone) => ({
            ...zone,
            cells: zone.cells.map((cell) =>
              cell.cell_id === cellId ? { ...cell, text: newText } : cell,
            ),
          })),
        }
      })
      setDirty(true)
    },
    [grid, pushUndo],
  )

  // ------------------------------------------------------------------
  // Column formatting
  // ------------------------------------------------------------------

  /** Flips a column's bold flag and applies the new value to every cell in that column. */
  const toggleColumnBold = useCallback(
    (zoneIndex: number, colIndex: number) => {
      if (!grid) return
      pushUndo(grid.zones)

      setGrid((prev) => {
        if (!prev) return prev
        return {
          ...prev,
          zones: prev.zones.map((zone) => {
            if (zone.zone_index !== zoneIndex) return zone
            const col = zone.columns.find((c) => c.index === colIndex)
            // An unknown column is treated as "not bold yet".
            const newBold = col ? !col.bold : true
            return {
              ...zone,
              columns: zone.columns.map((c) =>
                c.index === colIndex ? { ...c, bold: newBold } : c,
              ),
              cells: zone.cells.map((cell) =>
                cell.col_index === colIndex
                  ? { ...cell, is_bold: newBold }
                  : cell,
              ),
            }
          }),
        }
      })
      setDirty(true)
    },
    [grid, pushUndo],
  )

  // ------------------------------------------------------------------
  // Row formatting
  // ------------------------------------------------------------------

  /** Flips the is_header flag of one row in one zone. */
  const toggleRowHeader = useCallback(
    (zoneIndex: number, rowIndex: number) => {
      if (!grid) return
      pushUndo(grid.zones)

      setGrid((prev) => {
        if (!prev) return prev
        return {
          ...prev,
          zones: prev.zones.map((zone) => {
            if (zone.zone_index !== zoneIndex) return zone
            return {
              ...zone,
              rows: zone.rows.map((r) =>
                r.index === rowIndex ? { ...r, is_header: !r.is_header } : r,
              ),
            }
          }),
        }
      })
      setDirty(true)
    },
    [grid, pushUndo],
  )

  // ------------------------------------------------------------------
  // Undo / Redo
  // ------------------------------------------------------------------

  /** Restores the most recent undo snapshot and pushes the current zones onto the redo stack. */
  const undo = useCallback(() => {
    if (!grid || undoStack.current.length === 0) return
    redoStack.current.push(JSON.stringify(grid.zones))
    const prev = undoStack.current.pop()!
    setGrid((g) => (g ? { ...g, zones: JSON.parse(prev) } : g))
    setDirty(true)
  }, [grid])

  /** Restores the most recent redo snapshot and pushes the current zones onto the undo stack. */
  const redo = useCallback(() => {
    if (!grid || redoStack.current.length === 0) return
    undoStack.current.push(JSON.stringify(grid.zones))
    const next = redoStack.current.pop()!
    setGrid((g) => (g ? { ...g, zones: JSON.parse(next) } : g))
    setDirty(true)
  }, [grid])

  // Read from the refs on each render; every stack change above is paired
  // with a state update, which triggers that render.
  const canUndo = undoStack.current.length > 0
  const canRedo = redoStack.current.length > 0

  // ------------------------------------------------------------------
  // Navigation helpers
  // ------------------------------------------------------------------

  /**
   * Id of the cell next to `cellId` in the given direction within the same
   * zone, or null when the cell is unknown or no cell exists at that position.
   */
  const getAdjacentCell = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
      if (!grid) return null
      for (const zone of grid.zones) {
        const cell = zone.cells.find((c) => c.cell_id === cellId)
        if (!cell) continue

        let targetRow = cell.row_index
        let targetCol = cell.col_index
        if (direction === 'up') targetRow--
        if (direction === 'down') targetRow++
        if (direction === 'left') targetCol--
        if (direction === 'right') targetCol++

        const target = zone.cells.find(
          (c) => c.row_index === targetRow && c.col_index === targetCol,
        )
        return target?.cell_id ?? null
      }
      return null
    },
    [grid],
  )

  return {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    selectedZone,
    setSelectedCell,
    setSelectedZone,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  }
}

View File

@@ -194,8 +194,10 @@ export function Sidebar({ onRoleChange }: SidebarProps) {
{/* Categories */}
<div className="px-2 space-y-1">
{visibleCategories.map((category) => {
const categoryHref = `/${category.id}`
const isCategoryActive = pathname.startsWith(categoryHref)
const categoryHref = category.id === 'compliance-sdk' ? '/sdk' : `/${category.id}`
const isCategoryActive = category.id === 'compliance-sdk'
? category.modules.some(m => pathname.startsWith(m.href))
: pathname.startsWith(categoryHref)
return (
<div key={category.id}>

View File

@@ -1,231 +0,0 @@
'use client'
import { useState } from 'react'
import { OverlayReconstruction } from './OverlayReconstruction'
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
const KLAUSUR_API = '/klausur-api'
type Phase = 'idle' | 'running' | 'compare'
/**
 * Response body of the `paddle-kombi` / `rapid-kombi` endpoints as consumed by
 * KombiCompareStep. The index signatures allow additional fields that are not
 * listed explicitly here.
 */
interface KombiResult {
cells: GridCell[]
image_width: number
image_height: number
duration_seconds: number
summary: {
total_cells: number
non_empty_cells: number
merged_words: number
[key: string]: unknown
}
[key: string]: unknown
}
/** Props accepted by KombiCompareStep. */
interface KombiCompareStepProps {
/** Session to run both engines for; with null the start button is disabled and no request is made. */
sessionId: string | null
// NOTE(review): presumably advances the wizard to the following step — confirm at the call site.
onNext: () => void
}
/**
 * Runs the two "Kombi" OCR endpoints (`paddle-kombi` and `rapid-kombi`) for one
 * session in parallel and shows both results side by side.
 *
 * Phases:
 *  - `idle`    – explanation plus start button
 *  - `running` – status cards while neither engine has produced a result
 *  - `compare` – two overlays with stats; also entered as soon as one engine
 *                fails, so the error is visible while the other still runs
 *
 * Failures of BOTH engines are reported (each prefixed with the engine label),
 * and "Neu starten" is disabled while a request is still in flight so that a
 * late response cannot write into a freshly reset view.
 *
 * @param sessionId Session to process; nothing is started while it is null.
 * @param onNext    Called when the user clicks "Fertig".
 */
export function KombiCompareStep({ sessionId, onNext }: KombiCompareStepProps) {
  const [phase, setPhase] = useState<Phase>('idle')
  const [error, setError] = useState('')
  const [paddleResult, setPaddleResult] = useState<KombiResult | null>(null)
  const [rapidResult, setRapidResult] = useState<KombiResult | null>(null)
  const [paddleStatus, setPaddleStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending')
  const [rapidStatus, setRapidStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending')

  const anyRunning = paddleStatus === 'running' || rapidStatus === 'running'

  const runBothEngines = async () => {
    if (!sessionId) return
    setPhase('running')
    setError('')
    setPaddleStatus('running')
    setRapidStatus('running')
    setPaddleResult(null)
    setRapidResult(null)

    // Never rejects: a failure is recorded in state instead, so the sibling
    // request is always awaited and its own error is not silently dropped.
    const fetchEngine = async (
      label: string,
      endpoint: string,
      setResult: (r: KombiResult) => void,
      setStatus: (s: 'pending' | 'running' | 'done' | 'error') => void,
    ): Promise<void> => {
      try {
        const res = await fetch(
          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`,
          { method: 'POST' },
        )
        if (!res.ok) {
          // The error body is optional; fall back to the bare status code.
          const body = await res.json().catch(() => ({}))
          throw new Error(body.detail || `HTTP ${res.status}`)
        }
        const data = await res.json()
        setResult(data)
        setStatus('done')
      } catch (e: unknown) {
        const message = `${label}: ${e instanceof Error ? e.message : String(e)}`
        setStatus('error')
        // Append rather than overwrite so both engines' errors stay visible.
        setError(prev => (prev ? `${prev} | ${message}` : message))
        // Show the compare view right away, even if the other engine is still running.
        setPhase('compare')
      }
    }

    await Promise.all([
      fetchEngine('Paddle + Tesseract', 'paddle-kombi', setPaddleResult, setPaddleStatus),
      fetchEngine('RapidOCR + Tesseract', 'rapid-kombi', setRapidResult, setRapidStatus),
    ])
    setPhase('compare')
  }

  if (phase === 'idle') {
    return (
      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8 text-center">
        <div className="text-4xl mb-3"></div>
        <h3 className="text-lg font-semibold text-gray-800 dark:text-gray-200 mb-2">
          Kombi-Vergleich
        </h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-6 max-w-lg mx-auto">
          Beide Kombi-Modi (Paddle + Tesseract vs. RapidOCR + Tesseract) laufen parallel.
          Die Ergebnisse werden nebeneinander angezeigt, damit die Qualitaet direkt verglichen werden kann.
        </p>
        <button
          onClick={runBothEngines}
          disabled={!sessionId}
          className="px-5 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed font-medium"
        >
          Beide Kombi-Modi starten
        </button>
      </div>
    )
  }

  // Status cards are shown only until the first result arrives.
  if (phase === 'running' && !paddleResult && !rapidResult) {
    return (
      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8">
        <div className="flex items-center justify-center gap-8">
          <EngineStatusCard label="Paddle + Tesseract" status={paddleStatus} />
          <EngineStatusCard label="RapidOCR + Tesseract" status={rapidStatus} />
        </div>
      </div>
    )
  }

  // compare phase
  return (
    <div className="space-y-4">
      {error && (
        <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-3 text-sm text-red-700 dark:text-red-300">
          {error}
        </div>
      )}
      <div className="flex items-center justify-between">
        <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
          Side-by-Side Vergleich
        </h3>
        <button
          onClick={() => { setPhase('idle'); setError(''); setPaddleResult(null); setRapidResult(null) }}
          disabled={anyRunning}
          className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
        >
          Neu starten
        </button>
      </div>
      <div className="grid grid-cols-2 gap-4">
        {/* Left: Paddle-Kombi */}
        <div className="space-y-2">
          <div className="flex items-center gap-2">
            <span className="text-sm font-medium text-gray-700 dark:text-gray-300">
              🔀 Paddle + Tesseract
            </span>
            {paddleStatus === 'error' && (
              <span className="text-xs text-red-500">Fehler</span>
            )}
          </div>
          {paddleResult ? (
            <>
              <OverlayReconstruction
                sessionId={sessionId}
                onNext={() => {}}
                wordResultOverride={paddleResult}
              />
              <StatsBar result={paddleResult} engine="Paddle-Kombi" />
            </>
          ) : (
            <div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
              {paddleStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
            </div>
          )}
        </div>
        {/* Right: Rapid-Kombi */}
        <div className="space-y-2">
          <div className="flex items-center gap-2">
            <span className="text-sm font-medium text-gray-700 dark:text-gray-300">
              RapidOCR + Tesseract
            </span>
            {rapidStatus === 'error' && (
              <span className="text-xs text-red-500">Fehler</span>
            )}
          </div>
          {rapidResult ? (
            <>
              <OverlayReconstruction
                sessionId={sessionId}
                onNext={() => {}}
                wordResultOverride={rapidResult}
              />
              <StatsBar result={rapidResult} engine="Rapid-Kombi" />
            </>
          ) : (
            <div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
              {rapidStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
            </div>
          )}
        </div>
      </div>
      <div className="flex justify-end">
        <button
          onClick={onNext}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
        >
          Fertig
        </button>
      </div>
    </div>
  )
}
/**
 * Small card with a status indicator followed by the engine label.
 * Only the four known states produce an indicator; any other `status` value
 * renders the label alone.
 */
function EngineStatusCard({ label, status }: { label: string; status: string }) {
  const renderIndicator = () => {
    switch (status) {
      case 'running':
        return (
          <div className="w-5 h-5 border-2 border-teal-400 border-t-transparent rounded-full animate-spin" />
        )
      case 'done':
        return <span className="text-green-500 text-lg"></span>
      case 'error':
        return <span className="text-red-500 text-lg"></span>
      case 'pending':
        return <span className="text-gray-400 text-lg"></span>
      default:
        return null
    }
  }

  return (
    <div className="flex items-center gap-3 bg-gray-50 dark:bg-gray-900 rounded-lg px-5 py-4">
      {renderIndicator()}
      <span className="text-sm text-gray-700 dark:text-gray-300">{label}</span>
    </div>
  )
}
/**
 * One-line summary shown below an overlay: engine name, merged word count,
 * filled/total cells and run time. Missing numbers are displayed as 0.
 */
function StatsBar({ result, engine }: { result: KombiResult; engine: string }) {
  const { summary, duration_seconds: rawSeconds } = result
  const seconds = rawSeconds ?? 0
  const wordCount = summary?.merged_words ?? 0
  const filled = summary?.non_empty_cells ?? 0
  const total = summary?.total_cells ?? 0

  return (
    <div className="flex items-center gap-3 text-[11px] text-gray-500 dark:text-gray-400 bg-gray-50 dark:bg-gray-900 rounded-lg px-3 py-2">
      <span className="font-medium text-gray-600 dark:text-gray-300">{engine}</span>
      <span>{wordCount} Woerter</span>
      <span>{filled}/{total} Zellen</span>
      <span>{seconds.toFixed(2)}s</span>
    </div>
  )
}

View File

@@ -1,644 +0,0 @@
'use client'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import type { GridResult, GridCell, RowResult, RowItem } from '@/app/(admin)/ai/ocr-overlay/types'
import { usePixelWordPositions } from './usePixelWordPositions'
import { useSlideWordPositions } from './useSlideWordPositions'
// Path prefix prepended to every OCR-pipeline request and image URL built in this file.
const KLAUSUR_API = '/klausur-api'
/** Props of OverlayReconstruction. */
interface OverlayReconstructionProps {
// Session used for the background image, data loading and saving; the component renders a hint while it is null.
sessionId: string | null
// Invoked from the "Fertig" and "Ueberspringen" buttons.
onNext: () => void
/** When set, use this data directly instead of fetching from the session API. */
wordResultOverride?: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }
}
/** Camel-cased working copy of a GridCell used by the overlay editor. */
interface EditableCell {
// Copied from GridCell.cell_id; used as edit-map key and in the input's DOM id.
cellId: string
// Text as loaded; user edits are tracked separately and take precedence for display.
text: string
// Same value as `text` at load time; compared against edits to decide what counts as changed.
originalText: string
// Cell box in percent; used directly as CSS left/top/width/height percentages.
bboxPct: { x: number; y: number; w: number; h: number }
// Copied from GridCell.col_type; shown only in the input tooltip.
colType: string
// Row and column index; define the Tab navigation order (row first, then column).
rowIndex: number
colIndex: number
}
// One recorded text edit: undo restores `oldText`, redo re-applies `newText` for `cellId`.
type UndoAction = { cellId: string; oldText: string; newText: string }
/** Edited texts plus undo/redo stacks, kept together so each transition is one pure state update. */
type OverlayEditHistory = {
  editedTexts: Map<string, string>
  undoStack: UndoAction[]
  redoStack: UndoAction[]
}

/**
 * Editable text overlay on top of the cropped session image.
 *
 * Cell data comes either from `wordResultOverride` or from the session API
 * (`word_result`, plus `row_result` for row guide lines). Every cell is
 * rendered at its detected word positions (slide or cluster algorithm) or, if
 * none are available, at its bounding box. Cells with a single position are
 * editable inputs; cells with several word groups are rendered as read-only
 * spans.
 *
 * Changes compared with the previous implementation:
 *  - Edits and undo/redo stacks live in one state object. Previously setters
 *    were called inside other setters' updater functions, which React may
 *    invoke twice (StrictMode) and which then duplicated history entries.
 *  - The undo/redo shortcut compares the key case-insensitively, because with
 *    Shift held the key is commonly reported as "Z".
 *  - Tab navigation looks up inputs inside this component's own container
 *    (DOM ids repeat when two overlays are mounted) and skips cells that have
 *    no input instead of swallowing the key press.
 *  - "Erneut versuchen" retries the operation that failed: a failed save is
 *    re-sent with the edits intact, and `wordResultOverride` is respected.
 *
 * @param sessionId          Session id for image, loading and saving.
 * @param onNext             Called from "Fertig" / "Ueberspringen".
 * @param wordResultOverride Optional data to display instead of fetching.
 */
export function OverlayReconstruction({ sessionId, onNext, wordResultOverride }: OverlayReconstructionProps) {
  const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
  const [error, setError] = useState('')
  // Which operation produced the current error; decides what "retry" does.
  const [failedOp, setFailedOp] = useState<'load' | 'save'>('load')
  const [cells, setCells] = useState<EditableCell[]>([])
  const [gridCells, setGridCells] = useState<GridCell[]>([])

  // Edits + undo/redo
  const [history, setHistory] = useState<OverlayEditHistory>(() => ({
    editedTexts: new Map(),
    undoStack: [],
    redoStack: [],
  }))
  const { editedTexts, undoStack, redoStack } = history

  // Overlay state
  const [rows, setRows] = useState<RowItem[]>([])
  const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
  const [fontScale, setFontScale] = useState(0.7)
  const [globalBold, setGlobalBold] = useState(false)
  const [imageRotation, setImageRotation] = useState<0 | 180>(0)
  const [textOpacity, setTextOpacity] = useState(100)
  const [textColor, setTextColor] = useState<'red' | 'blue' | 'black'>('red')
  const [positioningMode, setPositioningMode] = useState<'cluster' | 'slide'>('slide')
  const reconRef = useRef<HTMLDivElement>(null)
  const [reconWidth, setReconWidth] = useState(0)

  // Pixel-based word positions (both algorithms run, toggle selects which to use)
  const overlayImageUrl = sessionId
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
    : ''
  const clusterPositions = usePixelWordPositions(
    overlayImageUrl,
    gridCells,
    status === 'ready',
    imageRotation,
  )
  const slidePositions = useSlideWordPositions(
    overlayImageUrl,
    gridCells,
    status === 'ready',
    imageRotation,
  )
  const cellWordPositions = positioningMode === 'slide' ? slidePositions : clusterPositions

  // Track container width. Re-run on status changes because the container
  // element only exists in the editor view.
  useEffect(() => {
    const el = reconRef.current
    if (!el) return
    const obs = new ResizeObserver(entries => {
      for (const entry of entries) setReconWidth(entry.contentRect.width)
    })
    obs.observe(el)
    return () => obs.disconnect()
  }, [status])

  /** Replaces all cell data with `wordResult` and clears edits and history. */
  const applyWordResult = (wordResult: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }) => {
    const rawGridCells: GridCell[] = wordResult.cells || []
    setGridCells(rawGridCells)
    const editableCells: EditableCell[] = rawGridCells.map(c => ({
      cellId: c.cell_id,
      text: c.text,
      originalText: c.text,
      bboxPct: c.bbox_pct,
      colType: c.col_type,
      rowIndex: c.row_index,
      colIndex: c.col_index,
    }))
    setCells(editableCells)
    setHistory({ editedTexts: new Map(), undoStack: [], redoStack: [] })
    if (wordResult.image_width && wordResult.image_height) {
      setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
    }
    setStatus('ready')
  }

  /** Fetches the session and applies its `word_result` (and row lines, if present). */
  const loadSessionData = async () => {
    if (!sessionId) return
    setStatus('loading')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      const data = await res.json()
      const wordResult: GridResult | undefined = data.word_result
      if (!wordResult) {
        setFailedOp('load')
        setError('Keine Worterkennungsdaten gefunden. Bitte zuerst den Woerter-Schritt abschliessen.')
        setStatus('error')
        return
      }
      applyWordResult(wordResult as unknown as { cells: GridCell[]; image_width: number; image_height: number })
      // Load rows
      const rowResult: RowResult | undefined = data.row_result
      if (rowResult?.rows) setRows(rowResult.rows)
    } catch (e: unknown) {
      setFailedOp('load')
      setError(e instanceof Error ? e.message : String(e))
      setStatus('error')
    }
  }

  // Load session data
  useEffect(() => {
    if (wordResultOverride) {
      applyWordResult(wordResultOverride)
      return
    }
    if (!sessionId) return
    loadSessionData()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId, wordResultOverride])

  const handleTextChange = useCallback((cellId: string, newText: string) => {
    setHistory(h => {
      // Previous value: latest edit if any, else the loaded cell text.
      const prevText = h.editedTexts.get(cellId) ?? cells.find(c => c.cellId === cellId)?.text ?? ''
      const nextTexts = new Map(h.editedTexts)
      nextTexts.set(cellId, newText)
      return {
        editedTexts: nextTexts,
        undoStack: [...h.undoStack, { cellId, oldText: prevText, newText }],
        redoStack: [], // a fresh edit invalidates the redo history
      }
    })
  }, [cells])

  const undo = useCallback(() => {
    setHistory(h => {
      if (h.undoStack.length === 0) return h
      const action = h.undoStack[h.undoStack.length - 1]
      const nextTexts = new Map(h.editedTexts)
      nextTexts.set(action.cellId, action.oldText)
      return {
        editedTexts: nextTexts,
        undoStack: h.undoStack.slice(0, -1),
        redoStack: [...h.redoStack, action],
      }
    })
  }, [])

  const redo = useCallback(() => {
    setHistory(h => {
      if (h.redoStack.length === 0) return h
      const action = h.redoStack[h.redoStack.length - 1]
      const nextTexts = new Map(h.editedTexts)
      nextTexts.set(action.cellId, action.newText)
      return {
        editedTexts: nextTexts,
        undoStack: [...h.undoStack, action],
        redoStack: h.redoStack.slice(0, -1),
      }
    })
  }, [])

  // Drops the edit for one cell; as before, this is not recorded in the undo history.
  const resetCell = useCallback((cellId: string) => {
    setHistory(h => {
      const nextTexts = new Map(h.editedTexts)
      nextTexts.delete(cellId)
      return { ...h, editedTexts: nextTexts }
    })
  }, [])

  // Keyboard shortcuts
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // Compare case-insensitively: with Shift held the key may arrive as "Z".
      if ((e.metaKey || e.ctrlKey) && e.key.toLowerCase() === 'z') {
        e.preventDefault()
        if (e.shiftKey) redo()
        else undo()
      }
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [undo, redo])

  const getDisplayText = useCallback((cell: EditableCell): string => {
    return editedTexts.get(cell.cellId) ?? cell.text
  }, [editedTexts])

  const isEdited = useCallback((cell: EditableCell): boolean => {
    const edited = editedTexts.get(cell.cellId)
    return edited !== undefined && edited !== cell.originalText
  }, [editedTexts])

  const changedCount = useMemo(() => {
    let count = 0
    for (const cell of cells) {
      if (isEdited(cell)) count++
    }
    return count
  }, [cells, isEdited])

  // Tab navigation order: row by row, left to right
  const sortedCellIds = useMemo(() => {
    return [...cells]
      .sort((a, b) => a.rowIndex !== b.rowIndex ? a.rowIndex - b.rowIndex : a.colIndex - b.colIndex)
      .map(c => c.cellId)
  }, [cells])

  const handleKeyDown = useCallback((e: React.KeyboardEvent, cellId: string) => {
    if (e.key !== 'Tab') return
    e.preventDefault()
    const root = reconRef.current
    if (!root) return
    // Index this instance's inputs only; ids are not unique across instances.
    const inputsById = new Map<string, HTMLInputElement>()
    root.querySelectorAll<HTMLInputElement>('input[id]').forEach(el => inputsById.set(el.id, el))
    const step = e.shiftKey ? -1 : 1
    // Walk in tab direction until a cell that actually has an input is found.
    for (let i = sortedCellIds.indexOf(cellId) + step; i >= 0 && i < sortedCellIds.length; i += step) {
      const target = inputsById.get(`cell-${sortedCellIds[i]}`)
      if (target) {
        target.focus()
        return
      }
    }
  }, [sortedCellIds])

  /** POSTs all cells whose text differs from the loaded text; no request is sent when nothing changed. */
  const saveReconstruction = useCallback(async () => {
    if (!sessionId) return
    setStatus('saving')
    try {
      const cellUpdates = Array.from(editedTexts.entries())
        .filter(([cellId, text]) => {
          const cell = cells.find(c => c.cellId === cellId)
          return cell && text !== cell.originalText
        })
        .map(([cellId, text]) => ({ cell_id: cellId, text }))
      if (cellUpdates.length === 0) {
        setStatus('saved')
        return
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ cells: cellUpdates }),
      })
      if (!res.ok) {
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `HTTP ${res.status}`)
      }
      setStatus('saved')
    } catch (e: unknown) {
      setFailedOp('save')
      setError(e instanceof Error ? e.message : String(e))
      setStatus('error')
    }
  }, [sessionId, editedTexts, cells])

  // Compute median cell height (in px) for consistent font sizing
  // Must be before early returns (Rules of Hooks)
  const medianCellHeightPx = useMemo(() => {
    const imgWVal = imageNaturalSize?.w || 1
    const imgHVal = imageNaturalSize?.h || 1
    const cH = reconWidth * (imgHVal / imgWVal)
    if (cells.length === 0 || cH === 0) return 40
    const heights = cells.map(c => cH * (c.bboxPct.h / 100)).sort((a, b) => a - b)
    const mid = Math.floor(heights.length / 2)
    return heights.length % 2 === 0 ? (heights[mid - 1] + heights[mid]) / 2 : heights[mid]
  }, [cells, reconWidth, imageNaturalSize])

  // Retry whatever failed: re-send a failed save (edits are still in state),
  // otherwise re-apply the override or reload from the session API.
  const retry = () => {
    setError('')
    if (failedOp === 'save') {
      saveReconstruction()
    } else if (wordResultOverride) {
      applyWordResult(wordResultOverride)
    } else {
      loadSessionData()
    }
  }

  if (!sessionId) {
    return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
  }

  if (status === 'loading') {
    return (
      <div className="flex items-center gap-3 justify-center py-12">
        <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
        <span className="text-gray-500">Overlay-Daten werden geladen...</span>
      </div>
    )
  }

  if (status === 'error') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4">&#x26A0;&#xFE0F;</div>
        <h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler</h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
        <div className="flex gap-3">
          <button onClick={retry}
            className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
            Erneut versuchen
          </button>
          <button onClick={onNext}
            className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
            Ueberspringen &rarr;
          </button>
        </div>
      </div>
    )
  }

  if (status === 'saved') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4">&#x2705;</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Overlay gespeichert</h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
          {changedCount > 0 ? `${changedCount} Zellen wurden aktualisiert.` : 'Keine Aenderungen vorgenommen.'}
        </p>
        <button onClick={onNext}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
          Fertig
        </button>
      </div>
    )
  }

  const imgW = imageNaturalSize?.w || 1
  const imgH = imageNaturalSize?.h || 1
  const colorValue = textColor === 'black' ? '#1a1a1a' : textColor

  // Editable input (plus hover reset button once edited) placed at `box`,
  // whose values are percentages of the overlay container.
  const renderCellInput = (
    cell: EditableCell,
    key: string,
    box: { x: number; y: number; w: number; h: number },
    fontPx: number,
  ) => {
    const edited = isEdited(cell)
    return (
      <div key={key} className="absolute group" style={{
        left: `${box.x}%`,
        top: `${box.y}%`,
        width: `${box.w}%`,
        height: `${box.h}%`,
      }}>
        <input
          id={`cell-${cell.cellId}`}
          type="text"
          value={getDisplayText(cell)}
          onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
          onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
          className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
            edited ? 'bg-green-50/30' : ''
          }`}
          style={{
            fontSize: `${fontPx}px`,
            fontWeight: globalBold ? 'bold' : 'normal',
            fontFamily: "'Liberation Sans', Arial, sans-serif",
            lineHeight: '1',
            color: colorValue,
          }}
          title={`${cell.cellId} (${cell.colType})`}
        />
        {edited && (
          <button
            onClick={() => resetCell(cell.cellId)}
            className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
            title="Zuruecksetzen"
          >
            &times;
          </button>
        )}
      </div>
    )
  }

  return (
    <div className="space-y-3">
      {/* Toolbar */}
      <div className="flex items-center justify-between bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
        <div className="flex items-center gap-2">
          <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
            Overlay-Rekonstruktion
          </h3>
          <span className="text-xs text-gray-400">
            {cells.length} Zellen &middot; {changedCount} geaendert
          </span>
        </div>
        <div className="flex items-center gap-2">
          {/* Undo/Redo */}
          <button
            onClick={undo}
            disabled={undoStack.length === 0}
            className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-30"
            title="Rueckgaengig (Ctrl+Z)"
          >
            &#x21A9;
          </button>
          <button
            onClick={redo}
            disabled={redoStack.length === 0}
            className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-30"
            title="Wiederholen (Ctrl+Shift+Z)"
          >
            &#x21AA;
          </button>
          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
          {/* Font scale */}
          <label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
            Schrift
            <input
              type="range" min={30} max={120} value={Math.round(fontScale * 100)}
              onChange={e => setFontScale(Number(e.target.value) / 100)}
              className="w-20 h-1 accent-teal-600"
            />
            <span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
          </label>
          <button
            onClick={() => setGlobalBold(b => !b)}
            className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
              globalBold
                ? 'bg-teal-600 text-white border-teal-600'
                : 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
            }`}
          >
            B
          </button>
          <button
            onClick={() => setImageRotation(r => r === 0 ? 180 : 0)}
            className={`px-2 py-1 text-xs rounded border transition-colors ${
              imageRotation === 180
                ? 'bg-teal-600 text-white border-teal-600'
                : 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
            }`}
            title="Bild 180° drehen"
          >
            180°
          </button>
          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
          {/* Positioning mode toggle */}
          <button
            onClick={() => setPositioningMode(m => m === 'slide' ? 'cluster' : 'slide')}
            className={`px-2 py-1 text-xs rounded border transition-colors ${
              positioningMode === 'slide'
                ? 'bg-orange-500 text-white border-orange-500'
                : 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
            }`}
            title={positioningMode === 'slide'
              ? 'Slide-Modus: Woerter von links nach rechts schieben (klick fuer Cluster-Modus)'
              : 'Cluster-Modus: Woerter an Pixel-Cluster zuordnen (klick fuer Slide-Modus)'}
          >
            {positioningMode === 'slide' ? 'Slide' : 'Cluster'}
          </button>
          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
          {/* Text color */}
          {(['red', 'blue', 'black'] as const).map(c => (
            <button
              key={c}
              onClick={() => setTextColor(c)}
              className={`w-5 h-5 rounded-full border-2 transition-colors ${
                textColor === c ? 'border-teal-500 ring-1 ring-teal-300' : 'border-gray-300 dark:border-gray-600'
              }`}
              style={{ backgroundColor: c === 'black' ? '#1a1a1a' : c }}
              title={`Textfarbe: ${c}`}
            />
          ))}
          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
          {/* Text opacity */}
          <label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
            Text
            <input
              type="range" min={0} max={100} value={textOpacity}
              onChange={e => setTextOpacity(Number(e.target.value))}
              className="w-16 h-1 accent-teal-600"
            />
            <span className="w-8 text-right font-mono">{textOpacity}%</span>
          </label>
          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
          <button
            onClick={saveReconstruction}
            disabled={status === 'saving'}
            className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 transition-colors font-medium"
          >
            Speichern
          </button>
        </div>
      </div>
      {/* True overlay: text layer on top of original image */}
      <div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-gray-50 dark:bg-gray-900">
        <div
          ref={reconRef}
          className="relative"
          style={{ aspectRatio: `${imgW} / ${imgH}` }}
        >
          {/* Background: original image */}
          {/* eslint-disable-next-line @next/next/no-img-element */}
          <img
            src={overlayImageUrl}
            alt="Original"
            className="absolute inset-0 w-full h-full object-contain"
            onLoad={(e) => {
              const img = e.target as HTMLImageElement
              setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
            }}
          />
          {/* Text overlay layer */}
          <div
            className="absolute inset-0"
            style={{ opacity: textOpacity / 100 }}
          >
            {/* Row lines */}
            {rows.map((row, i) => (
              <div
                key={`row-${i}`}
                className="absolute left-0 right-0 border-t border-cyan-400/40"
                style={{ top: `${(row.y / imgH) * 100}%` }}
              />
            ))}
            {/* Pixel-positioned words / editable inputs */}
            {cells.map((cell) => {
              const wordPos = cellWordPositions.get(cell.cellId)
              // Pixel-analysed: render word-groups at detected positions
              if (wordPos && wordPos.length > 0) {
                return wordPos.map((wp, i) => {
                  const fs = Math.max(6, medianCellHeightPx * wp.fontRatio * fontScale)
                  const key = `${cell.cellId}_wp_${i}`
                  // Several groups in one cell: read-only spans, one per group
                  if (wordPos.length > 1) {
                    return (
                      <span
                        key={key}
                        className="absolute leading-none pointer-events-none select-none"
                        style={{
                          left: `${wp.xPct}%`,
                          top: `${wp.yPct}%`,
                          width: `${wp.wPct}%`,
                          height: `${wp.hPct}%`,
                          fontSize: `${fs}px`,
                          fontWeight: globalBold ? 'bold' : 'normal',
                          fontFamily: "'Liberation Sans', Arial, sans-serif",
                          display: 'flex',
                          alignItems: 'center',
                          whiteSpace: 'nowrap',
                          overflow: 'visible',
                          color: colorValue,
                        }}
                      >
                        {wp.text}
                      </span>
                    )
                  }
                  return renderCellInput(cell, key, { x: wp.xPct, y: wp.yPct, w: wp.wPct, h: wp.hPct }, fs)
                })
              }
              // Fallback: no pixel data — single input at cell bbox
              if (!cell.text) return null
              return renderCellInput(cell, cell.cellId, cell.bboxPct, Math.max(6, medianCellHeightPx * fontScale))
            })}
          </div>
        </div>
      </div>
      {/* Bottom action */}
      <div className="flex justify-end">
        <button
          onClick={() => {
            if (changedCount > 0) {
              saveReconstruction()
            } else {
              onNext()
            }
          }}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium text-sm"
        >
          {changedCount > 0 ? 'Speichern & Fertig' : 'Fertig'}
        </button>
      </div>
    </div>
  )
}

View File

@@ -1,153 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import { OverlayReconstruction } from './OverlayReconstruction'
// Path prefix prepended to every OCR-pipeline request URL built in this file.
const KLAUSUR_API = '/klausur-api'
// UI phase of the step: start prompt, OCR request in flight, or overlay editor shown.
type Phase = 'idle' | 'running' | 'overlay'
/**
 * Props of PaddleDirectStep. Everything except `sessionId` and `onNext` is
 * optional and falls back to the defaults in the component signature.
 */
interface PaddleDirectStepProps {
/** Session to run OCR on; a hint is rendered instead of the step while it is null. */
sessionId: string | null
/** Passed on to the overlay editor as its own `onNext`. */
onNext: () => void
/** Backend endpoint suffix, default: 'paddle-direct' */
endpoint?: string
/** Title shown in idle state */
title?: string
/** Description shown in idle state */
description?: string
/** Icon shown in idle state */
icon?: string
/** Button label */
buttonLabel?: string
/** Running label */
runningLabel?: string
/** Compared with `word_result.ocr_engine` of the stored session; on a match the overlay is shown without re-running OCR. */
engineKey?: string
}
/**
 * Single-engine OCR step: starts the configured endpoint for the session and
 * then shows the result in the overlay editor.
 *
 * On mount and whenever `sessionId` or `engineKey` changes, the step resets to
 * its idle state and checks whether the stored session already holds a
 * `word_result` produced by `engineKey`; if so it jumps straight to the
 * overlay. The reset prevents stats or errors of a previous session from being
 * shown for the new one, and the response of an outdated check is ignored.
 *
 * Labels, icon, endpoint and engine key are configurable via props; see
 * PaddleDirectStepProps for the individual defaults.
 */
export function PaddleDirectStep({
  sessionId,
  onNext,
  endpoint = 'paddle-direct',
  title = 'PP-OCRv5 Direct',
  description = 'PP-OCRv5 (lokal via RapidOCR) erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
  icon = '⚡',
  buttonLabel = 'PP-OCRv5 starten',
  runningLabel = 'PP-OCRv5 laeuft...',
  engineKey = 'paddle_direct',
}: PaddleDirectStepProps) {
  const [phase, setPhase] = useState<Phase>('idle')
  const [error, setError] = useState<string | null>(null)
  const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)

  // Auto-detect: if session already has matching word_result → show overlay
  useEffect(() => {
    // State belongs to one session; start clean when the session changes.
    setPhase('idle')
    setStats(null)
    setError(null)
    if (!sessionId) return
    let cancelled = false
    ;(async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok || cancelled) return
        const data = await res.json()
        // Re-check after the second await: the session may have changed meanwhile.
        if (cancelled) return
        if (data.word_result?.ocr_engine === engineKey) {
          setPhase('overlay')
        }
      } catch {
        // Best effort only: on failure the user simply starts OCR manually.
      }
    })()
    return () => { cancelled = true }
  }, [sessionId, engineKey])

  const runOcr = useCallback(async () => {
    if (!sessionId) return
    setPhase('running')
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
        method: 'POST',
      })
      if (!res.ok) {
        // The error body is optional; fall back to the bare status code.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `HTTP ${res.status}`)
      }
      const data = await res.json()
      setStats({
        cells: data.summary?.total_cells || 0,
        rows: data.grid_shape?.rows || 0,
        duration: data.duration_seconds || 0,
      })
      setPhase('overlay')
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      setPhase('idle')
    }
  }, [sessionId, endpoint])

  if (!sessionId) {
    return (
      <div className="text-sm text-gray-400 py-8 text-center">
        Bitte zuerst ein Bild hochladen.
      </div>
    )
  }

  if (phase === 'overlay') {
    return (
      <div className="space-y-3">
        {/* Stats exist only when OCR was run in this mount, not after auto-detect */}
        {stats && (
          <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
            <span>{stats.cells} Woerter erkannt</span>
            <span>{stats.rows} Zeilen</span>
            <span>{stats.duration.toFixed(1)}s</span>
          </div>
        )}
        <OverlayReconstruction sessionId={sessionId} onNext={onNext} />
      </div>
    )
  }

  return (
    <div className="flex flex-col items-center justify-center py-16 space-y-6">
      {phase === 'running' ? (
        <>
          <div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
          <div className="text-center space-y-1">
            <p className="text-sm font-medium text-gray-700 dark:text-gray-300">
              {runningLabel}
            </p>
            <p className="text-xs text-gray-400">
              Bild wird analysiert (ca. 5-30s)
            </p>
          </div>
        </>
      ) : (
        <>
          <div className="text-center space-y-2">
            <div className="text-4xl">{icon}</div>
            <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
              {title}
            </h3>
            <p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
              {description}
            </p>
          </div>
          {error && (
            <div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 px-4 py-2 rounded-lg">
              {error}
            </div>
          )}
          <button
            onClick={runOcr}
            className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
          >
            {buttonLabel}
          </button>
        </>
      )}
    </div>
  )
}

View File

@@ -1,253 +0,0 @@
import { useEffect, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
/**
 * Placement of one word group inside a grid cell, as produced by
 * usePixelWordPositions. Position values use the same percent coordinate
 * space as GridCell.bbox_pct.
 */
export interface WordPosition {
// Left edge: the cell's bbox x plus the detected cluster's offset inside the cell.
xPct: number
// Width of the detected pixel cluster.
wPct: number
// Top edge; taken unchanged from the cell's bbox.
yPct: number
// Height; taken unchanged from the cell's bbox.
hPct: number
// Text of this word group.
text: string
// Estimated font size divided by the cell's pixel height, capped at 1.0.
fontRatio: number
}
/**
* Analyse dark-pixel clusters on an image to determine
* the exact horizontal position & auto-font-size of word groups in each cell.
*
* When rotation=180, the image is rotated 180° before pixel analysis.
* Cell coordinates are transformed to the rotated space for reading,
* and cluster positions are mirrored back to the original coordinate system.
*
* Returns a Map<cell_id, WordPosition[]>.
*/
export function usePixelWordPositions(
imageUrl: string,
cells: GridCell[],
active: boolean,
rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())
useEffect(() => {
if (!active || cells.length === 0 || !imageUrl) return
const img = new Image()
img.crossOrigin = 'anonymous'
img.onload = () => {
const imgW = img.naturalWidth
const imgH = img.naturalHeight
const canvas = document.createElement('canvas')
canvas.width = imgW
canvas.height = imgH
const ctx = canvas.getContext('2d')
if (!ctx) return
if (rotation === 180) {
ctx.translate(imgW, imgH)
ctx.rotate(Math.PI)
ctx.drawImage(img, 0, 0)
ctx.setTransform(1, 0, 0, 1, 0, 0)
} else {
ctx.drawImage(img, 0, 0)
}
const refFontSize = 40
const fontFam = "'Liberation Sans', Arial, sans-serif"
ctx.font = `${refFontSize}px ${fontFam}`
const positions = new Map<string, WordPosition[]>()
for (const cell of cells) {
if (!cell.bbox_pct || !cell.text) continue
const rawGroups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
// Merge single-char symbol groups (OCR artifacts from box borders like "|", ">")
// with their neighbour to avoid polluting the cluster-to-group matching
const groups: string[] = []
for (let gi = 0; gi < rawGroups.length; gi++) {
const g = rawGroups[gi]
const isArtifact = g.length <= 2 && !/[a-zA-Z0-9\u00C0-\u024F]/.test(g)
if (isArtifact) {
if (gi + 1 < rawGroups.length) {
// merge with next group
rawGroups[gi + 1] = g + ' ' + rawGroups[gi + 1]
} else if (groups.length > 0) {
// last group — merge with previous
groups[groups.length - 1] += ' ' + g
} else {
groups.push(g)
}
} else {
groups.push(g)
}
}
let cx: number, cy: number
const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
if (rotation === 180) {
cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
} else {
cx = Math.round(cell.bbox_pct.x / 100 * imgW)
cy = Math.round(cell.bbox_pct.y / 100 * imgH)
}
if (cw <= 0 || ch <= 0) continue
if (cx < 0) cx = 0
if (cy < 0) cy = 0
if (cx + cw > imgW || cy + ch > imgH) continue
const imageData = ctx.getImageData(cx, cy, cw, ch)
const proj = new Float32Array(cw)
for (let y = 0; y < ch; y++) {
for (let x = 0; x < cw; x++) {
const idx = (y * cw + x) * 4
const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
if (lum < 128) proj[x]++
}
}
const threshold = Math.max(1, ch * 0.03)
const minGap = Math.max(5, Math.round(cw * 0.02))
let clusters: { start: number; end: number }[] = []
let inCluster = false
let clStart = 0
let gap = 0
for (let x = 0; x < cw; x++) {
if (proj[x] >= threshold) {
if (!inCluster) { clStart = x; inCluster = true }
gap = 0
} else if (inCluster) {
gap++
if (gap > minGap) {
clusters.push({ start: clStart, end: x - gap })
inCluster = false
gap = 0
}
}
}
if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
if (clusters.length === 0) continue
// Filter out very narrow clusters (likely box borders / vertical lines)
const minClusterW = Math.max(3, Math.round(cw * 0.005))
clusters = clusters.filter(c => (c.end - c.start + 1) > minClusterW)
if (clusters.length === 0) continue
if (rotation === 180) {
clusters = clusters.map(c => ({
start: cw - 1 - c.end,
end: cw - 1 - c.start,
})).reverse()
}
const wordPos: WordPosition[] = []
// Match groups to clusters using width-proportional assignment.
// Each group is assigned to the cluster whose width best matches
// the group's expected pixel width (text measurement).
if (groups.length > 1 && clusters.length >= groups.length) {
// Measure each group's expected width
const groupWidths = groups.map(g => ctx.measureText(g).width)
// Greedy assignment: for each group (in order), find the best
// unassigned cluster by width ratio consistency
const totalMeasured = groupWidths.reduce((a, b) => a + b, 0)
const totalClusterW = clusters.reduce((a, c) => a + (c.end - c.start + 1), 0)
const refScale = totalClusterW / totalMeasured
const used = new Set<number>()
const assignments: number[] = []
for (let gi = 0; gi < groups.length; gi++) {
const expectedW = groupWidths[gi] * refScale
let bestIdx = -1
let bestDiff = Infinity
for (let ci = 0; ci < clusters.length; ci++) {
if (used.has(ci)) continue
const clW = clusters[ci].end - clusters[ci].start + 1
const diff = Math.abs(clW - expectedW)
if (diff < bestDiff) {
bestDiff = diff
bestIdx = ci
}
}
used.add(bestIdx)
assignments.push(bestIdx)
}
// Sort assignments to maintain left-to-right order
const sortedPairs = assignments
.map((ci, gi) => ({ ci, gi }))
.sort((a, b) => clusters[a.ci].start - clusters[b.ci].start)
for (const { ci, gi } of sortedPairs) {
const cl = clusters[ci]
const clusterW = cl.end - cl.start + 1
const autoFontPx = refFontSize * (clusterW / groupWidths[gi])
const fontRatio = Math.min(autoFontPx / ch, 1.0)
wordPos.push({
xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
yPct: cell.bbox_pct.y,
hPct: cell.bbox_pct.h,
text: groups[gi],
fontRatio,
})
}
} else {
// Single group OR not enough clusters:
// use the WIDEST cluster (not first-to-last span which pulls in
// stray pixels from adjacent page areas like box borders)
const widest = clusters.reduce((best, c) =>
(c.end - c.start) > (best.end - best.start) ? c : best, clusters[0])
const clusterW = widest.end - widest.start + 1
const measured = ctx.measureText(cell.text.trim())
const autoFontPx = refFontSize * (clusterW / measured.width)
const fontRatio = Math.min(autoFontPx / ch, 1.0)
wordPos.push({
xPct: cell.bbox_pct.x + (widest.start / cw) * cell.bbox_pct.w,
wPct: ((widest.end - widest.start + 1) / cw) * cell.bbox_pct.w,
yPct: cell.bbox_pct.y,
hPct: cell.bbox_pct.h,
text: cell.text.trim(),
fontRatio,
})
}
positions.set(cell.cell_id, wordPos)
}
// Normalise: find the most common fontRatio (mode) and apply it to all
const allRatios: number[] = []
for (const wps of positions.values()) {
for (const wp of wps) allRatios.push(wp.fontRatio)
}
if (allRatios.length > 0) {
const buckets = new Map<number, number>()
for (const r of allRatios) {
const key = Math.round(r * 50) / 50
buckets.set(key, (buckets.get(key) || 0) + 1)
}
let modeRatio = allRatios[0]
let modeCount = 0
for (const [ratio, count] of buckets) {
if (count > modeCount) { modeRatio = ratio; modeCount = count }
}
for (const wps of positions.values()) {
for (const wp of wps) wp.fontRatio = modeRatio
}
}
setCellWordPositions(positions)
}
img.src = imageUrl
}, [active, cells, imageUrl, rotation])
return cellWordPositions
}

View File

@@ -1,231 +0,0 @@
import { useEffect, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
/**
 * Placement of a single overlay word.
 *
 * The x/y/w/h fields are percentages: useSlideWordPositions in this file
 * either derives them as `pixel / imageSize * 100` or offsets them from a
 * cell's `bbox_pct`.
 */
export interface WordPosition {
// Left edge, in percent of the image width.
xPct: number
// Width, in percent of the image width.
wPct: number
// Top edge, in percent of the image height.
yPct: number
// Height, in percent of the image height.
hPct: number
// The word (or token) to render at this position.
text: string
// Font scale factor; every position built by useSlideWordPositions uses 1.0.
fontRatio: number
}
/**
 * "Slide from left" word positioning for the OCR overlay.
 *
 * One of two paths is chosen each time the image finishes loading:
 *
 * 1. Word-box path - taken when at least one cell has a non-empty
 *    `word_boxes` array. Per cell, boxes with non-blank text are sorted
 *    left-to-right and emitted as-is: BOTH the text and the position come
 *    from the box (`left`/`width` are divided by the image's natural width,
 *    `top`/`height` by its natural height, then scaled to percent).
 *    A cell that has no usable boxes gets its `cell.text` tokens spread
 *    evenly across the cell's `bbox_pct`.
 *    NOTE(review): `rotation` is not consulted on this path - confirm the
 *    word boxes are already in display coordinates.
 *
 * 2. Pixel-projection fallback - taken when no cell has word boxes. The
 *    image is drawn to an off-screen canvas (rotated by 180 degrees when
 *    requested), each cell is reduced to a per-column "has ink" mask, and
 *    every token of `cell.text` is slid rightwards from the current cursor
 *    until a window of its estimated width covers enough ink.
 *
 * The effect is cancelled on cleanup, so an image that finishes loading
 * after the inputs changed (or after unmount) never overwrites newer state.
 *
 * @param imageUrl URL of the page image; loaded with `crossOrigin = 'anonymous'`.
 * @param cells    Grid cells to position; cells without `bbox_pct` or `text` are skipped.
 * @param active   When false the effect does nothing and the previous result is kept.
 * @param rotation 0 or 180; only affects the pixel-projection fallback.
 * @returns Map from `cell_id` to that cell's word positions (empty until the first load).
 */
export function useSlideWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [result, setResult] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Flipped by the cleanup function below; guards against stale loads.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight

      const hasWordBoxes = cells.some(c => c.word_boxes && c.word_boxes.length > 0)

      if (hasWordBoxes) {
        // --- WORD-BOX PATH: use OCR positions (and OCR text) directly ---
        const positions = new Map<string, WordPosition[]>()

        for (const cell of cells) {
          if (!cell.bbox_pct || !cell.text) continue

          const boxes = (cell.word_boxes || [])
            .filter(wb => wb.text.trim())
            .sort((a, b) => a.left - b.left)

          if (boxes.length === 0) {
            // No usable word_boxes for this cell: spread tokens evenly across it.
            const tokens = cell.text.split(/\s+/).filter(Boolean)
            if (tokens.length === 0) continue
            const fallbackW = cell.bbox_pct.w / tokens.length
            const wordPos = tokens.map((t, i) => ({
              xPct: cell.bbox_pct.x + i * fallbackW,
              wPct: fallbackW,
              yPct: cell.bbox_pct.y,
              hPct: cell.bbox_pct.h,
              text: t,
              fontRatio: 1.0,
            }))
            positions.set(cell.cell_id, wordPos)
            continue
          }

          // Convert each box from image coordinates to percentages.
          const wordPos: WordPosition[] = boxes.map(box => ({
            xPct: (box.left / imgW) * 100,
            wPct: (box.width / imgW) * 100,
            yPct: (box.top / imgH) * 100,
            hPct: (box.height / imgH) * 100,
            text: box.text,
            fontRatio: 1.0,
          }))

          if (wordPos.length > 0) {
            positions.set(cell.cell_id, wordPos)
          }
        }

        setResult(positions)
        return
      }

      // --- FALLBACK: pixel-projection slide (no word_boxes) ---
      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        // Draw the image upside down, then restore the identity transform.
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        ctx.setTransform(1, 0, 0, 1, 0, 0)
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Text is measured at a fixed reference size and scaled to the
      // estimated rendered size (70% of the median cell height).
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      const cellHeights = cells
        .filter(c => c.bbox_pct && c.bbox_pct.h > 0)
        .map(c => Math.round(c.bbox_pct.h / 100 * imgH))
        .sort((a, b) => a - b)
      const medianCh = cellHeights.length > 0
        ? cellHeights[Math.floor(cellHeights.length / 2)]
        : 30
      const renderedFontImgPx = medianCh * 0.7
      const measureScale = renderedFontImgPx / refFontSize
      const spaceWidthPx = Math.max(2, Math.round(ctx.measureText(' ').width * measureScale))

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)

        if (rotation === 180) {
          // The canvas was drawn rotated, so mirror the cell rectangle too.
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }

        if (cw <= 0 || ch <= 0) continue
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue

        // Vertical projection: number of dark pixels in each column.
        const imageData = ctx.getImageData(cx, cy, cw, ch)
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // A column counts as "ink" when at least 3% of its rows are dark.
        const threshold = Math.max(1, ch * 0.03)
        const ink = new Uint8Array(cw)
        for (let x = 0; x < cw; x++) {
          ink[x] = proj[x] >= threshold ? 1 : 0
        }
        if (rotation === 180) {
          // Back to reading order (left-to-right in the unrotated page).
          ink.reverse()
        }

        // inkPrefix[i] = number of ink columns in [0, i); lets every window
        // sum below be answered in O(1) instead of re-adding tokenW columns.
        const inkPrefix = new Uint32Array(cw + 1)
        for (let x = 0; x < cw; x++) {
          inkPrefix[x + 1] = inkPrefix[x] + ink[x]
        }

        const tokens = cell.text.split(/\s+/).filter(Boolean)
        if (tokens.length === 0) continue

        const tokenWidthsPx = tokens.map(t =>
          Math.max(4, Math.round(ctx.measureText(t).width * measureScale))
        )

        const wordPos: WordPosition[] = []
        let cursor = 0

        for (let ti = 0; ti < tokens.length; ti++) {
          const tokenW = tokenWidthsPx[ti]
          // The window must cover ink in at least 15% of the token's width.
          const coverageNeeded = Math.max(1, Math.round(tokenW * 0.15))
          let bestX = cursor

          const searchLimit = Math.max(cursor, cw - tokenW)
          for (let x = cursor; x <= searchLimit; x++) {
            const spanEnd = Math.min(x + tokenW, cw)
            // The window is empty when the cursor already sits at/after the cell edge.
            const inkCount = spanEnd > x ? inkPrefix[spanEnd] - inkPrefix[x] : 0
            if (inkCount >= coverageNeeded) {
              bestX = x
              break
            }
            // For every token but the first, give up after sliding more
            // than 30% of the cell width and stay at the cursor.
            if (x > cursor + cw * 0.3 && ti > 0) {
              bestX = cursor
              break
            }
          }

          // Keep the token inside the cell.
          if (bestX + tokenW > cw) {
            bestX = Math.max(0, cw - tokenW)
          }

          wordPos.push({
            xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w,
            wPct: (tokenW / cw) * cell.bbox_pct.w,
            yPct: cell.bbox_pct.y,
            hPct: cell.bbox_pct.h,
            text: tokens[ti],
            fontRatio: 1.0,
          })

          cursor = bestX + tokenW + spaceWidthPx
        }

        if (wordPos.length > 0) {
          positions.set(cell.cell_id, wordPos)
        }
      }

      setResult(positions)
    }
    img.src = imageUrl

    return () => {
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return result
}

View File

@@ -1,68 +0,0 @@
'use client'
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
/** Props for the BoxSessionTabs component. */
interface BoxSessionTabsProps {
// Session id reported through onSessionChange when the "Hauptseite" tab is clicked.
parentSessionId: string
// One tab is rendered per entry; an empty array makes the component render nothing.
subSessions: SubSession[]
// Id of the tab to highlight (compared against parentSessionId and each sub.id).
activeSessionId: string
// Invoked with the session id of the clicked tab.
onSessionChange: (sessionId: string) => void
}
// Icon characters shown in front of each sub-session tab label, keyed by
// the coarse state that getStatusIcon derives for the sub-session.
const STATUS_ICONS: Record<string, string> = {
pending: '\u23F3', // hourglass
processing: '\uD83D\uDD04', // arrows
completed: '\u2713', // checkmark
}
/**
 * Choose the status icon for a sub-session tab.
 *
 * - "completed" icon: status is 'completed', or current_step is 9 or higher.
 * - "processing" icon: current_step is greater than 1.
 * - "pending" icon: everything else (including a missing or zero step).
 */
function getStatusIcon(sub: SubSession): string {
  if (sub.status === 'completed') {
    return STATUS_ICONS.completed
  }

  const step = sub.current_step
  if (step) {
    if (step >= 9) return STATUS_ICONS.completed
    if (step > 1) return STATUS_ICONS.processing
  }

  return STATUS_ICONS.pending
}
/**
 * Tab strip for switching between the main page session and its box
 * sub-sessions. Renders nothing when there are no sub-sessions.
 *
 * The first tab ("Hauptseite") selects the parent session; each following
 * tab selects one sub-session and shows its status icon plus a 1-based box
 * number. Clicking a tab calls onSessionChange with that session's id.
 */
export function BoxSessionTabs({ parentSessionId, subSessions, activeSessionId, onSessionChange }: BoxSessionTabsProps) {
  if (subSessions.length === 0) return null

  // Shared styling for every tab; only the selected/unselected part differs.
  const tabClass = (selected: boolean): string => {
    const stateClasses = selected
      ? 'bg-white dark:bg-gray-700 text-teal-700 dark:text-teal-400 shadow-sm ring-1 ring-teal-300 dark:ring-teal-600'
      : 'text-gray-500 dark:text-gray-400 hover:bg-white/50 dark:hover:bg-gray-700/50'
    return `px-3 py-1.5 rounded-lg text-xs font-medium transition-colors ${stateClasses}`
  }

  const subTabs = subSessions.map((sub) => (
    <button
      key={sub.id}
      onClick={() => onSessionChange(sub.id)}
      className={tabClass(activeSessionId === sub.id)}
      title={sub.name}
    >
      <span className="mr-1">{getStatusIcon(sub)}</span>
      Box {sub.box_index + 1}
    </button>
  ))

  return (
    <div className="flex items-center gap-1.5 px-1 py-1.5 bg-gray-50 dark:bg-gray-800/50 rounded-xl border border-gray-200 dark:border-gray-700">
      {/* Main session tab */}
      <button
        onClick={() => onSessionChange(parentSessionId)}
        className={tabClass(activeSessionId === parentSessionId)}
      >
        Hauptseite
      </button>
      <div className="w-px h-5 bg-gray-200 dark:bg-gray-700" />
      {/* Sub-session tabs */}
      {subTabs}
    </div>
  )
}

View File

@@ -1,320 +0,0 @@
'use client'
import { useState, useMemo } from 'react'
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
/** Props for the ColumnControls component. */
interface ColumnControlsProps {
// Detection result to display; while null the component renders nothing.
columnResult: ColumnResult | null
// Click handler of the "Erneut erkennen" button.
onRerun: () => void
// Click handler of the "Manuell markieren" button.
onManualMode: () => void
// Click handler of the ground-truth enter/edit button.
onGtMode: () => void
// Called with { is_correct } when the user answers "Ja" or "Nein".
onGroundTruth: (gt: ColumnGroundTruth) => void
// Click handler of the "Weiter" button.
onNext: () => void
// Disables the "Erneut erkennen" button while true.
isDetecting: boolean
// Saved ground-truth columns; when present the Auto-vs-GT diff table is
// computed and the ground-truth button label switches to "bearbeiten".
savedGtColumns: PageRegion[] | null
}
// CSS utility classes for the type badge, keyed by region type.
// Unknown types fall back to an empty class string at the call sites.
const TYPE_COLORS: Record<string, string> = {
column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
header: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
footer: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
}
// Display label for the type badge, keyed by region type.
// Unknown types fall back to the raw type string at the call sites.
const TYPE_LABELS: Record<string, string> = {
column_en: 'EN',
column_de: 'DE',
column_example: 'Beispiel',
column_text: 'Text',
page_ref: 'Seite',
column_marker: 'Marker',
column_ignore: 'Ignorieren',
header: 'Header',
footer: 'Footer',
}
// Display label for a column's classification_method; unknown methods are
// shown verbatim at the call site.
const METHOD_LABELS: Record<string, string> = {
content: 'Inhalt',
position_enhanced: 'Position',
position_fallback: 'Fallback',
}
/** One row of the Auto-vs-Ground-Truth comparison table built by computeDiff. */
interface DiffRow {
// 1-based row number in output order.
index: number
// Auto-detected column, or null for a ground-truth column without a match.
autoCol: PageRegion | null
// Ground-truth column, or null for an auto column without a match.
gtCol: PageRegion | null
// gtCol.x - autoCol.x for matched rows; null for unmatched rows.
diffX: number | null
// gtCol.width - autoCol.width for matched rows; null for unmatched rows.
diffW: number | null
// True when a matched pair has different types; always false for unmatched rows.
typeMismatch: boolean
}
/**
 * Pair auto-detected columns with ground-truth (GT) columns by their overlap
 * on the X axis and report the differences.
 *
 * Matching is greedy in `autoCols` order: each auto column takes the still
 * unused GT column with the highest 1-D intersection-over-union of the
 * `[x, x + width]` intervals, and the pair is accepted only when that IoU is
 * strictly greater than `minIoU`.
 *
 * Output order: matched pairs first (in auto order), then unmatched auto
 * columns, then unmatched GT columns. `index` is 1-based over that order.
 *
 * @param autoCols Auto-detected columns.
 * @param gtCols   Ground-truth columns.
 * @param minIoU   Minimum IoU (exclusive) for a pair to count as a match.
 *                 Defaults to 0.3, the threshold this function has always used.
 * @returns One DiffRow per matched pair and per unmatched column.
 */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[], minIoU = 0.3): DiffRow[] {
  const rows: DiffRow[] = []
  const usedGt = new Set<number>()
  const usedAuto = new Set<number>()

  // Match auto -> GT by best X-axis overlap
  for (let ai = 0; ai < autoCols.length; ai++) {
    const a = autoCols[ai]
    let bestIdx = -1
    let bestIoU = 0

    for (let gi = 0; gi < gtCols.length; gi++) {
      if (usedGt.has(gi)) continue
      const g = gtCols[gi]
      const overlapStart = Math.max(a.x, g.x)
      const overlapEnd = Math.min(a.x + a.width, g.x + g.width)
      const overlap = Math.max(0, overlapEnd - overlapStart)
      const union = (a.width + g.width) - overlap
      const iou = union > 0 ? overlap / union : 0
      // Strict comparison: a GT column with zero overlap is never a candidate.
      if (iou > bestIoU) {
        bestIoU = iou
        bestIdx = gi
      }
    }

    if (bestIdx >= 0 && bestIoU > minIoU) {
      usedGt.add(bestIdx)
      usedAuto.add(ai)
      const g = gtCols[bestIdx]
      rows.push({
        index: rows.length + 1,
        autoCol: a,
        gtCol: g,
        diffX: g.x - a.x,
        diffW: g.width - a.width,
        typeMismatch: a.type !== g.type,
      })
    }
  }

  // Unmatched auto columns
  for (let ai = 0; ai < autoCols.length; ai++) {
    if (usedAuto.has(ai)) continue
    rows.push({
      index: rows.length + 1,
      autoCol: autoCols[ai],
      gtCol: null,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  // Unmatched GT columns
  for (let gi = 0; gi < gtCols.length; gi++) {
    if (usedGt.has(gi)) continue
    rows.push({
      index: rows.length + 1,
      autoCol: null,
      gtCol: gtCols[gi],
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  return rows
}
/**
 * Control panel for the column-detection step.
 *
 * Shows a summary of the detected columns with re-run / manual / ground-truth
 * buttons, a list of all detected regions, an optional Auto-vs-Ground-Truth
 * diff table (only when savedGtColumns is set), and a "Spalten korrekt?"
 * yes/no prompt next to the "Weiter" button. Renders nothing while
 * columnResult is null.
 */
export function ColumnControls({ columnResult, onRerun, onManualMode, onGtMode, onGroundTruth, onNext, isDetecting, savedGtColumns }: ColumnControlsProps) {
// True once the user has answered the yes/no prompt; replaces the buttons with "Gespeichert".
const [gtSaved, setGtSaved] = useState(false)
// Diff rows between auto and GT columns; null when either side is missing.
// Only regions whose type starts with "column" or equals "page_ref" take part.
// Declared before the early return below so the hook runs on every render.
const diffRows = useMemo(() => {
if (!columnResult || !savedGtColumns) return null
const autoCols = columnResult.columns.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
const gtCols = savedGtColumns.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
return computeDiff(autoCols, gtCols)
}, [columnResult, savedGtColumns])
if (!columnResult) return null
// Split regions into real columns (incl. page_ref) and everything else.
const columns = columnResult.columns.filter((c: PageRegion) => c.type.startsWith('column') || c.type === 'page_ref')
const headerFooter = columnResult.columns.filter((c: PageRegion) => !c.type.startsWith('column') && c.type !== 'page_ref')
// Report the yes/no answer and lock the prompt.
const handleGt = (isCorrect: boolean) => {
onGroundTruth({ is_correct: isCorrect })
setGtSaved(true)
}
return (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
{/* Summary */}
<div className="flex items-center gap-3 flex-wrap">
<div className="text-sm text-gray-600 dark:text-gray-400">
<span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
{columnResult.duration_seconds > 0 && (
<span className="ml-2 text-xs">({columnResult.duration_seconds}s)</span>
)}
</div>
<button
onClick={onRerun}
disabled={isDetecting}
className="text-xs px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded hover:bg-gray-200 dark:hover:bg-gray-600 transition-colors disabled:opacity-50"
>
Erneut erkennen
</button>
<button
onClick={onManualMode}
className="text-xs px-2 py-1 bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400 rounded hover:bg-teal-200 dark:hover:bg-teal-900/50 transition-colors"
>
Manuell markieren
</button>
<button
onClick={onGtMode}
className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
>
{savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
</button>
</div>
{/* Column list */}
<div className="space-y-2">
{columns.map((col: PageRegion, i: number) => (
<div key={i} className="flex items-center gap-3 text-sm">
<span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[col.type] || ''}`}>
{TYPE_LABELS[col.type] || col.type}
</span>
{col.classification_confidence != null && col.classification_confidence < 1.0 && (
<span className="text-xs font-medium text-gray-600 dark:text-gray-300">
{Math.round(col.classification_confidence * 100)}%
</span>
)}
{col.classification_method && (
<span className="text-xs text-gray-400 dark:text-gray-500">
({METHOD_LABELS[col.classification_method] || col.classification_method})
</span>
)}
<span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
x={col.x} y={col.y} {col.width}x{col.height}px
</span>
</div>
))}
{headerFooter.map((r: PageRegion, i: number) => (
<div key={`hf-${i}`} className="flex items-center gap-3 text-sm">
<span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[r.type] || ''}`}>
{TYPE_LABELS[r.type] || r.type}
</span>
<span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
x={r.x} y={r.y} {r.width}x{r.height}px
</span>
</div>
))}
</div>
{/* Diff table (Auto vs GT) */}
{diffRows && diffRows.length > 0 && (
<div className="border-t border-gray-100 dark:border-gray-700 pt-3">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
Vergleich: Auto vs Ground Truth
</div>
<div className="overflow-x-auto">
<table className="w-full text-xs">
<thead>
<tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
<th className="text-left py-1 pr-2">#</th>
<th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
<th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
<th className="text-right py-1 pr-2">Diff X</th>
<th className="text-right py-1">Diff W</th>
</tr>
</thead>
<tbody>
{diffRows.map((row) => (
<tr
key={row.index}
className={
!row.autoCol || !row.gtCol || row.typeMismatch
? 'bg-red-50 dark:bg-red-900/10'
: (row.diffX !== null && Math.abs(row.diffX) > 20) || (row.diffW !== null && Math.abs(row.diffW) > 20)
? 'bg-amber-50 dark:bg-amber-900/10'
: ''
}
>
<td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
<td className="py-1 pr-2 font-mono">
{row.autoCol ? (
<span>
<span className={`inline-block px-1 rounded ${TYPE_COLORS[row.autoCol.type] || ''}`}>
{TYPE_LABELS[row.autoCol.type] || row.autoCol.type}
</span>
{' '}{row.autoCol.x}, {row.autoCol.width}
</span>
) : (
<span className="text-red-400">fehlt</span>
)}
</td>
<td className="py-1 pr-2 font-mono">
{row.gtCol ? (
<span>
<span className={`inline-block px-1 rounded ${TYPE_COLORS[row.gtCol.type] || ''}`}>
{TYPE_LABELS[row.gtCol.type] || row.gtCol.type}
</span>
{' '}{row.gtCol.x}, {row.gtCol.width}
</span>
) : (
<span className="text-red-400">fehlt</span>
)}
</td>
<td className="py-1 pr-2 text-right font-mono">
{row.diffX !== null ? (
<span className={Math.abs(row.diffX) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
{row.diffX > 0 ? '+' : ''}{row.diffX}
</span>
) : '—'}
</td>
<td className="py-1 text-right font-mono">
{row.diffW !== null ? (
<span className={Math.abs(row.diffW) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
{row.diffW > 0 ? '+' : ''}{row.diffW}
</span>
) : '—'}
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
{/* Ground Truth + Navigation */}
<div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
<div className="flex items-center gap-2">
<span className="text-sm text-gray-500 dark:text-gray-400">Spalten korrekt?</span>
{gtSaved ? (
<span className="text-xs text-green-600 dark:text-green-400">Gespeichert</span>
) : (
<>
<button
onClick={() => handleGt(true)}
className="text-xs px-3 py-1 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 rounded hover:bg-green-200 dark:hover:bg-green-900/50 transition-colors"
>
Ja
</button>
<button
onClick={() => handleGt(false)}
className="text-xs px-3 py-1 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 rounded hover:bg-red-200 dark:hover:bg-red-900/50 transition-colors"
>
Nein
</button>
</>
)}
</div>
<button
onClick={onNext}
className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
>
Weiter
</button>
</div>
</div>
)
}

View File

@@ -1,209 +0,0 @@
'use client'
import { useState } from 'react'
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
/** Props for the DeskewControls component. */
interface DeskewControlsProps {
// Deskew result to display; while null none of the panels are rendered.
deskewResult: DeskewResult | null
// Whether the "Binarisiert anzeigen" toggle is drawn in its active style.
showBinarized: boolean
// Click handler of the "Binarisiert anzeigen" toggle.
onToggleBinarized: () => void
// Whether the "Raster anzeigen" toggle is drawn in its active style.
showGrid: boolean
// Click handler of the "Raster anzeigen" toggle.
onToggleGrid: () => void
// Called with the slider angle when "Anwenden" is clicked.
onManualDeskew: (angle: number) => void
// Called with the user's correctness feedback.
onGroundTruth: (gt: DeskewGroundTruth) => void
// Click handler of the "Uebernehmen & Weiter" button.
onNext: () => void
// Disables the "Anwenden" button and swaps its label for "..." while true.
isApplying: boolean
}
// Display label for deskewResult.method_used; unknown methods are shown
// verbatim at the call site.
const METHOD_LABELS: Record<string, string> = {
hough: 'Hough-Linien',
word_alignment: 'Wortausrichtung',
manual: 'Manuell',
}
/**
 * Control panel for the deskew (rotation) step.
 *
 * When a deskewResult is available it renders four sections: the detected
 * angle / method / confidence with the binarized and grid toggles, a manual
 * angle slider (-5 to +5 in 0.1 steps), a "Rotation korrekt?" feedback
 * prompt, and the button that advances to the next step. With a null
 * deskewResult only the empty wrapper element is rendered.
 */
export function DeskewControls({
deskewResult,
showBinarized,
onToggleBinarized,
showGrid,
onToggleGrid,
onManualDeskew,
onGroundTruth,
onNext,
isApplying,
}: DeskewControlsProps) {
// Current value of the manual-correction slider.
const [manualAngle, setManualAngle] = useState(0)
// Which feedback button was chosen; 'incorrect' reveals the notes form.
const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
// Free-text notes entered for an "incorrect" verdict.
const [gtNotes, setGtNotes] = useState('')
// True once feedback has been reported; replaces the prompt with a confirmation.
const [gtSaved, setGtSaved] = useState(false)
// "Ja" reports immediately; "Nein" only opens the notes form and the
// report is sent later by handleGroundTruthIncorrect.
const handleGroundTruth = (isCorrect: boolean) => {
setGtFeedback(isCorrect ? 'correct' : 'incorrect')
if (isCorrect) {
onGroundTruth({ is_correct: true })
setGtSaved(true)
}
}
// Report an "incorrect" verdict. A slider angle of exactly 0 and empty
// notes are sent as undefined rather than as values.
const handleGroundTruthIncorrect = () => {
onGroundTruth({
is_correct: false,
corrected_angle: manualAngle !== 0 ? manualAngle : undefined,
notes: gtNotes || undefined,
})
setGtSaved(true)
}
return (
<div className="space-y-4">
{/* Results */}
{deskewResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="flex flex-wrap items-center gap-3 text-sm">
<div>
<span className="text-gray-500">Winkel:</span>{' '}
<span className="font-mono font-medium">{deskewResult.angle_applied}°</span>
</div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div>
<span className="text-gray-500">Methode:</span>{' '}
<span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
{METHOD_LABELS[deskewResult.method_used] || deskewResult.method_used}
</span>
</div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div>
<span className="text-gray-500">Konfidenz:</span>{' '}
<span className="font-mono">{Math.round(deskewResult.confidence * 100)}%</span>
</div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div className="text-gray-400 text-xs">
Hough: {deskewResult.angle_hough}° | WA: {deskewResult.angle_word_alignment}°
</div>
</div>
{/* Toggles */}
<div className="flex gap-3 mt-3">
<button
onClick={onToggleBinarized}
className={`text-xs px-3 py-1 rounded-full border transition-colors ${
showBinarized
? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
: 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
}`}
>
Binarisiert anzeigen
</button>
<button
onClick={onToggleGrid}
className={`text-xs px-3 py-1 rounded-full border transition-colors ${
showGrid
? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
: 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
}`}
>
Raster anzeigen
</button>
</div>
</div>
)}
{/* Manual angle */}
{deskewResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Manuelle Korrektur</div>
<div className="flex items-center gap-3">
<span className="text-xs text-gray-400 w-8 text-right">-5°</span>
<input
type="range"
min={-5}
max={5}
step={0.1}
value={manualAngle}
onChange={(e) => setManualAngle(parseFloat(e.target.value))}
className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
/>
<span className="text-xs text-gray-400 w-8">+5°</span>
<span className="font-mono text-sm w-14 text-right">{manualAngle.toFixed(1)}°</span>
<button
onClick={() => onManualDeskew(manualAngle)}
disabled={isApplying}
className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
>
{isApplying ? '...' : 'Anwenden'}
</button>
</div>
</div>
)}
{/* Ground Truth */}
{deskewResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
Rotation korrekt?
</div>
<p className="text-xs text-gray-400 mb-2">Nur die Drehung bewerten Woelbung/Verzerrung wird im naechsten Schritt korrigiert.</p>
{!gtSaved ? (
<div className="space-y-3">
<div className="flex gap-2">
<button
onClick={() => handleGroundTruth(true)}
className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
gtFeedback === 'correct'
? 'bg-green-100 text-green-700 ring-2 ring-green-400'
: 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
}`}
>
Ja
</button>
<button
onClick={() => handleGroundTruth(false)}
className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
gtFeedback === 'incorrect'
? 'bg-red-100 text-red-700 ring-2 ring-red-400'
: 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
}`}
>
Nein
</button>
</div>
{gtFeedback === 'incorrect' && (
<div className="space-y-2">
<textarea
value={gtNotes}
onChange={(e) => setGtNotes(e.target.value)}
placeholder="Notizen zur Korrektur..."
className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
rows={2}
/>
<button
onClick={handleGroundTruthIncorrect}
className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
>
Feedback speichern
</button>
</div>
)}
</div>
) : (
<div className="text-sm text-green-600 dark:text-green-400">
Feedback gespeichert
</div>
)}
</div>
)}
{/* Next button */}
{deskewResult && (
<div className="flex justify-end">
<button
onClick={onNext}
className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
>
Uebernehmen & Weiter &rarr;
</button>
</div>
)}
</div>
)
}

View File

@@ -1,553 +0,0 @@
'use client'
import { useEffect, useState } from 'react'
import type { DeskewResult, DewarpResult, DewarpDetection, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
/** Props for the DewarpControls component. */
interface DewarpControlsProps {
// Dewarp result to display; its shear_degrees and detections seed the sliders.
dewarpResult: DewarpResult | null
// Optional deskew result; its angle_iterative / angle_residual / angle_textline
// values (0 when missing) seed the three rotation fine-tuning sliders.
deskewResult?: DeskewResult | null
// Grid overlay state and its toggle handler.
showGrid: boolean
onToggleGrid: () => void
// Callback for applying a manually chosen shear, in degrees.
onManualDewarp: (shearDegrees: number) => void
// Optional callback for the fine-tune preview; receives the sum of the three
// rotation sliders and the shear value of the selected method.
onCombinedAdjust?: (rotationDegrees: number, shearDegrees: number) => void
// Called with the user's correctness feedback.
onGroundTruth: (gt: DewarpGroundTruth) => void
// Callback for advancing to the next step.
onNext: () => void
// NOTE(review): presumably true while a correction is being applied - confirm in the component body.
isApplying: boolean
}
// Display labels for dewarp method identifiers. The four detection methods
// carry an A-D prefix; the rest describe manual or absent corrections.
const METHOD_LABELS: Record<string, string> = {
vertical_edge: 'A: Vertikale Kanten',
projection: 'B: Projektions-Varianz',
hough_lines: 'C: Hough-Linien',
text_lines: 'D: Textzeilenanalyse',
manual: 'Manuell',
manual_combined: 'Manuell (kombiniert)',
none: 'Keine Korrektur',
}
// The four automatic shear-detection method keys (same keys as the A-D
// entries of METHOD_LABELS), as a readonly tuple.
const SHEAR_METHOD_KEYS = ['vertical_edge', 'projection', 'hough_lines', 'text_lines'] as const
/**
 * Text-colour classes for a confidence score.
 *
 * Green from 0.7 upwards, yellow from 0.5 upwards, grey for anything lower
 * (including NaN, which fails both comparisons).
 */
function confColor(conf: number): string {
  // Ordered from the highest threshold down; the first one reached wins.
  const tiers: Array<[number, string]> = [
    [0.7, 'text-green-600 dark:text-green-400'],
    [0.5, 'text-yellow-600 dark:text-yellow-400'],
  ]
  const hit = tiers.find(([floor]) => conf >= floor)
  return hit ? hit[1] : 'text-gray-400'
}
/**
 * Compact confidence indicator: a small horizontal bar filled to the rounded
 * percentage, followed by the percentage as text. Bar and text use the same
 * green / yellow / grey thresholds (0.7 and 0.5).
 */
function ConfBar({ value }: { value: number }) {
  const percent = Math.round(value * 100)

  let fillClass = 'bg-gray-400'
  if (value >= 0.7) {
    fillClass = 'bg-green-500'
  } else if (value >= 0.5) {
    fillClass = 'bg-yellow-500'
  }

  return (
    <div className="flex items-center gap-1.5">
      <div className="w-16 h-1.5 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
        <div className={`h-full rounded-full ${fillClass}`} style={{ width: `${percent}%` }} />
      </div>
      <span className={`text-xs font-mono ${confColor(value)}`}>{percent}%</span>
    </div>
  )
}
/**
 * A single slider row for fine-tuning one angle.
 *
 * Layout, left to right: optional radio button, label, min label, range
 * input, max label, and the current value with sign and two decimals.
 *
 * The range input works in integer hundredths of the unit: min, max, step
 * and value are multiplied by 100 and rounded before being handed to the
 * input, and the input's value is divided by 100 before being passed to
 * onChange. Rounding keeps float artefacts such as 0.07 * 100 =
 * 7.000000000000001 out of the input's attributes.
 *
 * @param label         Text shown in front of the slider.
 * @param value         Current value in `unit`.
 * @param onChange      Called with the new value in `unit`.
 * @param min           Lower bound in `unit`.
 * @param max           Upper bound in `unit`.
 * @param step          Step size in `unit`.
 * @param unit          Suffix for the labels; defaults to the degree sign.
 * @param radioName     When defined, a radio button with this group name is rendered.
 * @param radioChecked  Checked state of that radio button.
 * @param onRadioChange Change handler of that radio button.
 */
function FineTuneSlider({
  label,
  value,
  onChange,
  min,
  max,
  step,
  unit = '\u00B0',
  radioName,
  radioChecked,
  onRadioChange,
}: {
  label: string
  value: number
  onChange: (v: number) => void
  min: number
  max: number
  step: number
  unit?: string
  radioName?: string
  radioChecked?: boolean
  onRadioChange?: () => void
}) {
  // Slider resolution: the input counts in 1/100 of `unit`.
  const SCALE = 100
  const toSliderUnits = (v: number) => Math.round(v * SCALE)

  return (
    <div className="flex items-center gap-2">
      {radioName !== undefined && (
        <input
          type="radio"
          name={radioName}
          checked={radioChecked}
          onChange={onRadioChange}
          className="w-3.5 h-3.5 accent-teal-500"
        />
      )}
      <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">{label}</span>
      <span className="text-xs text-gray-400 w-8 text-right">{min}{unit}</span>
      <input
        type="range"
        min={toSliderUnits(min)}
        max={toSliderUnits(max)}
        step={toSliderUnits(step)}
        value={toSliderUnits(value)}
        onChange={(e) => onChange(parseInt(e.target.value, 10) / SCALE)}
        className="flex-1 h-1.5 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
      />
      <span className="text-xs text-gray-400 w-8">+{max}{unit}</span>
      <span className="font-mono text-xs w-14 text-right tabular-nums">
        {value >= 0 ? '+' : ''}{value.toFixed(2)}{unit}
      </span>
    </div>
  )
}
export function DewarpControls({
dewarpResult,
deskewResult,
showGrid,
onToggleGrid,
onManualDewarp,
onCombinedAdjust,
onGroundTruth,
onNext,
isApplying,
}: DewarpControlsProps) {
const [manualShear, setManualShear] = useState(0)
const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
const [gtNotes, setGtNotes] = useState('')
const [gtSaved, setGtSaved] = useState(false)
const [showDetails, setShowDetails] = useState(false)
const [showFineTune, setShowFineTune] = useState(false)
// Fine-tuning rotation sliders (3 passes)
const [p1Iterative, setP1Iterative] = useState(0)
const [p2Residual, setP2Residual] = useState(0)
const [p3Textline, setP3Textline] = useState(0)
// Fine-tuning shear sliders (4 methods) + selected method
const [shearValues, setShearValues] = useState<Record<string, number>>({
vertical_edge: 0,
projection: 0,
hough_lines: 0,
text_lines: 0,
})
const [selectedShearMethod, setSelectedShearMethod] = useState<string>('vertical_edge')
// Initialize slider to auto-detected value when result arrives
useEffect(() => {
if (dewarpResult && dewarpResult.shear_degrees !== undefined) {
setManualShear(dewarpResult.shear_degrees)
}
}, [dewarpResult?.shear_degrees])
// Initialize fine-tuning sliders from deskew result
useEffect(() => {
if (deskewResult) {
setP1Iterative(deskewResult.angle_iterative ?? 0)
setP2Residual(deskewResult.angle_residual ?? 0)
setP3Textline(deskewResult.angle_textline ?? 0)
}
}, [deskewResult])
// Initialize shear sliders from dewarp detections
useEffect(() => {
if (dewarpResult?.detections) {
const newValues = { ...shearValues }
let bestMethod = selectedShearMethod
let bestConf = -1
for (const d of dewarpResult.detections) {
if (d.method in newValues) {
newValues[d.method] = d.shear_degrees
if (d.confidence > bestConf) {
bestConf = d.confidence
bestMethod = d.method
}
}
}
setShearValues(newValues)
// Select the method that was actually used, or the highest confidence
if (dewarpResult.method_used && dewarpResult.method_used in newValues) {
setSelectedShearMethod(dewarpResult.method_used)
} else {
setSelectedShearMethod(bestMethod)
}
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [dewarpResult?.detections])
const rotationSum = p1Iterative + p2Residual + p3Textline
const activeShear = shearValues[selectedShearMethod] ?? 0
const handleGroundTruth = (isCorrect: boolean) => {
setGtFeedback(isCorrect ? 'correct' : 'incorrect')
if (isCorrect) {
onGroundTruth({ is_correct: true })
setGtSaved(true)
}
}
const handleGroundTruthIncorrect = () => {
onGroundTruth({
is_correct: false,
corrected_shear: manualShear !== 0 ? manualShear : undefined,
notes: gtNotes || undefined,
})
setGtSaved(true)
}
const handleShearValueChange = (method: string, value: number) => {
setShearValues((prev) => ({ ...prev, [method]: value }))
}
const handleFineTunePreview = () => {
if (onCombinedAdjust) {
onCombinedAdjust(rotationSum, activeShear)
}
}
const wasRejected = dewarpResult && dewarpResult.method_used === 'none' && (dewarpResult.detections || []).length > 0
const wasApplied = dewarpResult && dewarpResult.method_used !== 'none' && dewarpResult.method_used !== 'manual' && dewarpResult.method_used !== 'manual_combined'
const detections = dewarpResult?.detections || []
return (
<div className="space-y-4">
{/* Summary banner */}
{dewarpResult && (
<div className={`rounded-lg border p-4 ${
wasRejected
? 'bg-amber-50 border-amber-200 dark:bg-amber-900/20 dark:border-amber-700'
: wasApplied
? 'bg-green-50 border-green-200 dark:bg-green-900/20 dark:border-green-700'
: 'bg-white border-gray-200 dark:bg-gray-800 dark:border-gray-700'
}`}>
{/* Status line */}
<div className="flex items-center gap-2 mb-3">
<span className={`text-lg ${wasRejected ? '' : wasApplied ? '' : ''}`}>
{wasRejected ? '\u26A0\uFE0F' : wasApplied ? '\u2705' : '\u2796'}
</span>
<span className="text-sm font-medium text-gray-800 dark:text-gray-200">
{wasRejected
? 'Quality Gate: Korrektur verworfen (Projektion nicht verbessert)'
: wasApplied
? `Korrektur angewendet: ${dewarpResult.shear_degrees.toFixed(2)}\u00B0`
: dewarpResult.method_used === 'manual' || dewarpResult.method_used === 'manual_combined'
? `Manuelle Korrektur: ${dewarpResult.shear_degrees.toFixed(2)}\u00B0`
: 'Keine Korrektur noetig'}
</span>
</div>
{/* Key metrics */}
<div className="flex flex-wrap items-center gap-4 text-sm">
<div>
<span className="text-gray-500">Scherung:</span>{' '}
<span className="font-mono font-medium">{dewarpResult.shear_degrees.toFixed(2)}\u00B0</span>
</div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div>
<span className="text-gray-500">Methode:</span>{' '}
<span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
{dewarpResult.method_used.includes('+')
? `Ensemble (${dewarpResult.method_used.split('+').map(m => METHOD_LABELS[m] || m).join(' + ')})`
: METHOD_LABELS[dewarpResult.method_used] || dewarpResult.method_used}
</span>
</div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div className="flex items-center gap-1.5">
<span className="text-gray-500">Konfidenz:</span>
<ConfBar value={dewarpResult.confidence} />
</div>
</div>
{/* Toggles row */}
<div className="flex gap-2 mt-3">
<button
onClick={onToggleGrid}
className={`text-xs px-3 py-1 rounded-full border transition-colors ${
showGrid
? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
: 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
}`}
>
Raster
</button>
{detections.length > 0 && (
<button
onClick={() => setShowDetails(v => !v)}
className={`text-xs px-3 py-1 rounded-full border transition-colors ${
showDetails
? 'bg-blue-100 border-blue-300 text-blue-700 dark:bg-blue-900/40 dark:border-blue-600 dark:text-blue-300'
: 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
}`}
>
Details ({detections.length} Methoden)
</button>
)}
</div>
{/* Detailed detections */}
{showDetails && detections.length > 0 && (
<div className="mt-3 pt-3 border-t border-gray-200 dark:border-gray-700">
<div className="text-xs text-gray-500 mb-2">Einzelne Detektoren:</div>
<div className="space-y-1.5">
{detections.map((d: DewarpDetection) => {
const isUsed = dewarpResult.method_used.includes(d.method)
const aboveThreshold = d.confidence >= 0.5
return (
<div
key={d.method}
className={`flex items-center gap-3 text-xs px-2 py-1.5 rounded ${
isUsed
? 'bg-teal-50 dark:bg-teal-900/20'
: 'bg-gray-50 dark:bg-gray-800'
}`}
>
<span className="w-4 text-center">
{isUsed ? '\u2713' : aboveThreshold ? '\u2012' : '\u2717'}
</span>
<span className={`w-40 ${isUsed ? 'font-medium text-gray-800 dark:text-gray-200' : 'text-gray-500'}`}>
{METHOD_LABELS[d.method] || d.method}
</span>
<span className="font-mono w-16 text-right">
{d.shear_degrees.toFixed(2)}\u00B0
</span>
<ConfBar value={d.confidence} />
{!aboveThreshold && (
<span className="text-gray-400 ml-1">(unter Schwelle)</span>
)}
</div>
)
})}
</div>
{wasRejected && (
<div className="mt-2 text-xs text-amber-600 dark:text-amber-400">
Die Korrektur wurde verworfen, weil die horizontale Projektions-Varianz nach Anwendung nicht besser war als vorher.
</div>
)}
</div>
)}
</div>
)}
{/* Manual shear angle slider */}
{dewarpResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
<div className="flex items-center gap-3">
<span className="text-xs text-gray-400 w-10 text-right">-2.0\u00B0</span>
<input
type="range"
min={-200}
max={200}
step={5}
value={Math.round(manualShear * 100)}
onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
/>
<span className="text-xs text-gray-400 w-10">+2.0\u00B0</span>
<span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}\u00B0</span>
<button
onClick={() => onManualDewarp(manualShear)}
disabled={isApplying}
className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
>
{isApplying ? '...' : 'Anwenden'}
</button>
</div>
<p className="text-xs text-gray-400 mt-1">
Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
</p>
</div>
)}
{/* Fine-tuning panel */}
{dewarpResult && onCombinedAdjust && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
<button
onClick={() => setShowFineTune(v => !v)}
className="w-full flex items-center justify-between p-4 text-left"
>
<div className="flex items-center gap-2">
<span className="text-sm">&#9881;&#65039;</span>
<span className="text-sm font-medium text-gray-700 dark:text-gray-300">Feinabstimmung</span>
<span className="text-xs text-gray-400">(7 Regler)</span>
</div>
<span className="text-gray-400 text-sm">{showFineTune ? '\u25B2' : '\u25BC'}</span>
</button>
{showFineTune && (
<div className="px-4 pb-4 space-y-5">
{/* Rotation section */}
<div>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider mb-2">
Rotation (Begradigung)
</div>
<div className="space-y-2">
<FineTuneSlider
label="P1 Iterative Projection"
value={p1Iterative}
onChange={setP1Iterative}
min={-5}
max={5}
step={0.05}
/>
<FineTuneSlider
label="P2 Word-Alignment"
value={p2Residual}
onChange={setP2Residual}
min={-3}
max={3}
step={0.05}
/>
<FineTuneSlider
label="P3 Textline-Regression"
value={p3Textline}
onChange={setP3Textline}
min={-3}
max={3}
step={0.05}
/>
<div className="flex items-center gap-2 pt-1 border-t border-gray-100 dark:border-gray-700">
<span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">Summe Rotation</span>
<span className="font-mono text-sm font-medium text-teal-600 dark:text-teal-400">
{rotationSum >= 0 ? '+' : ''}{rotationSum.toFixed(2)}\u00B0
</span>
</div>
</div>
</div>
{/* Shear section */}
<div>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider mb-2">
Scherung (Entzerrung) &mdash; einen Wert waehlen
</div>
<div className="space-y-2">
{SHEAR_METHOD_KEYS.map((method) => (
<FineTuneSlider
key={method}
label={METHOD_LABELS[method] || method}
value={shearValues[method]}
onChange={(v) => handleShearValueChange(method, v)}
min={-5}
max={5}
step={0.05}
radioName="shear-method"
radioChecked={selectedShearMethod === method}
onRadioChange={() => setSelectedShearMethod(method)}
/>
))}
<div className="flex items-center gap-2 pt-1 border-t border-gray-100 dark:border-gray-700">
<span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">Gewaehlte Scherung</span>
<span className="font-mono text-sm font-medium text-teal-600 dark:text-teal-400">
{activeShear >= 0 ? '+' : ''}{activeShear.toFixed(2)}\u00B0
</span>
<span className="text-xs text-gray-400 ml-1">
({METHOD_LABELS[selectedShearMethod]})
</span>
</div>
</div>
</div>
{/* Preview + Save */}
<div className="flex items-center gap-3 pt-2">
<button
onClick={handleFineTunePreview}
disabled={isApplying}
className="px-4 py-2 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
>
{isApplying ? 'Wird angewendet...' : 'Vorschau'}
</button>
<button
onClick={() => {
onGroundTruth({
is_correct: false,
corrected_shear: activeShear,
notes: `Fine-tuned: rotation=${rotationSum.toFixed(3)}, shear=${activeShear.toFixed(3)} (${selectedShearMethod})`,
})
setGtSaved(true)
}}
disabled={gtSaved}
className="px-4 py-2 text-sm bg-blue-600 text-white rounded-md hover:bg-blue-700 disabled:opacity-50 transition-colors"
>
{gtSaved ? 'Gespeichert' : 'Als Ground Truth speichern'}
</button>
<span className="text-xs text-gray-400">
Rotation: {rotationSum >= 0 ? '+' : ''}{rotationSum.toFixed(2)}\u00B0 + Scherung: {activeShear >= 0 ? '+' : ''}{activeShear.toFixed(2)}\u00B0
</span>
</div>
</div>
)}
</div>
)}
{/* Ground Truth */}
{dewarpResult && !showFineTune && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
Spalten vertikal ausgerichtet?
</div>
<p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
{!gtSaved ? (
<div className="space-y-3">
<div className="flex gap-2">
<button
onClick={() => handleGroundTruth(true)}
className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
gtFeedback === 'correct'
? 'bg-green-100 text-green-700 ring-2 ring-green-400'
: 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
}`}
>
Ja
</button>
<button
onClick={() => handleGroundTruth(false)}
className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
gtFeedback === 'incorrect'
? 'bg-red-100 text-red-700 ring-2 ring-red-400'
: 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
}`}
>
Nein
</button>
</div>
{gtFeedback === 'incorrect' && (
<div className="space-y-2">
<textarea
value={gtNotes}
onChange={(e) => setGtNotes(e.target.value)}
placeholder="Notizen zur Korrektur..."
className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
rows={2}
/>
<button
onClick={handleGroundTruthIncorrect}
className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
>
Feedback speichern
</button>
</div>
)}
</div>
) : (
<div className="text-sm text-green-600 dark:text-green-400">
Feedback gespeichert
</div>
)}
</div>
)}
{/* Next button */}
{dewarpResult && (
<div className="flex justify-end">
<button
onClick={onNext}
className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
>
Uebernehmen & Weiter &rarr;
</button>
</div>
)}
</div>
)
}

View File

@@ -1,403 +0,0 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
// Base path prefixed to every backend URL built in this file
// (background image and PDF/DOCX export endpoints).
const KLAUSUR_API = '/klausur-api'
// Column type → hex colour mapping.
// The colour is used for the translucent cell background, the selection
// border/corner colour of each cell, and the legend swatches.
// Column types missing from this map fall back to gray (#6b7280) when cells are drawn.
// NOTE(review): ManualColumnEditor's TYPE_OVERLAY_COLORS uses cyan for column_text,
// purple for page_ref and red for column_marker, which differs from this map —
// confirm which palette is intended.
const COL_TYPE_COLORS: Record<string, string> = {
column_en: '#3b82f6', // blue-500
column_de: '#22c55e', // green-500
column_example: '#f97316', // orange-500
column_text: '#a855f7', // purple-500
page_ref: '#06b6d4', // cyan-500
column_marker: '#6b7280', // gray-500
}
/** Props accepted by FabricReconstructionCanvas. */
interface FabricReconstructionCanvasProps {
// Session identifier; interpolated into the background-image and export URLs.
sessionId: string
// Grid cells drawn as editable text objects; each cell is placed via its bbox_pct (percent of image size).
cells: GridCell[]
// Invoked with { cell_id, text } entries when a cell object is modified or an undo/redo is applied.
onCellsChanged: (updates: { cell_id: string; text: string }[]) => void
}
// Fabric.js types (subset used here).
// These are hand-written structural types for the dynamically imported 'fabric'
// module; they only describe the members this file touches.

/** Minimal view of a Fabric canvas instance. */
interface FabricCanvas {
add: (...objects: FabricObject[]) => FabricCanvas
remove: (...objects: FabricObject[]) => FabricCanvas
setBackgroundImage: (img: FabricImage, callback: () => void) => void
renderAll: () => void
getObjects: () => FabricObject[]
dispose: () => void
on: (event: string, handler: (e: FabricEvent) => void) => void
setWidth: (w: number) => void
setHeight: (h: number) => void
getActiveObject: () => FabricObject | null
discardActiveObject: () => FabricCanvas
requestRenderAll: () => void
setZoom: (z: number) => void
getZoom: () => number
}
/** Minimal view of an object placed on the canvas (text cells and images). */
interface FabricObject {
type?: string
left?: number
top?: number
width?: number
height?: number
text?: string
set: (props: Record<string, unknown>) => FabricObject
get: (prop: string) => unknown
// Custom payload attached when a cell object is created (cellId, colType, rowIndex, colIndex, originalText).
data?: Record<string, unknown>
selectable?: boolean
on?: (event: string, handler: () => void) => void
setCoords?: () => void
}
/** Image object; scaleX/scaleY are multiplied with width/height to derive the canvas size. */
interface FabricImage extends FabricObject {
width?: number
height?: number
scaleX?: number
scaleY?: number
}
/** Event payload passed to canvas event handlers. */
interface FabricEvent {
target?: FabricObject
e?: MouseEvent
}
// The dynamically imported module is left untyped; its members are accessed ad hoc.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type FabricModule = any
/**
 * Interactive reconstruction editor built on Fabric.js.
 *
 * Loads the cropped session image as a semi-transparent canvas background and
 * places one editable text object per grid cell on top of it. Text edits are
 * reported through `onCellsChanged` and recorded for undo/redo.
 *
 * Keyboard shortcuts (registered on `document`):
 *  - Cmd/Ctrl+Z        undo the last recorded text change
 *  - Cmd/Ctrl+Shift+Z  redo
 *  - Delete/Backspace  remove the selected cell object (ignored while editing text)
 *
 * @param sessionId      Session whose image and export endpoints are used.
 * @param cells          Grid cells to render; only read when the canvas is (re)initialised.
 * @param onCellsChanged Receives `{ cell_id, text }` updates for modified cells.
 */
export function FabricReconstructionCanvas({
  sessionId,
  cells,
  onCellsChanged,
}: FabricReconstructionCanvasProps) {
  const canvasElRef = useRef<HTMLCanvasElement>(null)
  const fabricRef = useRef<FabricCanvas | null>(null)
  const fabricModuleRef = useRef<FabricModule>(null)
  const [ready, setReady] = useState(false)
  const [opacity, setOpacity] = useState(30)
  const [zoom, setZoom] = useState(100)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [error, setError] = useState('')

  // Undo/Redo
  const undoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  const redoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])

  // The canvas listeners are registered once per session, so they read the
  // callback through a ref to always reach the latest handler from the parent.
  const onCellsChangedRef = useRef(onCellsChanged)
  useEffect(() => {
    onCellsChangedRef.current = onCellsChanged
  }, [onCellsChanged])

  // ---- Initialise Fabric.js ----
  useEffect(() => {
    let disposed = false

    async function init() {
      try {
        const fabricModule = await import('fabric')
        if (disposed) return
        fabricModuleRef.current = fabricModule

        const canvasEl = canvasElRef.current
        if (!canvasEl) return

        // Load background image first to get dimensions
        const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
        const bgImg = await fabricModule.FabricImage.fromURL(imgUrl, { crossOrigin: 'anonymous' }) as FabricImage
        if (disposed) return

        const imgW = (bgImg.width || 800) * (bgImg.scaleX || 1)
        const imgH = (bgImg.height || 600) * (bgImg.scaleY || 1)
        bgImg.set({ opacity: opacity / 100, selectable: false, evented: false } as Record<string, unknown>)

        const canvas = new fabricModule.Canvas(canvasEl, {
          width: imgW,
          height: imgH,
          selection: true,
          preserveObjectStacking: true,
          backgroundImage: bgImg,
        }) as unknown as FabricCanvas
        fabricRef.current = canvas
        canvas.renderAll()

        // Add cell objects
        addCellObjects(canvas, fabricModule, cells, imgW, imgH)

        // Listen for text changes
        canvas.on('object:modified', (e: FabricEvent) => {
          const target = e.target
          const data = target?.data
          if (!target || !data?.cellId) return

          const cellId = data.cellId as string
          const newText = (target.text || '') as string
          // Last text known for this cell: the most recent recorded edit,
          // otherwise the text the cell was created with.
          const prevText = ((data.lastText ?? data.originalText) || '') as string

          // Record only real text changes so that moving/resizing a cell does
          // not create empty undo steps. A new edit invalidates the redo history.
          if (prevText !== newText) {
            undoStackRef.current.push({ cellId, oldText: prevText, newText })
            redoStackRef.current = []
            data.lastText = newText
          }

          onCellsChangedRef.current([{ cell_id: cellId, text: newText }])
        })

        // Selection tracking
        canvas.on('selection:created', (e: FabricEvent) => {
          if (e.target?.data?.cellId) setSelectedCell(e.target.data.cellId as string)
        })
        canvas.on('selection:updated', (e: FabricEvent) => {
          if (e.target?.data?.cellId) setSelectedCell(e.target.data.cellId as string)
        })
        canvas.on('selection:cleared', () => setSelectedCell(null))

        setReady(true)
      } catch (err) {
        if (!disposed) setError(err instanceof Error ? err.message : 'Fabric.js konnte nicht geladen werden')
      }
    }

    init()

    return () => {
      disposed = true
      fabricRef.current?.dispose()
      fabricRef.current = null
      // History entries refer to objects of the disposed canvas.
      undoStackRef.current = []
      redoStackRef.current = []
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Creates one editable text object per grid cell and adds it to the canvas. */
  function addCellObjects(
    canvas: FabricCanvas,
    fabricModule: FabricModule,
    gridCells: GridCell[],
    imgW: number,
    imgH: number,
  ) {
    for (const cell of gridCells) {
      const color = COL_TYPE_COLORS[cell.col_type] || '#6b7280'
      // bbox_pct values are percentages of the image size
      const x = (cell.bbox_pct.x / 100) * imgW
      const y = (cell.bbox_pct.y / 100) * imgH
      const w = (cell.bbox_pct.w / 100) * imgW
      const h = (cell.bbox_pct.h / 100) * imgH
      // Font size follows the cell height, clamped to 8–18
      const fontSize = Math.max(8, Math.min(18, h * 0.55))

      const textObj = new fabricModule.IText(cell.text || '', {
        left: x,
        top: y,
        width: w,
        fontSize,
        fontFamily: 'monospace',
        fill: '#000000',
        backgroundColor: `${color}22`,
        padding: 2,
        editable: true,
        selectable: true,
        lockScalingFlip: true,
        data: {
          cellId: cell.cell_id,
          colType: cell.col_type,
          rowIndex: cell.row_index,
          colIndex: cell.col_index,
          originalText: cell.text,
        },
      })

      // Border colour matches column type
      textObj.set({
        borderColor: color,
        cornerColor: color,
        cornerSize: 6,
        transparentCorners: false,
      } as Record<string, unknown>)

      canvas.add(textObj)
    }
    canvas.renderAll()
  }

  // ---- Opacity slider ----
  const handleOpacityChange = useCallback((val: number) => {
    setOpacity(val)
    const canvas = fabricRef.current
    if (!canvas) return
    // Fabric v6: backgroundImage is a direct property on the canvas
    const bgImg = (canvas as unknown as { backgroundImage?: FabricObject }).backgroundImage
    if (bgImg) {
      bgImg.set({ opacity: val / 100 })
      canvas.renderAll()
    }
  }, [])

  // ---- Zoom ----
  const handleZoomChange = useCallback((val: number) => {
    setZoom(val)
    const canvas = fabricRef.current
    if (!canvas) return
    // Use the canvas' setZoom API; assigning an ad-hoc `zoom` property is not
    // part of the canvas interface used here.
    canvas.setZoom(val / 100)
    canvas.requestRenderAll()
  }, [])

  // ---- Undo / Redo via keyboard ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // With Shift held the reported key is 'Z', so compare case-insensitively.
      if (!(e.metaKey || e.ctrlKey) || e.key.toLowerCase() !== 'z') return

      const canvas = fabricRef.current
      // While a cell is in text-editing mode, leave the shortcut to the editor.
      const active = canvas?.getActiveObject()
      if (active && (active as unknown as Record<string, boolean>).isEditing) return

      e.preventDefault()
      if (!canvas) return

      const isRedo = e.shiftKey
      const sourceStack = isRedo ? redoStackRef.current : undoStackRef.current
      const targetStack = isRedo ? undoStackRef.current : redoStackRef.current

      const action = sourceStack.pop()
      if (!action) return
      targetStack.push(action)

      const text = isRedo ? action.newText : action.oldText
      const obj = canvas.getObjects().find(
        (o: FabricObject) => o.data?.cellId === action.cellId
      )
      if (obj) {
        obj.set({ text } as Record<string, unknown>)
        // Keep the per-cell baseline in sync so the next edit records the right old text.
        if (obj.data) obj.data.lastText = text
        canvas.renderAll()
        onCellsChanged([{ cell_id: action.cellId, text }])
      }
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [onCellsChanged])

  // ---- Delete selected cell (via context-menu or Delete key) ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key !== 'Delete' && e.key !== 'Backspace') return
      // Only delete if not currently editing text inside an IText
      const canvas = fabricRef.current
      if (!canvas) return
      const active = canvas.getActiveObject()
      if (!active) return
      // If the IText is in editing mode, let the keypress pass through
      if ((active as unknown as Record<string, boolean>).isEditing) return
      e.preventDefault()
      canvas.remove(active)
      canvas.discardActiveObject()
      canvas.renderAll()
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Export helpers ----
  const handleExportPdf = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/pdf`,
      '_blank'
    )
  }, [sessionId])

  const handleExportDocx = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/docx`,
      '_blank'
    )
  }, [sessionId])

  if (error) {
    return (
      <div className="flex flex-col items-center justify-center py-8 text-red-500 text-sm">
        <p>Fabric.js Editor konnte nicht geladen werden:</p>
        <p className="text-xs mt-1 text-gray-400">{error}</p>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      {/* Toolbar */}
      <div className="flex items-center gap-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2 text-xs">
        {/* Opacity slider */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Hintergrund
          <input
            type="range"
            min={0} max={100}
            value={opacity}
            onChange={e => handleOpacityChange(Number(e.target.value))}
            className="w-20 h-1 accent-teal-500"
          />
          <span className="w-8 text-right">{opacity}%</span>
        </label>
        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />
        {/* Zoom */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Zoom
          <button onClick={() => handleZoomChange(Math.max(25, zoom - 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            &minus;
          </button>
          <span className="w-8 text-center">{zoom}%</span>
          <button onClick={() => handleZoomChange(Math.min(200, zoom + 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            +
          </button>
          <button onClick={() => handleZoomChange(100)}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            Fit
          </button>
        </label>
        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />
        {/* Selected cell info */}
        {selectedCell && (
          <span className="text-gray-400">
            Zelle: <span className="text-gray-600 dark:text-gray-300">{selectedCell}</span>
          </span>
        )}
        <div className="flex-1" />
        {/* Export buttons */}
        <button onClick={handleExportPdf}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          PDF
        </button>
        <button onClick={handleExportDocx}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          DOCX
        </button>
      </div>
      {/* Canvas */}
      <div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900"
        style={{ maxHeight: '75vh' }}>
        {!ready && (
          <div className="flex items-center justify-center py-12">
            <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
            <span className="ml-2 text-sm text-gray-500">Canvas wird geladen...</span>
          </div>
        )}
        <canvas ref={canvasElRef} />
      </div>
      {/* Legend */}
      <div className="flex items-center gap-4 text-xs text-gray-500">
        {Object.entries(COL_TYPE_COLORS).map(([type, color]) => (
          <span key={type} className="flex items-center gap-1">
            <span className="w-3 h-3 rounded" style={{ backgroundColor: color + '44', border: `1px solid ${color}` }} />
            {type.replace('column_', '').replace('page_', '')}
          </span>
        ))}
        <span className="ml-auto text-gray-400">Doppelklick = Text bearbeiten | Delete = Zelle entfernen | Cmd+Z = Undo</span>
      </div>
    </div>
  )
}

View File

@@ -1,143 +0,0 @@
'use client'
import { useState } from 'react'
// A4 page size in millimetres. The grid overlay divides by these values to
// convert millimetre positions into percentages of its 0–100 SVG viewBox.
const A4_WIDTH_MM = 210
const A4_HEIGHT_MM = 297
/** Props for the side-by-side image comparison view. */
interface ImageCompareViewProps {
// Image shown in the left panel; null renders the 'Noch kein Bild' placeholder.
originalUrl: string | null
// Image shown in the right panel unless the binarized image is selected.
deskewedUrl: string | null
// Draw the millimetre grid over the right panel (also reflected in the default right heading).
showGrid: boolean
// Draw the millimetre grid over the left panel.
showGridLeft?: boolean
// When true and binarizedUrl is set, the right panel shows binarizedUrl instead of deskewedUrl.
showBinarized: boolean
binarizedUrl: string | null
// Optional heading overrides for the left and right panel.
leftLabel?: string
rightLabel?: string
}
/**
 * Millimetre grid rendered as an absolutely positioned, non-interactive SVG.
 *
 * A line is drawn every 10 mm along both A4 axes; every 50 mm the line is
 * drawn stronger and (except at 0) labelled with its millimetre value.
 * Coordinates are percentages of the 0–100 viewBox, which is stretched to
 * the parent's size.
 */
function MmGridOverlay() {
  const strokeFor = (major: boolean) =>
    major ? 'rgba(59, 130, 246, 0.4)' : 'rgba(59, 130, 246, 0.15)'
  const strokeWidthFor = (major: boolean) => (major ? 0.12 : 0.05)

  const marks: React.ReactNode[] = []

  // Vertical lines (x axis), one per 10 mm of page width
  let mm = 0
  while (mm <= A4_WIDTH_MM) {
    const xPos = (mm / A4_WIDTH_MM) * 100
    const major = mm % 50 === 0
    marks.push(
      <line
        key={`v-${mm}`}
        x1={xPos} y1={0} x2={xPos} y2={100}
        stroke={strokeFor(major)}
        strokeWidth={strokeWidthFor(major)}
      />
    )
    // Millimetre label on every major line except the origin
    if (major && mm > 0) {
      marks.push(
        <text key={`vl-${mm}`} x={xPos} y={1.2} fill="rgba(59,130,246,0.6)" fontSize="1.2" textAnchor="middle">
          {mm}
        </text>
      )
    }
    mm += 10
  }

  // Horizontal lines (y axis), one per 10 mm of page height
  mm = 0
  while (mm <= A4_HEIGHT_MM) {
    const yPos = (mm / A4_HEIGHT_MM) * 100
    const major = mm % 50 === 0
    marks.push(
      <line
        key={`h-${mm}`}
        x1={0} y1={yPos} x2={100} y2={yPos}
        stroke={strokeFor(major)}
        strokeWidth={strokeWidthFor(major)}
      />
    )
    if (major && mm > 0) {
      marks.push(
        <text key={`hl-${mm}`} x={0.5} y={yPos + 0.6} fill="rgba(59,130,246,0.6)" fontSize="1.2">
          {mm}
        </text>
      )
    }
    mm += 10
  }

  return (
    <svg
      viewBox="0 0 100 100"
      preserveAspectRatio="none"
      className="absolute inset-0 w-full h-full pointer-events-none"
      style={{ zIndex: 10 }}
    >
      <g style={{ pointerEvents: 'none' }}>{marks}</g>
    </svg>
  )
}
/**
 * Side-by-side comparison of the original scan (left) and the processed
 * image (right), each in an A4-proportioned frame with an optional
 * millimetre grid overlay.
 *
 * The right panel shows `binarizedUrl` when `showBinarized` is set and a
 * binarized image exists, otherwise `deskewedUrl`.
 *
 * Load failures are remembered per URL: a panel shows its error placeholder
 * only while the URL that failed is still the one being displayed, so a new
 * URL is always given a fresh attempt.
 */
export function ImageCompareView({
  originalUrl,
  deskewedUrl,
  showGrid,
  showGridLeft,
  showBinarized,
  binarizedUrl,
  leftLabel,
  rightLabel,
}: ImageCompareViewProps) {
  // URL whose load failed most recently in each panel (null = no failure yet)
  const [leftFailedUrl, setLeftFailedUrl] = useState<string | null>(null)
  const [rightFailedUrl, setRightFailedUrl] = useState<string | null>(null)

  const rightUrl = showBinarized && binarizedUrl ? binarizedUrl : deskewedUrl

  // An error only applies while the failed URL is still the current one
  const leftError = Boolean(originalUrl) && leftFailedUrl === originalUrl
  const rightError = Boolean(rightUrl) && rightFailedUrl === rightUrl

  return (
    <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
      {/* Left: Original */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">{leftLabel || 'Original (unbearbeitet)'}</h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
          style={{ aspectRatio: '210/297' }}>
          {originalUrl && !leftError ? (
            <>
              <img
                src={originalUrl}
                alt="Original Scan"
                className="w-full h-full object-contain"
                onError={() => setLeftFailedUrl(originalUrl)}
              />
              {showGridLeft && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {leftError ? 'Fehler beim Laden' : 'Noch kein Bild'}
            </div>
          )}
        </div>
      </div>
      {/* Right: Deskewed with Grid */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
          {rightLabel || `${showBinarized ? 'Binarisiert' : 'Begradigt'}${showGrid ? ' + Raster (mm)' : ''}`}
        </h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
          style={{ aspectRatio: '210/297' }}>
          {rightUrl && !rightError ? (
            <>
              <img
                src={rightUrl}
                alt={rightLabel || 'Bearbeitetes Bild'}
                className="w-full h-full object-contain"
                onError={() => setRightFailedUrl(rightUrl)}
              />
              {showGrid && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {rightError ? 'Fehler beim Laden' : `${rightLabel || 'Verarbeitung'} laeuft...`}
            </div>
          )}
        </div>
      </div>
    </div>
  )
}

View File

@@ -1,359 +0,0 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
// Column type keys paired with their short display labels.
const COLUMN_TYPES: { value: ColumnTypeKey; label: string }[] = [
{ value: 'column_en', label: 'EN' },
{ value: 'column_de', label: 'DE' },
{ value: 'column_example', label: 'Beispiel' },
{ value: 'column_text', label: 'Text' },
{ value: 'page_ref', label: 'Seite' },
{ value: 'column_marker', label: 'Marker' },
{ value: 'column_ignore', label: 'Ignorieren' },
]
// Translucent fill colour per column type, used as the background of the
// column overlay drawn on top of the image.
const TYPE_OVERLAY_COLORS: Record<string, string> = {
column_en: 'rgba(59, 130, 246, 0.12)',
column_de: 'rgba(34, 197, 94, 0.12)',
column_example: 'rgba(249, 115, 22, 0.12)',
column_text: 'rgba(6, 182, 212, 0.12)',
page_ref: 'rgba(168, 85, 247, 0.12)',
column_marker: 'rgba(239, 68, 68, 0.12)',
column_ignore: 'rgba(128, 128, 128, 0.06)',
}
// Tailwind class strings (light + dark variants) per column type.
// NOTE(review): presumably applied to the per-column type badges — confirm at the usage site.
const TYPE_BADGE_COLORS: Record<string, string> = {
column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
}
// Default column type sequence for newly created columns.
// A new column at index i receives entry i; indices beyond the list get 'column_text'.
const DEFAULT_TYPE_SEQUENCE: ColumnTypeKey[] = [
'page_ref', 'column_en', 'column_de', 'column_example', 'column_text',
]
// Minimum gap, in percent of the image width, between two dividers and
// between a divider and either image edge.
const MIN_DIVIDER_DISTANCE_PERCENT = 2 // Minimum 2% apart
/** Props for the manual column divider editor. */
interface ManualColumnEditorProps {
// Source of the image on which dividers are placed.
imageUrl: string
// Image size used to convert divider percentages into column x/width/height values on apply.
imageWidth: number
imageHeight: number
// Receives the columns built from the current dividers and column types.
onApply: (columns: PageRegion[]) => void
// Invoked by the 'Abbrechen' button.
onCancel: () => void
// NOTE(review): presumably true while the apply request is in flight — confirm at the usage site.
applying: boolean
// Defaults to 'manual'.
mode?: 'manual' | 'ground-truth'
// 'two-column' (default) places image and controls in a two-column grid; 'stacked' stacks them vertically.
layout?: 'two-column' | 'stacked'
// Initial divider positions; dividers are handled as percentages (0–100) of the image width.
initialDividers?: number[]
// Initial type per column; padded or trimmed to match the number of columns.
initialColumnTypes?: ColumnTypeKey[]
}
export function ManualColumnEditor({
imageUrl,
imageWidth,
imageHeight,
onApply,
onCancel,
applying,
mode = 'manual',
layout = 'two-column',
initialDividers,
initialColumnTypes,
}: ManualColumnEditorProps) {
const containerRef = useRef<HTMLDivElement>(null)
const [dividers, setDividers] = useState<number[]>(initialDividers ?? [])
const [columnTypes, setColumnTypes] = useState<ColumnTypeKey[]>(initialColumnTypes ?? [])
const [dragging, setDragging] = useState<number | null>(null)
const [imageLoaded, setImageLoaded] = useState(false)
const isGT = mode === 'ground-truth'
// Sync columnTypes length when dividers change
useEffect(() => {
const numColumns = dividers.length + 1
setColumnTypes(prev => {
if (prev.length === numColumns) return prev
const next = [...prev]
while (next.length < numColumns) {
const idx = next.length
next.push(DEFAULT_TYPE_SEQUENCE[idx] || 'column_text')
}
while (next.length > numColumns) {
next.pop()
}
return next
})
}, [dividers.length])
const getXPercent = useCallback((clientX: number): number => {
if (!containerRef.current) return 0
const rect = containerRef.current.getBoundingClientRect()
const pct = ((clientX - rect.left) / rect.width) * 100
return Math.max(0, Math.min(100, pct))
}, [])
const canPlaceDivider = useCallback((xPct: number, excludeIndex?: number): boolean => {
for (let i = 0; i < dividers.length; i++) {
if (i === excludeIndex) continue
if (Math.abs(dividers[i] - xPct) < MIN_DIVIDER_DISTANCE_PERCENT) return false
}
return xPct > MIN_DIVIDER_DISTANCE_PERCENT && xPct < (100 - MIN_DIVIDER_DISTANCE_PERCENT)
}, [dividers])
// Click on image to add a divider
const handleImageClick = useCallback((e: React.MouseEvent) => {
if (dragging !== null) return
// Don't add if clicking on a divider handle
if ((e.target as HTMLElement).dataset.divider) return
const xPct = getXPercent(e.clientX)
if (!canPlaceDivider(xPct)) return
setDividers(prev => [...prev, xPct].sort((a, b) => a - b))
}, [dragging, getXPercent, canPlaceDivider])
// Drag handlers
const handleDividerMouseDown = useCallback((e: React.MouseEvent, index: number) => {
e.stopPropagation()
e.preventDefault()
setDragging(index)
}, [])
useEffect(() => {
if (dragging === null) return
const handleMouseMove = (e: MouseEvent) => {
const xPct = getXPercent(e.clientX)
if (canPlaceDivider(xPct, dragging)) {
setDividers(prev => {
const next = [...prev]
next[dragging] = xPct
return next.sort((a, b) => a - b)
})
}
}
const handleMouseUp = () => {
setDragging(null)
}
window.addEventListener('mousemove', handleMouseMove)
window.addEventListener('mouseup', handleMouseUp)
return () => {
window.removeEventListener('mousemove', handleMouseMove)
window.removeEventListener('mouseup', handleMouseUp)
}
}, [dragging, getXPercent, canPlaceDivider])
const removeDivider = useCallback((index: number) => {
setDividers(prev => prev.filter((_, i) => i !== index))
}, [])
const updateColumnType = useCallback((colIndex: number, type: ColumnTypeKey) => {
setColumnTypes(prev => {
const next = [...prev]
next[colIndex] = type
return next
})
}, [])
const handleApply = useCallback(() => {
// Build PageRegion array from dividers
const sorted = [...dividers].sort((a, b) => a - b)
const columns: PageRegion[] = []
for (let i = 0; i <= sorted.length; i++) {
const leftPct = i === 0 ? 0 : sorted[i - 1]
const rightPct = i === sorted.length ? 100 : sorted[i]
const x = Math.round((leftPct / 100) * imageWidth)
const w = Math.round(((rightPct - leftPct) / 100) * imageWidth)
columns.push({
type: columnTypes[i] || 'column_text',
x,
y: 0,
width: w,
height: imageHeight,
classification_confidence: 1.0,
classification_method: 'manual',
})
}
onApply(columns)
}, [dividers, columnTypes, imageWidth, imageHeight, onApply])
// Compute column regions for overlay
const sorted = [...dividers].sort((a, b) => a - b)
const columnRegions = Array.from({ length: sorted.length + 1 }, (_, i) => ({
leftPct: i === 0 ? 0 : sorted[i - 1],
rightPct: i === sorted.length ? 100 : sorted[i],
type: columnTypes[i] || 'column_text',
}))
return (
<div className="space-y-4">
{/* Layout: image + controls */}
<div className={layout === 'stacked' ? 'space-y-4' : 'grid grid-cols-2 gap-4'}>
{/* Left: Interactive image */}
<div>
<div className="flex items-center justify-between mb-1">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400">
Klicken um Trennlinien zu setzen
</div>
<button
onClick={onCancel}
className="text-xs px-2 py-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200"
>
Abbrechen
</button>
</div>
<div
ref={containerRef}
className="relative border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 cursor-crosshair select-none"
onClick={handleImageClick}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={imageUrl}
alt="Entzerrtes Bild"
className="w-full h-auto block"
draggable={false}
onLoad={() => setImageLoaded(true)}
/>
{imageLoaded && (
<>
{/* Column overlays */}
{columnRegions.map((region, i) => (
<div
key={`col-${i}`}
className="absolute top-0 bottom-0 pointer-events-none"
style={{
left: `${region.leftPct}%`,
width: `${region.rightPct - region.leftPct}%`,
backgroundColor: TYPE_OVERLAY_COLORS[region.type] || 'rgba(128,128,128,0.08)',
}}
>
<span className="absolute top-1 left-1/2 -translate-x-1/2 text-[10px] font-medium text-gray-600 dark:text-gray-300 bg-white/80 dark:bg-gray-800/80 px-1 rounded">
{i + 1}
</span>
</div>
))}
{/* Divider lines */}
{sorted.map((xPct, i) => (
<div
key={`div-${i}`}
data-divider="true"
className="absolute top-0 bottom-0 group"
style={{
left: `${xPct}%`,
transform: 'translateX(-50%)',
width: '12px',
cursor: 'col-resize',
zIndex: 10,
}}
onMouseDown={(e) => handleDividerMouseDown(e, i)}
>
{/* Visible line */}
<div
data-divider="true"
className="absolute top-0 bottom-0 left-1/2 -translate-x-1/2 w-0.5 border-l-2 border-dashed border-red-500"
/>
{/* Delete button */}
<button
data-divider="true"
onClick={(e) => {
e.stopPropagation()
removeDivider(i)
}}
className="absolute top-2 left-1/2 -translate-x-1/2 w-4 h-4 bg-red-500 text-white rounded-full text-[10px] leading-none flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-20"
title="Linie entfernen"
>
x
</button>
</div>
))}
</>
)}
</div>
</div>
{/* Right: Column type assignment + actions */}
<div className="space-y-4">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Spaltentypen
</div>
{dividers.length === 0 ? (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-6 text-center">
<div className="text-3xl mb-2">👆</div>
<p className="text-sm text-gray-500 dark:text-gray-400">
Klicken Sie auf das Bild links, um vertikale Trennlinien zwischen den Spalten zu setzen.
</p>
<p className="text-xs text-gray-400 dark:text-gray-500 mt-2">
Linien koennen per Drag verschoben und per Hover geloescht werden.
</p>
</div>
) : (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
<div className="text-sm text-gray-600 dark:text-gray-400">
<span className="font-medium text-gray-800 dark:text-gray-200">
{dividers.length} Linien = {dividers.length + 1} Spalten
</span>
</div>
<div className="grid gap-2">
{columnRegions.map((region, i) => (
<div key={i} className="flex items-center gap-3">
<span className={`w-16 text-center px-2 py-0.5 rounded text-xs font-medium ${TYPE_BADGE_COLORS[region.type] || 'bg-gray-100 text-gray-600'}`}>
Spalte {i + 1}
</span>
<select
value={columnTypes[i] || 'column_text'}
onChange={(e) => updateColumnType(i, e.target.value as ColumnTypeKey)}
className="text-sm border border-gray-200 dark:border-gray-600 rounded px-2 py-1 bg-white dark:bg-gray-700 text-gray-800 dark:text-gray-200"
>
{COLUMN_TYPES.map(t => (
<option key={t.value} value={t.value}>{t.label}</option>
))}
</select>
<span className="text-xs text-gray-400 font-mono">
{Math.round(region.rightPct - region.leftPct)}%
</span>
</div>
))}
</div>
</div>
)}
{/* Action buttons */}
<div className="flex flex-col gap-2">
<button
onClick={handleApply}
disabled={dividers.length === 0 || applying}
className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
>
{applying
? 'Wird gespeichert...'
: isGT
? `${dividers.length + 1} Spalten als Ground Truth speichern`
: `${dividers.length + 1} Spalten uebernehmen`}
</button>
<button
onClick={() => setDividers([])}
disabled={dividers.length === 0}
className="text-xs px-3 py-2 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 disabled:opacity-50"
>
Alle Linien entfernen
</button>
</div>
</div>
</div>
</div>
)
}

View File

@@ -1,115 +0,0 @@
'use client'
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
/**
 * German display labels for document types. Used for the read-only badge;
 * unknown keys fall back to the raw doc_type value at the call site.
 */
const DOC_TYPE_LABELS: Record<string, string> = {
vocab_table: 'Vokabeltabelle',
full_text: 'Volltext',
generic_table: 'Tabelle',
}
/** Props accepted by PipelineStepper. */
interface PipelineStepperProps {
/** Steps to render, one pill button per entry, in array order. */
steps: PipelineStep[]
/** Index of the step highlighted as active. */
currentStep: number
/** Called with the step index when a clickable step is pressed. */
onStepClick: (index: number) => void
/** Optional: when set, completed steps show a hover button that calls this with the step index. */
onReprocess?: (index: number) => void
/** Optional: when truthy, the document-type badge is rendered below the steps. */
docTypeResult?: DocumentTypeResult | null
/** Optional: when set, the badge renders a select and reports the chosen type through this callback. */
onDocTypeChange?: (docType: DocumentTypeResult['doc_type']) => void
}
/**
 * Horizontal progress bar for the pipeline: one pill button per step, joined by
 * connector lines, with an optional document-type badge underneath.
 *
 * @param steps - Steps to render, in display order.
 * @param currentStep - Index of the step highlighted as active.
 * @param onStepClick - Invoked with the step index when a clickable step is pressed.
 * @param onReprocess - Optional; when given, completed steps show a hover button that calls it with the step index.
 * @param docTypeResult - Optional; when truthy, the document-type badge is rendered.
 * @param onDocTypeChange - Optional; when given, the badge shows a select instead of a static label.
 */
export function PipelineStepper({
  steps,
  currentStep,
  onStepClick,
  onReprocess,
  docTypeResult,
  onDocTypeChange,
}: PipelineStepperProps) {
  // Colour of the connector drawn in front of a step; "skipped" wins over "reached".
  const connectorTone = (skipped: boolean, reached: boolean): string => {
    if (skipped) return 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
    if (reached) return 'bg-teal-400'
    return 'bg-gray-300 dark:bg-gray-600'
  }

  // Colour of the step pill; precedence: skipped > active > completed > failed > idle.
  const pillTone = (skipped: boolean, active: boolean, completed: boolean, failed: boolean): string => {
    if (skipped) return 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
    if (active) return 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
    if (completed) return 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
    if (failed) return 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
    return 'text-gray-400 dark:text-gray-500'
  }

  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
        {steps.map((step, index) => {
          const skipped = step.status === 'skipped'
          const completed = step.status === 'completed'
          const failed = step.status === 'failed'
          const active = currentStep === index
          const reached = index <= currentStep
          // Skipped steps are never clickable; otherwise any reached or completed step is.
          const clickable = !skipped && (reached || completed)

          // Status glyph; falls back to the step's own icon when no status applies.
          const glyph = (() => {
            if (skipped) return '-'
            if (completed) return '\u2713'
            if (failed) return '\u2717'
            return step.icon
          })()

          const pillClasses = [
            'flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all',
            pillTone(skipped, active, completed, failed),
            clickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default',
          ].join(' ')

          return (
            <div key={step.id} className="flex items-center">
              {index > 0 && (
                <div className={`h-0.5 w-8 mx-1 ${connectorTone(skipped, reached)}`} />
              )}
              <div className="relative group">
                <button
                  onClick={() => {
                    if (clickable) onStepClick(index)
                  }}
                  disabled={!clickable}
                  className={pillClasses}
                >
                  <span className="text-base">{glyph}</span>
                  <span className="hidden sm:inline">{step.name}</span>
                  <span className="sm:hidden">{index + 1}</span>
                </button>
                {/* Reprocess button: only on completed steps, revealed on hover */}
                {completed && onReprocess && (
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      onReprocess(index)
                    }}
                    className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
                    title="Ab hier neu verarbeiten"
                  >
                    &#x21BB;
                  </button>
                )}
              </div>
            </div>
          )
        })}
      </div>
      {/* Document type badge: editable select when a change handler exists, static label otherwise */}
      {docTypeResult && (
        <div className="flex items-center gap-2 px-4 py-2 bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-800 text-sm">
          <span className="text-blue-600 dark:text-blue-400 font-medium">
            Dokumenttyp:
          </span>
          {onDocTypeChange ? (
            <select
              value={docTypeResult.doc_type}
              onChange={(e) => onDocTypeChange(e.target.value as DocumentTypeResult['doc_type'])}
              className="bg-white dark:bg-gray-800 border border-blue-300 dark:border-blue-700 rounded px-2 py-0.5 text-sm text-blue-700 dark:text-blue-300"
            >
              <option value="vocab_table">Vokabeltabelle</option>
              <option value="generic_table">Tabelle (generisch)</option>
              <option value="full_text">Volltext</option>
            </select>
          ) : (
            <span className="text-blue-700 dark:text-blue-300">
              {DOC_TYPE_LABELS[docTypeResult.doc_type] || docTypeResult.doc_type}
            </span>
          )}
          <span className="text-blue-400 dark:text-blue-500 text-xs">
            ({Math.round(docTypeResult.confidence * 100)}% Konfidenz)
          </span>
        </div>
      )}
    </div>
  )
}

View File

@@ -1,432 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { ColumnResult, ColumnGroundTruth, PageRegion, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
import { ColumnControls } from './ColumnControls'
import { ManualColumnEditor } from './ManualColumnEditor'
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-pipeline/types'
/** Path prefix prepended to every OCR-pipeline request issued from this file. */
const KLAUSUR_API = '/klausur-api'
/** Which layout the step renders: the default comparison, the ground-truth editor, or the manual column editor. */
type ViewMode = 'normal' | 'ground-truth' | 'manual'
/** Props accepted by StepColumnDetection. */
interface StepColumnDetectionProps {
/** Session to operate on; the step renders a placeholder while this is null. */
sessionId: string | null
/** Called to advance to the next pipeline step. */
onNext: () => void
/** Optional: receives sub-sessions that already exist on the session or were just created for box zones. */
onBoxSessionsCreated?: (subSessions: SubSession[]) => void
}
/**
 * Derive ManualColumnEditor state from a list of page regions.
 *
 * Regions are ordered by their left edge (`x`). Every region except the first
 * contributes one divider at its left edge, expressed as a percentage of
 * `imageWidth`; the column types are the region types in that same order.
 *
 * @param columns - Regions to convert; the input array is not mutated.
 * @param imageWidth - Image width the `x` values refer to.
 * @returns Empty dividers and column types when there are no regions or `imageWidth` is falsy.
 */
function columnsToEditorState(
  columns: PageRegion[],
  imageWidth: number
): { dividers: number[]; columnTypes: ColumnTypeKey[] } {
  if (columns.length === 0 || !imageWidth) {
    return { dividers: [], columnTypes: [] }
  }

  const leftToRight = columns.slice().sort((lhs, rhs) => lhs.x - rhs.x)
  const columnTypes: ColumnTypeKey[] = leftToRight.map(region => region.type)
  // The first region starts at the page edge, so only later regions yield a divider.
  const dividers: number[] = leftToRight
    .slice(1)
    .map(region => (region.x / imageWidth) * 100)

  return { dividers, columnTypes }
}
/**
 * Pipeline step "Spaltenerkennung" (column detection) for one OCR session.
 *
 * When a session id is available it loads the session, reuses a cached
 * `column_result` when present and otherwise triggers auto-detection.
 * Sessions with a `parent_session_id` (sub-sessions) call `onNext` as soon as
 * a column result exists. Three view modes are offered: normal (overlay next
 * to the clean image), manual column editing, and ground-truth editing.
 *
 * @param sessionId - Session to operate on; a placeholder is rendered while it is null.
 * @param onNext - Called to advance to the following pipeline step.
 * @param onBoxSessionsCreated - Optional; receives sub-sessions found on the session or newly created for box zones.
 */
export function StepColumnDetection({ sessionId, onNext, onBoxSessionsCreated }: StepColumnDetectionProps) {
  const [columnResult, setColumnResult] = useState<ColumnResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [viewMode, setViewMode] = useState<ViewMode>('normal')
  const [applying, setApplying] = useState(false)
  const [imageDimensions, setImageDimensions] = useState<{ width: number; height: number } | null>(null)
  const [savedGtColumns, setSavedGtColumns] = useState<PageRegion[] | null>(null)
  const [creatingBoxSessions, setCreatingBoxSessions] = useState(false)
  const [existingSubSessions, setExistingSubSessions] = useState<SubSession[] | null>(null)
  const [isSubSession, setIsSubSession] = useState(false)

  // Fetch session info (image dimensions) + check for cached column result
  useEffect(() => {
    if (!sessionId || imageDimensions) return
    const fetchSessionInfo = async () => {
      try {
        const infoRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (infoRes.ok) {
          const info = await infoRes.json()
          if (info.image_width && info.image_height) {
            setImageDimensions({ width: info.image_width, height: info.image_height })
          }
          const isSub = !!info.parent_session_id
          setIsSubSession(isSub)
          if (info.sub_sessions && info.sub_sessions.length > 0) {
            setExistingSubSessions(info.sub_sessions)
            onBoxSessionsCreated?.(info.sub_sessions)
          }
          if (info.column_result) {
            setColumnResult(info.column_result)
            // Sub-session with pseudo-column already set → auto-advance
            if (isSub) onNext()
            return
          }
          // Sub-session without columns → auto-detect (creates pseudo-column)
          if (isSub) {
            const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, { method: 'POST' })
            if (res.ok) {
              const data: ColumnResult = await res.json()
              setColumnResult(data)
              onNext()
              return
            }
          }
        }
      } catch (e) {
        console.error('Failed to fetch session info:', e)
      }
      // No cached result - run auto-detection.
      // runAutoDetection is declared further down; it is initialised by the time
      // this async function runs because effects execute after render.
      runAutoDetection()
    }
    fetchSessionInfo()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  // Load saved GT if exists
  useEffect(() => {
    if (!sessionId) return
    // Guards against writing a result that belongs to a previous session id
    // or arrives after the component has unmounted.
    let cancelled = false
    const fetchGt = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`)
        if (res.ok) {
          const data = await res.json()
          const corrected = data.columns_gt?.corrected_columns
          if (corrected && !cancelled) setSavedGtColumns(corrected)
        }
      } catch {
        // No saved GT - that's fine
      }
    }
    fetchGt()
    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Run server-side column detection and store the result (or the error message).
  const runAutoDetection = useCallback(async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, {
        method: 'POST',
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Spaltenerkennung fehlgeschlagen')
      }
      const data: ColumnResult = await res.json()
      setColumnResult(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }, [sessionId])

  const handleRerun = useCallback(() => {
    runAutoDetection()
  }, [runAutoDetection])

  // Best-effort ground-truth feedback from ColumnControls: failures are logged, not shown.
  const handleGroundTruth = useCallback(async (gt: ColumnGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch only rejects on network errors, so HTTP error statuses must be checked explicitly.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  // Persist manually drawn columns; on success they replace the current column result.
  const handleManualApply = useCallback(async (columns: PageRegion[]) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ columns }),
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Manuelle Spalten konnten nicht gespeichert werden')
      }
      const data = await res.json()
      setColumnResult({
        columns: data.columns,
        duration_seconds: data.duration_seconds ?? 0,
      })
      setViewMode('normal')
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Persist columns drawn in the ground-truth editor as corrected ground truth.
  const handleGtApply = useCallback(async (columns: PageRegion[]) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
      const gt: ColumnGroundTruth = {
        is_correct: false,
        corrected_columns: columns,
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // Without this check a rejected save would be shown as saved and the
      // editor would close with local state the server never stored.
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Ground Truth konnte nicht gespeichert werden')
      }
      setSavedGtColumns(columns)
      setViewMode('normal')
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Count box zones from column result
  const boxZones = columnResult?.zones?.filter(z => z.zone_type === 'box') || []
  const boxCount = boxZones.length

  // Ask the server to create one sub-session per box zone and publish them to the parent.
  const createBoxSessions = useCallback(async () => {
    if (!sessionId) return
    setCreatingBoxSessions(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/create-box-sessions`, {
        method: 'POST',
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Box-Sessions konnten nicht erstellt werden')
      }
      const data = await res.json()
      const subs: SubSession[] = data.sub_sessions.map((s: { id: string; name?: string; box_index: number }) => ({
        id: s.id,
        name: s.name || `Box ${s.box_index + 1}`,
        box_index: s.box_index,
        current_step: 1,
        status: 'pending',
      }))
      setExistingSubSessions(subs)
      onBoxSessionsCreated?.(subs)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Erstellen der Box-Sessions')
    } finally {
      setCreatingBoxSessions(false)
    }
  }, [sessionId, onBoxSessionsCreated])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">📊</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 3: Spaltenerkennung
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 und 2 abschliessen.
        </p>
      </div>
    )
  }

  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/columns-overlay`

  // Pre-compute editor state from saved GT for GT mode (1000 px is the fallback width)
  const gtInitial = savedGtColumns
    ? columnsToEditorState(savedGtColumns, imageDimensions?.width ?? 1000)
    : undefined

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Spaltenerkennung laeuft...
        </div>
      )}
      {viewMode === 'manual' ? (
        /* Manual column editor - overwrites column_result */
        <ManualColumnEditor
          imageUrl={dewarpedUrl}
          imageWidth={imageDimensions?.width ?? 1000}
          imageHeight={imageDimensions?.height ?? 1400}
          onApply={handleManualApply}
          onCancel={() => setViewMode('normal')}
          applying={applying}
          mode="manual"
        />
      ) : viewMode === 'ground-truth' ? (
        /* GT mode: auto result (left, readonly) + GT editor (right) */
        <div className="grid grid-cols-2 gap-4">
          {/* Left: Auto result (readonly overlay) */}
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Auto-Ergebnis (readonly)
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {columnResult ? (
                // eslint-disable-next-line @next/next/no-img-element
                <img
                  src={`${overlayUrl}?t=${Date.now()}`}
                  alt="Auto Spalten-Overlay"
                  className="w-full h-auto"
                />
              ) : (
                <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                  Keine Auto-Daten
                </div>
              )}
            </div>
            {/* Auto column list */}
            {columnResult && (
              <div className="mt-2 space-y-1">
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400">
                  Auto: {columnResult.columns.length} Spalten
                </div>
                {columnResult.columns
                  .filter(c => c.type.startsWith('column') || c.type === 'page_ref')
                  .map((col, i) => (
                    <div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono">
                      {i + 1}. {col.type} x={col.x} w={col.width}
                    </div>
                  ))}
              </div>
            )}
          </div>
          {/* Right: GT editor */}
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Ground Truth Editor
            </div>
            <ManualColumnEditor
              imageUrl={dewarpedUrl}
              imageWidth={imageDimensions?.width ?? 1000}
              imageHeight={imageDimensions?.height ?? 1400}
              onApply={handleGtApply}
              onCancel={() => setViewMode('normal')}
              applying={applying}
              mode="ground-truth"
              layout="stacked"
              initialDividers={gtInitial?.dividers}
              initialColumnTypes={gtInitial?.columnTypes}
            />
          </div>
        </div>
      ) : (
        /* Normal mode: overlay (left) vs clean (right) */
        <div className="grid grid-cols-2 gap-4">
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Mit Spalten-Overlay
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {columnResult ? (
                // eslint-disable-next-line @next/next/no-img-element
                <img
                  src={`${overlayUrl}?t=${Date.now()}`}
                  alt="Spalten-Overlay"
                  className="w-full h-auto"
                />
              ) : (
                <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                  {detecting ? 'Erkenne Spalten...' : 'Keine Daten'}
                </div>
              )}
            </div>
          </div>
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Entzerrtes Bild
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {/* eslint-disable-next-line @next/next/no-img-element */}
              <img
                src={dewarpedUrl}
                alt="Entzerrt"
                className="w-full h-auto"
              />
            </div>
          </div>
        </div>
      )}
      {/* Box zone info */}
      {viewMode === 'normal' && boxCount > 0 && (
        <div className="bg-amber-50 dark:bg-amber-900/20 border border-amber-200 dark:border-amber-700 rounded-xl p-4 flex items-center justify-between">
          <div className="flex items-center gap-3">
            <span className="text-2xl">📦</span>
            <div>
              <div className="text-sm font-medium text-amber-800 dark:text-amber-300">
                {boxCount} Box{boxCount > 1 ? 'en' : ''} erkannt
              </div>
              <div className="text-xs text-amber-600 dark:text-amber-400">
                Box-Bereiche werden separat verarbeitet
              </div>
            </div>
          </div>
          {existingSubSessions && existingSubSessions.length > 0 ? (
            <div className="text-xs text-amber-700 dark:text-amber-300 font-medium">
              {existingSubSessions.length} Box-Session{existingSubSessions.length > 1 ? 's' : ''} vorhanden
            </div>
          ) : (
            <button
              onClick={createBoxSessions}
              disabled={creatingBoxSessions}
              className="px-4 py-2 bg-amber-600 text-white rounded-lg hover:bg-amber-700 transition-colors text-sm font-medium disabled:opacity-50 flex items-center gap-2"
            >
              {creatingBoxSessions && (
                <div className="animate-spin w-3.5 h-3.5 border-2 border-white border-t-transparent rounded-full" />
              )}
              Box-Sessions erstellen
            </button>
          )}
        </div>
      )}
      {/* Controls */}
      {viewMode === 'normal' && (
        <ColumnControls
          columnResult={columnResult}
          onRerun={handleRerun}
          onManualMode={() => setViewMode('manual')}
          onGtMode={() => setViewMode('ground-truth')}
          onGroundTruth={handleGroundTruth}
          onNext={onNext}
          isDetecting={detecting}
          savedGtColumns={savedGtColumns}
        />
      )}
      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}

View File

@@ -1,19 +0,0 @@
'use client'
/**
 * Static placeholder for pipeline step 5 (coordinate assignment).
 * Renders an icon, a heading, a short explanation and a "coming soon" badge;
 * it takes no props and holds no state.
 */
export function StepCoordinates() {
  const wrapperClasses = 'flex flex-col items-center justify-center py-16 text-center'
  const headingClasses = 'text-lg font-medium text-gray-700 dark:text-gray-300 mb-2'
  const badgeClasses =
    'mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium'

  return (
    <div className={wrapperClasses}>
      <div className="text-5xl mb-4">📍</div>
      <h3 className={headingClasses}>
        Schritt 5: Koordinatenzuweisung
      </h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        Exakte Positionszuweisung fuer jedes Wort auf der Seite.
        Dieser Schritt wird in einer zukuenftigen Version implementiert.
      </p>
      <div className={badgeClasses}>
        Kommt bald
      </div>
    </div>
  )
}

View File

@@ -1,200 +0,0 @@
'use client'
import { useEffect, useState } from 'react'
import type { CropResult } from '@/app/(admin)/ai/ocr-pipeline/types'
import { ImageCompareView } from './ImageCompareView'
/** Path prefix prepended to every OCR-pipeline request issued from this file. */
const KLAUSUR_API = '/klausur-api'
/** Props accepted by StepCrop. */
interface StepCropProps {
/** Session to crop; the step renders a short notice while this is null. */
sessionId: string | null
/** Called to advance to the next pipeline step (also after skipping). */
onNext: () => void
}
/**
 * Pipeline step that crops scanner borders from the dewarped image.
 *
 * Once a session id is available it reuses the session's cached `crop_result`
 * if there is one, otherwise it triggers the crop endpoint. The result is shown
 * next to the dewarped image together with format, size and border details.
 *
 * @param sessionId - Session to crop; a short notice is rendered while it is null.
 * @param onNext - Called to advance to the next step, including after "skip".
 */
export function StepCrop({ sessionId, onNext }: StepCropProps) {
  const [cropResult, setCropResult] = useState<CropResult | null>(null)
  const [cropping, setCropping] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [hasRun, setHasRun] = useState(false)

  // Auto-trigger crop on mount
  useEffect(() => {
    if (!sessionId || hasRun) return
    setHasRun(true)
    const runCrop = async () => {
      setCropping(true)
      setError(null)
      try {
        // Check if session already has crop result
        const sessionRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (sessionRes.ok) {
          const sessionData = await sessionRes.json()
          if (sessionData.crop_result) {
            setCropResult(sessionData.crop_result)
            // The finally block below clears the loading flag.
            return
          }
        }
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop`, {
          method: 'POST',
        })
        if (!res.ok) {
          // Surface the server's reason when it sends one, as the column step does.
          const err = await res.json().catch(() => ({ detail: res.statusText }))
          const detail = typeof err?.detail === 'string' && err.detail ? err.detail : 'Zuschnitt fehlgeschlagen'
          throw new Error(detail)
        }
        const data = await res.json()
        setCropResult(data)
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        setCropping(false)
      }
    }
    runCrop()
  }, [sessionId, hasRun])

  // Skipping is best-effort: the step advances even when the skip request fails.
  const handleSkip = async () => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop/skip`, {
        method: 'POST',
      })
      if (res.ok) {
        const data = await res.json()
        setCropResult(data)
      } else {
        // fetch does not reject on HTTP error statuses, so log them explicitly.
        console.error('Skip crop failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Skip crop failed:', e)
    }
    onNext()
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
  // The cropped image is only requested once a crop result exists.
  const croppedUrl = cropResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
    : null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {cropping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Scannerraender werden erkannt...
        </div>
      )}
      {/* Image comparison */}
      <ImageCompareView
        originalUrl={dewarpedUrl}
        deskewedUrl={croppedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Entzerrt"
        rightLabel="Zugeschnitten"
      />
      {/* Crop result info */}
      {cropResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex flex-wrap items-center gap-3 text-sm">
            {/* multi_page, page_count and sub_sessions are read through casts, i.e. outside the CropResult type */}
            {(cropResult as Record<string, unknown>).multi_page ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
                  Mehrseitig: {(cropResult as Record<string, unknown>).page_count as number} Seiten erkannt
                </span>
                {((cropResult as Record<string, unknown>).sub_sessions as Array<{id: string; name: string; page_index: number}> | undefined)?.map((sub) => (
                  <span key={sub.id} className="text-gray-400 text-xs">
                    Seite {sub.page_index + 1}
                  </span>
                ))}
              </>
            ) : cropResult.crop_applied ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                  Zugeschnitten
                </span>
                {cropResult.detected_format && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-600 dark:text-gray-400">
                      Format: <span className="font-medium">{cropResult.detected_format}</span>
                      {cropResult.format_confidence != null && (
                        <span className="text-gray-400 ml-1">
                          ({Math.round(cropResult.format_confidence * 100)}%)
                        </span>
                      )}
                    </span>
                  </>
                )}
                {cropResult.original_size && cropResult.cropped_size && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      {cropResult.original_size.width}x{cropResult.original_size.height} {cropResult.cropped_size.width}x{cropResult.cropped_size.height}
                    </span>
                  </>
                )}
                {cropResult.border_fractions && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      Raender: O={pct(cropResult.border_fractions.top)} U={pct(cropResult.border_fractions.bottom)} L={pct(cropResult.border_fractions.left)} R={pct(cropResult.border_fractions.right)}
                    </span>
                  </>
                )}
              </>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                Kein Zuschnitt noetig
              </span>
            )}
            {cropResult.duration_seconds != null && (
              <span className="text-gray-400 text-xs ml-auto">
                {cropResult.duration_seconds}s
              </span>
            )}
          </div>
        </div>
      )}
      {/* Action buttons */}
      {cropResult && (
        <div className="flex justify-between">
          <button
            onClick={handleSkip}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors"
          >
            Ueberspringen
          </button>
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter &rarr;
          </button>
        </div>
      )}
      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
/**
 * Format a fraction as a percentage string with one decimal place.
 *
 * @param v - Fraction to format (0.5 means 50 %).
 * @returns The value times 100, fixed to one decimal, followed by "%".
 */
function pct(v: number): string {
  const scaled = v * 100
  return scaled.toFixed(1) + '%'
}

View File

@@ -1,183 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
import { DeskewControls } from './DeskewControls'
import { ImageCompareView } from './ImageCompareView'
/** Path prefix prepended to every OCR-pipeline request and image URL built in this file. */
const KLAUSUR_API = '/klausur-api'
/** Props accepted by StepDeskew. */
interface StepDeskewProps {
/** Session to deskew; the step renders a short notice while this is null. */
sessionId: string | null
/** Called to advance to the next pipeline step. */
onNext: () => void
}
export function StepDeskew({ sessionId, onNext }: StepDeskewProps) {
const [session, setSession] = useState<SessionInfo | null>(null)
const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
const [deskewing, setDeskewing] = useState(false)
const [applying, setApplying] = useState(false)
const [showBinarized, setShowBinarized] = useState(false)
const [showGrid, setShowGrid] = useState(true)
const [error, setError] = useState<string | null>(null)
const [hasAutoRun, setHasAutoRun] = useState(false)
// Load session and auto-trigger deskew
useEffect(() => {
if (!sessionId || session) return
const loadAndDeskew = async () => {
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
if (!res.ok) return
const data = await res.json()
const sessionInfo: SessionInfo = {
session_id: data.session_id,
filename: data.filename,
image_width: data.image_width,
image_height: data.image_height,
// Use oriented image as "before" view (deskew runs right after orientation)
original_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/oriented`,
}
setSession(sessionInfo)
// If deskew result already exists, use it
if (data.deskew_result) {
const dr: DeskewResult = {
...data.deskew_result,
deskewed_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`,
binarized_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/binarized`,
}
setDeskewResult(dr)
return
}
// Auto-trigger deskew if not already done
if (!hasAutoRun) {
setHasAutoRun(true)
setDeskewing(true)
const deskewRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew`, {
method: 'POST',
})
if (!deskewRes.ok) {
throw new Error('Begradigung fehlgeschlagen')
}
const deskewData: DeskewResult = await deskewRes.json()
deskewData.deskewed_image_url = `${KLAUSUR_API}${deskewData.deskewed_image_url}`
deskewData.binarized_image_url = `${KLAUSUR_API}${deskewData.binarized_image_url}`
setDeskewResult(deskewData)
}
} catch (e) {
setError(e instanceof Error ? e.message : 'Fehler beim Laden')
} finally {
setDeskewing(false)
}
}
loadAndDeskew()
}, [sessionId, session, hasAutoRun])
const handleManualDeskew = useCallback(async (angle: number) => {
if (!sessionId) return
setApplying(true)
setError(null)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew/manual`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ angle }),
})
if (!res.ok) throw new Error('Manuelle Begradigung fehlgeschlagen')
const data = await res.json()
setDeskewResult((prev) =>
prev
? {
...prev,
angle_applied: data.angle_applied,
method_used: data.method_used,
deskewed_image_url: `${KLAUSUR_API}${data.deskewed_image_url}?t=${Date.now()}`,
}
: null,
)
} catch (e) {
setError(e instanceof Error ? e.message : 'Fehler')
} finally {
setApplying(false)
}
}, [sessionId])
// Best-effort save of the deskew ground-truth feedback. Failures are only
// logged (never surfaced to the user), but HTTP error statuses are now
// logged as well: fetch() resolves normally for 4xx/5xx responses, so the
// catch block alone would miss them.
const handleGroundTruth = useCallback(async (gt: DeskewGroundTruth) => {
  if (!sessionId) return
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/deskew`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(gt),
    })
    if (!res.ok) {
      console.error('Ground truth save failed:', res.status, res.statusText)
    }
  } catch (e) {
    console.error('Ground truth save failed:', e)
  }
}, [sessionId])
if (!sessionId) {
return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
}
return (
<div className="space-y-4">
{/* Filename */}
{session && (
<div className="text-sm text-gray-500 dark:text-gray-400">
Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
{' '}({session.image_width} x {session.image_height} px)
</div>
)}
{/* Loading indicator */}
{deskewing && (
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
Begradigung laeuft (beide Methoden)...
</div>
)}
{/* Image comparison */}
{session && (
<ImageCompareView
originalUrl={session.original_image_url}
deskewedUrl={deskewResult?.deskewed_image_url ?? null}
showGrid={showGrid}
showBinarized={showBinarized}
binarizedUrl={deskewResult?.binarized_image_url ?? null}
leftLabel="Orientiert"
rightLabel="Begradigt"
/>
)}
{/* Controls */}
<DeskewControls
deskewResult={deskewResult}
showBinarized={showBinarized}
onToggleBinarized={() => setShowBinarized((v) => !v)}
showGrid={showGrid}
onToggleGrid={() => setShowGrid((v) => !v)}
onManualDeskew={handleManualDeskew}
onGroundTruth={handleGroundTruth}
onNext={onNext}
isApplying={applying}
/>
{error && (
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
{error}
</div>
)}
</div>
)
}

View File

@@ -1,204 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { DeskewResult, DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
import { DewarpControls } from './DewarpControls'
import { ImageCompareView } from './ImageCompareView'
const KLAUSUR_API = '/klausur-api'
/** Props accepted by the StepDewarp pipeline step. */
interface StepDewarpProps {
/** OCR pipeline session id; while null the step only renders a placeholder and sends no requests. */
sessionId: string | null
/** Handed to DewarpControls as its onNext callback. */
onNext: () => void
}
/**
 * Dewarp step of the OCR pipeline.
 *
 * On mount (and whenever the session changes) the component
 *  1. loads the session to pick up an existing deskew result, which is
 *     forwarded to DewarpControls, and
 *  2. automatically POSTs to the dewarp endpoint unless a result is
 *     already present.
 * The user can then re-apply a manual shear or a combined rotation/shear
 * adjustment and submit ground-truth feedback.
 *
 * @param sessionId  Pipeline session id; a placeholder is rendered while null.
 * @param onNext     Forwarded to DewarpControls.
 */
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
  const [dewarpResult, setDewarpResult] = useState<DewarpResult | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [dewarping, setDewarping] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Load session info to get deskew_result (for fine-tuning init values)
  useEffect(() => {
    // Drop results that belong to a previously shown session. Without this
    // a leftover dewarpResult would stop the auto-dewarp effect below from
    // ever running for the new session. On first mount both are already
    // null, so these calls are no-ops.
    setDewarpResult(null)
    setDeskewResult(null)
    if (!sessionId) return

    // Set by the cleanup so a late response cannot write state for a
    // session that is no longer displayed (or after unmount).
    let cancelled = false
    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok) return
        const data = await res.json()
        if (!cancelled && data.deskew_result) {
          setDeskewResult(data.deskew_result)
        }
      } catch (e) {
        console.error('Failed to load session info:', e)
      }
    }
    loadSession()
    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Auto-trigger dewarp when component mounts with a sessionId
  useEffect(() => {
    if (!sessionId || dewarpResult) return

    let cancelled = false
    const runDewarp = async () => {
      setDewarping(true)
      setError(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp`, {
          method: 'POST',
        })
        if (!res.ok) {
          // Prefer the server-provided detail, fall back to the status text.
          const err = await res.json().catch(() => ({ detail: res.statusText }))
          throw new Error(err.detail || 'Entzerrung fehlgeschlagen')
        }
        const data: DewarpResult = await res.json()
        if (cancelled) return
        // The API returns a path; prefix it with the API base.
        data.dewarped_image_url = `${KLAUSUR_API}${data.dewarped_image_url}`
        setDewarpResult(data)
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        // A cancelled run must not clear the spinner of its successor.
        if (!cancelled) setDewarping(false)
      }
    }
    runDewarp()
    return () => {
      cancelled = true
    }
  }, [sessionId, dewarpResult])

  // Shared implementation of the two manual adjustment endpoints: POST the
  // payload, then merge method/shear/image URL into the current result.
  const applyAdjustment = useCallback(
    async (endpoint: string, payload: Record<string, number>, failureMessage: string) => {
      if (!sessionId) return
      setApplying(true)
      setError(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload),
        })
        if (!res.ok) throw new Error(failureMessage)
        const data = await res.json()
        setDewarpResult((prev) =>
          prev
            ? {
                ...prev,
                method_used: data.method_used,
                shear_degrees: data.shear_degrees,
                // The timestamp makes the URL unique per call, presumably so
                // the browser re-fetches the regenerated image.
                dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
              }
            : null,
        )
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Fehler')
      } finally {
        setApplying(false)
      }
    },
    [sessionId],
  )

  // Re-run the dewarp with a user-chosen shear angle.
  const handleManualDewarp = useCallback(
    (shearDegrees: number) =>
      applyAdjustment('dewarp/manual', { shear_degrees: shearDegrees }, 'Manuelle Entzerrung fehlgeschlagen'),
    [applyAdjustment],
  )

  // Apply rotation and shear in a single request.
  const handleCombinedAdjust = useCallback(
    (rotationDegrees: number, shearDegrees: number) =>
      applyAdjustment(
        'adjust-combined',
        { rotation_degrees: rotationDegrees, shear_degrees: shearDegrees },
        'Kombinierte Anpassung fehlgeschlagen',
      ),
    [applyAdjustment],
  )

  // Best-effort save of the ground-truth feedback; failures are only logged.
  const handleGroundTruth = useCallback(async (gt: DewarpGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/dewarp`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() does not reject on HTTP error statuses, so log them explicitly.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">🔧</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 2: Entzerrung (Dewarp)
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 (Begradigung) abschliessen.
        </p>
      </div>
    )
  }

  const deskewedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`
  const dewarpedUrl = dewarpResult?.dewarped_image_url ?? null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {dewarping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Entzerrung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison: deskewed (left) vs dewarped (right) */}
      <ImageCompareView
        originalUrl={deskewedUrl}
        deskewedUrl={dewarpedUrl}
        showGrid={showGrid}
        showGridLeft={showGrid}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel={`Begradigt (nach Deskew)${showGrid ? ' + Raster' : ''}`}
        rightLabel={`Entzerrt${showGrid ? ' + Raster (mm)' : ''}`}
      />

      {/* Controls */}
      <DewarpControls
        dewarpResult={dewarpResult}
        deskewResult={deskewResult}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDewarp={handleManualDewarp}
        onCombinedAdjust={handleCombinedAdjust}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}

View File

@@ -1,596 +0,0 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type {
GridCell, ColumnMeta, ImageRegion, ImageStyle,
} from '@/app/(admin)/ai/ocr-pipeline/types'
import { IMAGE_STYLES as STYLES } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
/**
 * Hex colour per column-type identifier.
 * NOTE(review): no reference to this constant is visible in this file —
 * confirm whether it is still needed.
 */
const COL_TYPE_COLORS: Record<string, string> = {
column_en: '#3b82f6',
column_de: '#22c55e',
column_example: '#f97316',
column_text: '#a855f7',
page_ref: '#06b6d4',
column_marker: '#6b7280',
}
/** Props accepted by the StepGroundTruth pipeline step. */
interface StepGroundTruthProps {
/** OCR pipeline session id; no session data is loaded while it is null. */
sessionId: string | null
/** Invoked by the "Abschliessen" button after the save attempt. */
onNext: () => void
}
/** Subset of the session response that the validation view renders; filled in by loadSessionData. */
interface SessionData {
/** Recognised grid cells (word_result.cells, empty array when absent). */
cells: GridCell[]
/** Column metadata (word_result.columns_used, empty array when absent). */
columnsUsed: ColumnMeta[]
/** Image width taken from word_result, then the session, then a fallback of 800. */
imageWidth: number
/** Image height taken from word_result, then the session, then a fallback of 600. */
imageHeight: number
/** URL of the original image, prefixed with the API base. */
originalImageUrl: string
}
/**
 * Validation step: shows the original image next to a text reconstruction
 * built from the recognised grid cells. The user can detect or draw image
 * regions, generate an image per region, rate the reconstruction and add
 * notes, then save the validation.
 *
 * @param sessionId  Pipeline session id; nothing is loaded while it is null.
 * @param onNext     Called by "Abschliessen" once the validation was saved
 *                   successfully.
 */
export function StepGroundTruth({ sessionId, onNext }: StepGroundTruthProps) {
  const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
  const [error, setError] = useState('')
  const [session, setSession] = useState<SessionData | null>(null)
  const [imageRegions, setImageRegions] = useState<(ImageRegion & { generating?: boolean })[]>([])
  const [detecting, setDetecting] = useState(false)
  const [zoom, setZoom] = useState(100)
  const [syncScroll, setSyncScroll] = useState(true)
  const [notes, setNotes] = useState('')
  const [score, setScore] = useState<number | null>(null)
  const [drawingRegion, setDrawingRegion] = useState(false)
  const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null)
  const [dragEnd, setDragEnd] = useState<{ x: number; y: number } | null>(null)
  const leftPanelRef = useRef<HTMLDivElement>(null)
  const rightPanelRef = useRef<HTMLDivElement>(null)
  const reconRef = useRef<HTMLDivElement>(null)
  const [reconWidth, setReconWidth] = useState(0)

  // Track reconstruction container width for font size calculation
  useEffect(() => {
    const el = reconRef.current
    if (!el) return
    const obs = new ResizeObserver(entries => {
      for (const entry of entries) setReconWidth(entry.contentRect.width)
    })
    obs.observe(el)
    return () => obs.disconnect()
  }, [session])

  // Load session data
  useEffect(() => {
    if (!sessionId) return
    loadSessionData()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  // Fetches the session (cells, columns, image size) and any previously
  // stored validation (regions, notes, score).
  const loadSessionData = async () => {
    if (!sessionId) return
    setStatus('loading')
    // Clear a previous failure so a successful (re)load does not keep
    // showing the stale error banner.
    setError('')
    try {
      const resp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (!resp.ok) throw new Error(`Failed to load session: ${resp.status}`)
      const data = await resp.json()
      const wordResult = data.word_result || {}
      setSession({
        cells: wordResult.cells || [],
        columnsUsed: wordResult.columns_used || [],
        imageWidth: wordResult.image_width || data.image_width || 800,
        imageHeight: wordResult.image_height || data.image_height || 600,
        originalImageUrl: data.original_image_url
          ? `${KLAUSUR_API}${data.original_image_url}`
          : `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/original`,
      })

      // Load existing validation data
      const valResp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validation`)
      if (valResp.ok) {
        const valData = await valResp.json()
        const validation = valData.validation
        if (validation) {
          setImageRegions(validation.image_regions || [])
          setNotes(validation.notes || '')
          setScore(validation.score ?? null)
        }
      }
      setStatus('ready')
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
      setStatus('error')
    }
  }

  // Sync scroll between panels
  const handleScroll = useCallback((source: 'left' | 'right') => {
    if (!syncScroll) return
    const from = source === 'left' ? leftPanelRef.current : rightPanelRef.current
    const to = source === 'left' ? rightPanelRef.current : leftPanelRef.current
    if (from && to) {
      to.scrollTop = from.scrollTop
      to.scrollLeft = from.scrollLeft
    }
  }, [syncScroll])

  // Detect images via VLM
  const handleDetectImages = async () => {
    if (!sessionId) return
    setDetecting(true)
    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/detect-images`,
        { method: 'POST' }
      )
      if (!resp.ok) throw new Error(`Detection failed: ${resp.status}`)
      const data = await resp.json()
      setImageRegions(data.regions || [])
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setDetecting(false)
    }
  }

  // Generate image for a region
  const handleGenerateImage = async (index: number) => {
    if (!sessionId) return
    const region = imageRegions[index]
    if (!region) return
    setImageRegions(prev => prev.map((r, i) => i === index ? { ...r, generating: true } : r))
    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/generate-image`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            region_index: index,
            prompt: region.prompt,
            style: region.style,
          }),
        }
      )
      if (!resp.ok) throw new Error(`Generation failed: ${resp.status}`)
      const data = await resp.json()
      setImageRegions(prev => prev.map((r, i) =>
        i === index ? { ...r, image_b64: data.image_b64, generating: false } : r
      ))
    } catch (e) {
      setImageRegions(prev => prev.map((r, i) => i === index ? { ...r, generating: false } : r))
      setError(e instanceof Error ? e.message : String(e))
    }
  }

  // Save validation. Resolves to true when the server accepted the save and
  // to false otherwise, so callers can decide whether to continue. Errors
  // are reported through the error banner, never thrown.
  const handleSave = async (): Promise<boolean> => {
    if (!sessionId) {
      setError('Keine Session-ID vorhanden')
      return false
    }
    setStatus('saving')
    setError('')
    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validate`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          // A missing rating is transmitted as 0.
          body: JSON.stringify({ notes, score: score ?? 0 }),
        }
      )
      if (!resp.ok) {
        const body = await resp.text().catch(() => '')
        throw new Error(`Speichern fehlgeschlagen (${resp.status}): ${body}`)
      }
      setStatus('saved')
      return true
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
      setStatus('ready')
      return false
    }
  }

  // Handle manual region drawing on reconstruction.
  // Coordinates are stored as percentages of the reconstruction container.
  const handleReconMouseDown = (e: React.MouseEvent<HTMLDivElement>) => {
    if (!drawingRegion) return
    const rect = e.currentTarget.getBoundingClientRect()
    const x = ((e.clientX - rect.left) / rect.width) * 100
    const y = ((e.clientY - rect.top) / rect.height) * 100
    setDragStart({ x, y })
    setDragEnd({ x, y })
  }

  const handleReconMouseMove = (e: React.MouseEvent<HTMLDivElement>) => {
    if (!dragStart) return
    const rect = e.currentTarget.getBoundingClientRect()
    const x = ((e.clientX - rect.left) / rect.width) * 100
    const y = ((e.clientY - rect.top) / rect.height) * 100
    setDragEnd({ x, y })
  }

  const handleReconMouseUp = () => {
    if (!dragStart || !dragEnd) return
    const x = Math.min(dragStart.x, dragEnd.x)
    const y = Math.min(dragStart.y, dragEnd.y)
    const w = Math.abs(dragEnd.x - dragStart.x)
    const h = Math.abs(dragEnd.y - dragStart.y)
    // Ignore accidental clicks: the rectangle must exceed 2% in both directions.
    if (w > 2 && h > 2) {
      setImageRegions(prev => [...prev, {
        bbox_pct: { x, y, w, h },
        prompt: '',
        description: 'Manually selected region',
        image_b64: null,
        style: 'educational' as ImageStyle,
      }])
    }
    setDragStart(null)
    setDragEnd(null)
    setDrawingRegion(false)
  }

  const handleRemoveRegion = (index: number) => {
    setImageRegions(prev => prev.filter((_, i) => i !== index))
  }

  if (status === 'loading') {
    return (
      <div className="flex items-center justify-center py-16">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-teal-500 mr-3" />
        <span className="text-gray-500 dark:text-gray-400">Session wird geladen...</span>
      </div>
    )
  }

  if (status === 'error' && !session) {
    return (
      <div className="text-center py-16">
        <p className="text-red-500">{error}</p>
        <button onClick={loadSessionData} className="mt-4 px-4 py-2 bg-teal-600 text-white rounded hover:bg-teal-700">
          Erneut laden
        </button>
      </div>
    )
  }

  if (!session) return null

  const aspect = session.imageHeight / session.imageWidth

  return (
    <div className="space-y-4">
      {/* Header / Controls */}
      <div className="flex items-center justify-between flex-wrap gap-2">
        <h3 className="text-lg font-medium text-gray-800 dark:text-gray-200">
          Validierung Original vs. Rekonstruktion
        </h3>
        <div className="flex items-center gap-3">
          <button
            onClick={handleDetectImages}
            disabled={detecting}
            className="px-3 py-1.5 text-sm bg-indigo-600 text-white rounded hover:bg-indigo-700 disabled:opacity-50"
          >
            {detecting ? 'Erkennung laeuft...' : 'Bilder erkennen'}
          </button>
          <label className="flex items-center gap-1.5 text-sm text-gray-600 dark:text-gray-400">
            <input
              type="checkbox"
              checked={syncScroll}
              onChange={e => setSyncScroll(e.target.checked)}
              className="rounded"
            />
            Sync Scroll
          </label>
          <div className="flex items-center gap-1.5">
            <button onClick={() => setZoom(z => Math.max(50, z - 25))} className="px-2 py-1 text-sm border rounded dark:border-gray-600 hover:bg-gray-100 dark:hover:bg-gray-700">-</button>
            <span className="text-sm text-gray-600 dark:text-gray-400 w-12 text-center">{zoom}%</span>
            <button onClick={() => setZoom(z => Math.min(200, z + 25))} className="px-2 py-1 text-sm border rounded dark:border-gray-600 hover:bg-gray-100 dark:hover:bg-gray-700">+</button>
          </div>
        </div>
      </div>

      {error && (
        <div className="p-2 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 text-sm rounded">
          {error}
          <button onClick={() => setError('')} className="ml-2 underline">Schliessen</button>
        </div>
      )}

      {/* Side-by-side panels */}
      <div className="grid grid-cols-2 gap-4" style={{ height: 'calc(100vh - 580px)', minHeight: 300 }}>
        {/* Left: Original */}
        <div className="border rounded-lg dark:border-gray-700 overflow-hidden flex flex-col">
          <div className="px-3 py-1.5 bg-gray-50 dark:bg-gray-800 text-sm font-medium text-gray-600 dark:text-gray-400 border-b dark:border-gray-700">
            Original
          </div>
          <div
            ref={leftPanelRef}
            className="flex-1 overflow-auto"
            onScroll={() => handleScroll('left')}
          >
            <div style={{ width: `${zoom}%`, minWidth: '100%' }}>
              <img
                src={session.originalImageUrl}
                alt="Original"
                className="w-full h-auto"
                draggable={false}
              />
            </div>
          </div>
        </div>

        {/* Right: Reconstruction */}
        <div className="border rounded-lg dark:border-gray-700 overflow-hidden flex flex-col">
          <div className="px-3 py-1.5 bg-gray-50 dark:bg-gray-800 text-sm font-medium text-gray-600 dark:text-gray-400 border-b dark:border-gray-700 flex items-center justify-between">
            <span>Rekonstruktion</span>
            <button
              onClick={() => setDrawingRegion(!drawingRegion)}
              className={`text-xs px-2 py-0.5 rounded ${drawingRegion ? 'bg-indigo-600 text-white' : 'bg-gray-200 dark:bg-gray-700 text-gray-600 dark:text-gray-400'}`}
            >
              {drawingRegion ? 'Region zeichnen...' : '+ Region'}
            </button>
          </div>
          <div
            ref={rightPanelRef}
            className="flex-1 overflow-auto"
            onScroll={() => handleScroll('right')}
          >
            <div style={{ width: `${zoom}%`, minWidth: '100%' }}>
              {/* Reconstruction container */}
              <div
                ref={reconRef}
                className="relative bg-white"
                style={{
                  paddingBottom: `${aspect * 100}%`,
                  cursor: drawingRegion ? 'crosshair' : 'default',
                }}
                onMouseDown={handleReconMouseDown}
                onMouseMove={handleReconMouseMove}
                onMouseUp={handleReconMouseUp}
              >
                {/* Row separator lines — derive from cells */}
                {(() => {
                  const rowYs = new Set<number>()
                  for (const cell of session.cells) {
                    if (cell.col_index === 0 && cell.bbox_pct) {
                      rowYs.add(cell.bbox_pct.y)
                    }
                  }
                  return Array.from(rowYs).map((y, i) => (
                    <div
                      key={`row-${i}`}
                      className="absolute left-0 right-0"
                      style={{
                        top: `${y}%`,
                        height: '1px',
                        backgroundColor: 'rgba(0,0,0,0.06)',
                      }}
                    />
                  ))
                })()}

                {/* Cell texts — black on white, font size derived from cell height */}
                {session.cells.map(cell => {
                  if (!cell.bbox_pct || !cell.text) return null
                  // Container height in px = reconWidth * aspect
                  // Cell height in px = containerHeightPx * (bbox_pct.h / 100)
                  // Font size ≈ 70% of cell height
                  const containerH = reconWidth * aspect
                  const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
                  const fontSize = Math.max(6, cellHeightPx * 0.7)
                  return (
                    <span
                      key={cell.cell_id}
                      className="absolute leading-none overflow-hidden whitespace-nowrap"
                      style={{
                        left: `${cell.bbox_pct.x}%`,
                        top: `${cell.bbox_pct.y}%`,
                        width: `${cell.bbox_pct.w}%`,
                        height: `${cell.bbox_pct.h}%`,
                        color: '#1a1a1a',
                        fontSize: `${fontSize}px`,
                        fontWeight: cell.is_bold ? 'bold' : 'normal',
                        fontFamily: "'Liberation Sans', 'DejaVu Sans', Arial, sans-serif",
                        display: 'flex',
                        alignItems: 'center',
                        padding: '0 1px',
                      }}
                      title={`${cell.cell_id}: ${cell.text}`}
                    >
                      {cell.text}
                    </span>
                  )
                })}

                {/* Generated images at region positions */}
                {imageRegions.map((region, i) => (
                  <div
                    key={`region-${i}`}
                    className="absolute border-2 border-dashed border-indigo-400"
                    style={{
                      left: `${region.bbox_pct.x}%`,
                      top: `${region.bbox_pct.y}%`,
                      width: `${region.bbox_pct.w}%`,
                      height: `${region.bbox_pct.h}%`,
                    }}
                  >
                    {region.image_b64 ? (
                      <img src={region.image_b64} alt={region.description} className="w-full h-full object-cover" />
                    ) : (
                      <div className="w-full h-full flex items-center justify-center bg-indigo-50/50 text-indigo-400 text-[0.5em]">
                        {region.generating ? '...' : `Bild ${i + 1}`}
                      </div>
                    )}
                  </div>
                ))}

                {/* Drawing rectangle */}
                {dragStart && dragEnd && (
                  <div
                    className="absolute border-2 border-dashed border-red-500 bg-red-100/20 pointer-events-none"
                    style={{
                      left: `${Math.min(dragStart.x, dragEnd.x)}%`,
                      top: `${Math.min(dragStart.y, dragEnd.y)}%`,
                      width: `${Math.abs(dragEnd.x - dragStart.x)}%`,
                      height: `${Math.abs(dragEnd.y - dragStart.y)}%`,
                    }}
                  />
                )}
              </div>
            </div>
          </div>
        </div>
      </div>

      {/* Image regions panel */}
      {imageRegions.length > 0 && (
        <div className="border rounded-lg dark:border-gray-700 p-4">
          <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">
            Bildbereiche ({imageRegions.length} gefunden)
          </h4>
          <div className="space-y-3">
            {imageRegions.map((region, i) => (
              <div key={i} className="flex items-start gap-3 p-3 bg-gray-50 dark:bg-gray-800 rounded-lg">
                {/* Preview thumbnail */}
                <div className="w-16 h-16 flex-shrink-0 border rounded dark:border-gray-600 overflow-hidden bg-white">
                  {region.image_b64 ? (
                    <img src={region.image_b64} alt="" className="w-full h-full object-cover" />
                  ) : (
                    <div className="w-full h-full flex items-center justify-center text-gray-400 text-xs">
                      {Math.round(region.bbox_pct.w)}x{Math.round(region.bbox_pct.h)}%
                    </div>
                  )}
                </div>

                {/* Prompt + controls */}
                <div className="flex-1 min-w-0 space-y-2">
                  <div className="flex items-center gap-2">
                    <span className="text-xs text-gray-500 dark:text-gray-400 flex-shrink-0">
                      Bereich {i + 1}:
                    </span>
                    <input
                      type="text"
                      value={region.prompt}
                      onChange={e => {
                        setImageRegions(prev => prev.map((r, j) =>
                          j === i ? { ...r, prompt: e.target.value } : r
                        ))
                      }}
                      placeholder="Beschreibung / Prompt..."
                      className="flex-1 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
                    />
                  </div>
                  <div className="flex items-center gap-2">
                    <select
                      value={region.style}
                      onChange={e => {
                        setImageRegions(prev => prev.map((r, j) =>
                          j === i ? { ...r, style: e.target.value as ImageStyle } : r
                        ))
                      }}
                      className="text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
                    >
                      {STYLES.map(s => (
                        <option key={s.value} value={s.value}>{s.label}</option>
                      ))}
                    </select>
                    <button
                      onClick={() => handleGenerateImage(i)}
                      disabled={!!region.generating || !region.prompt}
                      className="px-3 py-1 text-sm bg-teal-600 text-white rounded hover:bg-teal-700 disabled:opacity-50"
                    >
                      {region.generating ? 'Generiere...' : 'Generieren'}
                    </button>
                    <button
                      onClick={() => handleRemoveRegion(i)}
                      className="px-2 py-1 text-sm text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded"
                    >
                      Entfernen
                    </button>
                  </div>
                  {region.description && region.description !== region.prompt && (
                    <p className="text-xs text-gray-400">{region.description}</p>
                  )}
                </div>
              </div>
            ))}
          </div>
        </div>
      )}

      {/* Notes and score */}
      <div className="border rounded-lg dark:border-gray-700 p-4 space-y-3">
        <div className="flex items-center gap-4">
          <label className="text-sm font-medium text-gray-700 dark:text-gray-300">
            Bewertung (1-10):
          </label>
          <input
            type="number"
            min={1}
            max={10}
            value={score ?? ''}
            onChange={e => {
              // min/max on the input do not stop typed values, so parse
              // with an explicit radix and clamp to the 1-10 scale here.
              const parsed = Number.parseInt(e.target.value, 10)
              setScore(Number.isNaN(parsed) ? null : Math.min(10, Math.max(1, parsed)))
            }}
            className="w-20 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
          />
          <div className="flex gap-1">
            {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(v => (
              <button
                key={v}
                onClick={() => setScore(v)}
                className={`w-7 h-7 text-xs rounded ${score === v ? 'bg-teal-600 text-white' : 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-200 dark:hover:bg-gray-600'}`}
              >
                {v}
              </button>
            ))}
          </div>
        </div>
        <div>
          <label className="text-sm font-medium text-gray-700 dark:text-gray-300 block mb-1">
            Notizen:
          </label>
          <textarea
            value={notes}
            onChange={e => setNotes(e.target.value)}
            rows={3}
            placeholder="Anmerkungen zur Qualitaet der Rekonstruktion..."
            className="w-full text-sm px-3 py-2 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
          />
        </div>
      </div>

      {/* Actions — sticky bottom bar */}
      <div className="sticky bottom-0 bg-white dark:bg-gray-900 border-t dark:border-gray-700 py-3 px-1 -mx-1 flex items-center justify-between">
        <div className="text-sm text-gray-500 dark:text-gray-400">
          {status === 'saved' && <span className="text-green-600 dark:text-green-400">Validierung gespeichert</span>}
          {status === 'saving' && <span>Speichere...</span>}
        </div>
        <div className="flex items-center gap-3">
          <button
            onClick={handleSave}
            disabled={status === 'saving'}
            className="px-4 py-2 text-sm bg-gray-600 text-white rounded hover:bg-gray-700 disabled:opacity-50"
          >
            Speichern
          </button>
          <button
            onClick={async () => {
              // Only advance when the validation was actually persisted;
              // on failure the error banner stays visible on this step.
              if (await handleSave()) onNext()
            }}
            disabled={status === 'saving'}
            className="px-4 py-2 text-sm bg-teal-600 text-white rounded hover:bg-teal-700 disabled:opacity-50"
          >
            Abschliessen
          </button>
        </div>
      </div>
    </div>
  )
}

View File

@@ -1,922 +0,0 @@
'use client'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
import { usePixelWordPositions } from './usePixelWordPositions'
const KLAUSUR_API = '/klausur-api'
/** A single correction proposed by the LLM review. */
interface LlmChange {
/** Index of the affected vocabulary row; used as the key when grouping changes per row. */
row_index: number
/** Which field of the row the change applies to. */
field: 'english' | 'german' | 'example'
/** Presumably the field's text before the correction — confirm against the backend payload. */
old: string
/** Presumably the field's text after the correction — confirm against the backend payload. */
new: string
}
/** Props accepted by the StepLlmReview pipeline step. */
interface StepLlmReviewProps {
/** OCR pipeline session id; loading and reviewing are skipped while it is null. */
sessionId: string | null
/** Callback supplied by the parent; presumably advances to the next pipeline step — confirm at the call site. */
onNext: () => void
}
/**
 * Summary of a review run. Filled either from the stream's `meta` event or
 * synthesised from a stored review result when the session is loaded.
 */
interface ReviewMeta {
/** Total number of vocabulary entries. */
total_entries: number
/** Number of entries reported for review. */
to_review: number
/** Number of entries reported as skipped. */
skipped: number
/** Model identifier ('unknown' when a stored review does not name one). */
model: string
/** Row indices of skipped entries; only present when the `meta` event provides them. */
skipped_indices?: number[]
}
/** Progress payload taken from the `progress` field of streamed `batch` events. */
interface StreamProgress {
/** Presumably the number of entries processed so far — confirm against the backend. */
current: number
/** Presumably the number of entries to process in total — confirm against the backend. */
total: number
}
/** Short display label per entry field name (the keys match the values of COL_TYPE_TO_FIELD). */
const FIELD_LABELS: Record<string, string> = {
english: 'EN',
german: 'DE',
example: 'Beispiel',
source_page: 'Seite',
marker: 'Marker',
text: 'Text',
}
/** Maps a column type identifier to the name of the WordEntry field that holds its text. */
const COL_TYPE_TO_FIELD: Record<string, string> = {
column_en: 'english',
column_de: 'german',
column_example: 'example',
page_ref: 'source_page',
column_marker: 'marker',
column_text: 'text',
}
/** Maps a column type identifier to the text-colour utility classes (light and `dark:` variant) used for it. */
const COL_TYPE_COLOR: Record<string, string> = {
column_en: 'text-blue-600 dark:text-blue-400',
column_de: 'text-green-600 dark:text-green-400',
column_example: 'text-orange-600 dark:text-orange-400',
page_ref: 'text-cyan-600 dark:text-cyan-400',
column_marker: 'text-gray-500 dark:text-gray-400',
column_text: 'text-gray-700 dark:text-gray-300',
}
// Per-row review state of a vocabulary entry.
// NOTE(review): the code that assigns these values is outside the visible part of the file.
type RowStatus = 'pending' | 'active' | 'reviewed' | 'corrected' | 'skipped'
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
// Core state
const [status, setStatus] = useState<'idle' | 'loading' | 'ready' | 'running' | 'done' | 'error' | 'applied'>('idle')
const [meta, setMeta] = useState<ReviewMeta | null>(null)
const [changes, setChanges] = useState<LlmChange[]>([])
const [progress, setProgress] = useState<StreamProgress | null>(null)
const [totalDuration, setTotalDuration] = useState(0)
const [error, setError] = useState('')
const [accepted, setAccepted] = useState<Set<number>>(new Set())
const [applying, setApplying] = useState(false)
// Full vocab table state
const [vocabEntries, setVocabEntries] = useState<WordEntry[]>([])
const [columnsUsed, setColumnsUsed] = useState<ColumnMeta[]>([])
const [activeRowIndices, setActiveRowIndices] = useState<Set<number>>(new Set())
const [reviewedRows, setReviewedRows] = useState<Set<number>>(new Set())
const [skippedRows, setSkippedRows] = useState<Set<number>>(new Set())
const [correctedMap, setCorrectedMap] = useState<Map<number, LlmChange[]>>(new Map())
// Image
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
// Overlay view state
const [viewMode, setViewMode] = useState<'table' | 'overlay'>('table')
const [fontScale, setFontScale] = useState(0.7)
const [leftPaddingPct, setLeftPaddingPct] = useState(0)
const [globalBold, setGlobalBold] = useState(false)
const [cells, setCells] = useState<GridCell[]>([])
const reconRef = useRef<HTMLDivElement>(null)
const [reconWidth, setReconWidth] = useState(0)
// Pixel-analysed word positions via shared hook
const overlayImageUrl = sessionId
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
: ''
const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay')
const tableRef = useRef<HTMLDivElement>(null)
const activeRowRef = useRef<HTMLTableRowElement>(null)
// Track reconstruction container width for font size calculation
useEffect(() => {
const el = reconRef.current
if (!el) return
const obs = new ResizeObserver(entries => {
for (const entry of entries) setReconWidth(entry.contentRect.width)
})
obs.observe(el)
return () => obs.disconnect()
}, [viewMode])
// Load session data on mount
useEffect(() => {
if (!sessionId) return
loadSessionData()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
// Loads the session's word recognition result and, when a previous LLM
// review is stored with it, restores that review (changes, per-row
// corrections, accepted set, meta) and jumps straight to the 'done' state.
// Without word recognition data the step ends in the 'error' state.
const loadSessionData = async () => {
  if (!sessionId) return
  setStatus('loading')
  // Clear a message left over from an earlier failed load.
  setError('')
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    const data = await res.json()
    const wordResult: GridResult | undefined = data.word_result
    if (!wordResult) {
      setError('Keine Worterkennungsdaten gefunden. Bitte zuerst Schritt 5 abschliessen.')
      setStatus('error')
      return
    }

    const entries = wordResult.vocab_entries || wordResult.entries || []
    setVocabEntries(entries)
    setColumnsUsed(wordResult.columns_used || [])
    setCells(wordResult.cells || [])

    // Check if LLM review was already run
    const llmReview = wordResult.llm_review
    if (llmReview && llmReview.changes) {
      const existingChanges: LlmChange[] = llmReview.changes as LlmChange[]
      setChanges(existingChanges)
      setTotalDuration(llmReview.duration_ms || 0)

      // Mark all rows as reviewed
      const allReviewed = new Set(entries.map((_: WordEntry, i: number) => i))
      setReviewedRows(allReviewed)

      // Build corrected map (row index -> changes for that row)
      const cMap = new Map<number, LlmChange[]>()
      for (const c of existingChanges) {
        const existing = cMap.get(c.row_index) || []
        existing.push(c)
        cMap.set(c.row_index, existing)
      }
      setCorrectedMap(cMap)

      // Default: all accepted
      setAccepted(new Set(existingChanges.map((_: LlmChange, i: number) => i)))

      // The stored review carries no per-run counts, so every entry is
      // reported as reviewed. (The previous ternary on entries_corrected
      // produced entries.length in both branches.)
      setMeta({
        total_entries: entries.length,
        to_review: entries.length,
        skipped: 0,
        model: llmReview.model_used || 'unknown',
      })
      setStatus('done')
    } else {
      setStatus('ready')
    }
  } catch (e: unknown) {
    setError(e instanceof Error ? e.message : String(e))
    setStatus('error')
  }
}
// Starts a streaming LLM review and folds the server-sent events into
// component state as they arrive:
//   meta     -> review summary and skipped rows
//   batch    -> newly proposed changes, progress, reviewed/active rows
//   complete -> final duration; every change is accepted by default
//   error    -> aborts the run with the server's message
// A stream that closes without a `complete` event is still finalised, so
// the UI can never stay stuck in the 'running' state.
const runReview = useCallback(async () => {
  if (!sessionId) return
  setStatus('running')
  setError('')
  setChanges([])
  setProgress(null)
  setMeta(null)
  setTotalDuration(0)
  setActiveRowIndices(new Set())
  setReviewedRows(new Set())
  setSkippedRows(new Set())
  setCorrectedMap(new Map())

  try {
    const res = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review?stream=true`,
      { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}) },
    )
    if (!res.ok) {
      const data = await res.json().catch(() => ({}))
      throw new Error(data.detail || `HTTP ${res.status}`)
    }
    // Response.body may be null; fail with a readable message instead of a
    // TypeError from a non-null assertion.
    if (!res.body) {
      throw new Error('Keine Streaming-Antwort vom Server erhalten')
    }

    const reader = res.body.getReader()
    const decoder = new TextDecoder()
    let buffer = ''
    let allChanges: LlmChange[] = []
    let allSkipped = new Set<number>()
    const allReviewed = new Set<number>()
    const cMap = new Map<number, LlmChange[]>()
    let completed = false

    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })

        // Events are separated by a blank line; keep any partial event in
        // the buffer until the rest arrives.
        while (buffer.includes('\n\n')) {
          const idx = buffer.indexOf('\n\n')
          const chunk = buffer.slice(0, idx).trim()
          buffer = buffer.slice(idx + 2)
          if (!chunk.startsWith('data: ')) continue
          const dataStr = chunk.slice(6)
          let event: any
          try { event = JSON.parse(dataStr) } catch { continue }

          if (event.type === 'meta') {
            setMeta({
              total_entries: event.total_entries,
              to_review: event.to_review,
              skipped: event.skipped,
              model: event.model,
              skipped_indices: event.skipped_indices,
            })
            // Mark skipped rows
            if (event.skipped_indices) {
              allSkipped = new Set(event.skipped_indices)
              setSkippedRows(allSkipped)
            }
          }

          if (event.type === 'batch') {
            const batchChanges: LlmChange[] = event.changes || []
            const batchRows: number[] = event.entries_reviewed || []

            // Update active rows (currently being reviewed)
            setActiveRowIndices(new Set(batchRows))

            // Accumulate changes
            allChanges = [...allChanges, ...batchChanges]
            setChanges(allChanges)
            setProgress(event.progress)

            // Update corrected map
            for (const c of batchChanges) {
              const existing = cMap.get(c.row_index) || []
              existing.push(c)
              cMap.set(c.row_index, [...existing])
            }
            // Hand React fresh Map/Set copies so the state change is noticed.
            setCorrectedMap(new Map(cMap))

            // Mark batch rows as reviewed
            for (const r of batchRows) {
              allReviewed.add(r)
            }
            setReviewedRows(new Set(allReviewed))

            // Scroll to active row in table
            setTimeout(() => {
              activeRowRef.current?.scrollIntoView({ behavior: 'smooth', block: 'center' })
            }, 50)
          }

          if (event.type === 'complete') {
            completed = true
            setActiveRowIndices(new Set())
            setTotalDuration(event.duration_ms)
            setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
            // Mark all non-skipped as reviewed
            const allEntryIndices = vocabEntries.map((_: WordEntry, i: number) => i)
            for (const i of allEntryIndices) {
              if (!allSkipped.has(i)) allReviewed.add(i)
            }
            setReviewedRows(new Set(allReviewed))
            setStatus('done')
          }

          if (event.type === 'error') {
            throw new Error(event.detail || 'Unbekannter Fehler')
          }
        }
      }
    } finally {
      // Stop reading when the loop was left early (e.g. by an error
      // event); the call is harmless once the stream has already ended.
      reader.cancel().catch(() => {})
    }

    // If stream ended without complete event: finalise with whatever was
    // received. Previously this only happened when no change had arrived,
    // which left the step in 'running' forever otherwise.
    if (!completed) {
      setActiveRowIndices(new Set())
      setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
      setStatus('done')
    }
  } catch (e: unknown) {
    const msg = e instanceof Error ? e.message : String(e)
    setError(msg)
    setStatus('error')
  }
}, [sessionId, vocabEntries])
/** Flips the accepted state of one proposed change, addressed by its index in `changes`. */
const toggleChange = (index: number) => {
  setAccepted(current => {
    const updated = new Set(current)
    // `delete` reports whether the index was present; add it only when it was not.
    if (!updated.delete(index)) {
      updated.add(index)
    }
    return updated
  })
}
/** Deselects every change when all are currently accepted; otherwise accepts all of them. */
const toggleAll = () => {
  const everythingSelected = accepted.size === changes.length
  const nextSelection = everythingSelected
    ? new Set<number>()
    : new Set(changes.map((_: LlmChange, i: number) => i))
  setAccepted(nextSelection)
}
/**
 * Sends the indices of the accepted changes to the `llm-review/apply` endpoint.
 *
 * On success the step switches to the `applied` status. On failure the message
 * is stored in `error` AND the status switches to `error`, because the error
 * text is only rendered in the error view; leaving the status at `done` would
 * hide the failure from the user. `applying` is true for the duration of the
 * request.
 */
const applyChanges = useCallback(async () => {
  if (!sessionId) return
  setApplying(true)
  // Drop any message left over from an earlier failed attempt.
  setError('')
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ accepted_indices: Array.from(accepted) }),
    })
    if (!res.ok) {
      // The body may not be JSON; fall back to the bare HTTP status.
      const data = await res.json().catch(() => ({}))
      throw new Error(data.detail || `HTTP ${res.status}`)
    }
    setStatus('applied')
  } catch (e: unknown) {
    setError(e instanceof Error ? e.message : String(e))
    setStatus('error')
  } finally {
    setApplying(false)
  }
}, [sessionId, accepted])
/**
 * Resolves the display status of a table row.
 * The first matching collection wins: active, skipped, corrected, reviewed;
 * rows found in none of them are `pending`.
 */
const getRowStatus = (rowIndex: number): RowStatus => {
  const precedence: Array<[{ has(key: number): boolean }, RowStatus]> = [
    [activeRowIndices, 'active'],
    [skippedRows, 'skipped'],
    [correctedMap, 'corrected'],
    [reviewedRows, 'reviewed'],
  ]
  const hit = precedence.find(([members]) => members.has(rowIndex))
  return hit ? hit[1] : 'pending'
}
// URL of the session's cropped page image; empty string while no session is selected.
const dewarpedUrl = sessionId
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
: ''
// Per-column snapping: every cell of a column is rendered at the column's median
// x offset and median width so the reconstructed columns line up vertically.
// Cells without `bbox_pct` do not contribute. For an even number of samples the
// upper of the two middle values is used.
const colPositions = useMemo(() => {
  const samples = new Map<number, { xs: number[]; ws: number[] }>()
  cells.forEach(cell => {
    const box = cell.bbox_pct
    if (!box) return
    let bucket = samples.get(cell.col_index)
    if (!bucket) {
      bucket = { xs: [], ws: [] }
      samples.set(cell.col_index, bucket)
    }
    bucket.xs.push(box.x)
    bucket.ws.push(box.w)
  })
  const middleValue = (values: number[]) => {
    const ordered = values.slice().sort((a, b) => a - b)
    return ordered[Math.floor(ordered.length / 2)]
  }
  const snapped = new Map<number, { x: number; w: number }>()
  samples.forEach(({ xs, ws }, colIdx) => {
    snapped.set(colIdx, { x: middleValue(xs), w: middleValue(ws) })
  })
  return snapped
}, [cells])
if (!sessionId) {
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
}
// --- Loading session data ---
if (status === 'loading' || status === 'idle') {
return (
<div className="flex items-center gap-3 justify-center py-12">
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
<span className="text-gray-500">Session-Daten werden geladen...</span>
</div>
)
}
// --- Error ---
if (status === 'error') {
return (
<div className="flex flex-col items-center justify-center py-12 text-center">
<div className="text-5xl mb-4"></div>
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler bei OCR-Zeichenkorrektur</h3>
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
<div className="flex gap-3">
<button onClick={() => { setError(''); loadSessionData() }}
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
Erneut versuchen
</button>
<button onClick={onNext}
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
Ueberspringen
</button>
</div>
</div>
)
}
// --- Applied ---
if (status === 'applied') {
return (
<div className="flex flex-col items-center justify-center py-12 text-center">
<div className="text-5xl mb-4"></div>
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturen uebernommen</h3>
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
{accepted.size} von {changes.length} Korrekturen wurden angewendet.
</p>
<button onClick={onNext}
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
Weiter
</button>
</div>
)
}
// Active entry for highlighting on image
const activeEntry = vocabEntries.find((_: WordEntry, i: number) => activeRowIndices.has(i))
// Progress in percent. A total of 0 would divide by zero and render "NaN%"
// (and an invalid CSS width), so it is reported as 0 instead.
const pct = progress && progress.total > 0 ? Math.round((progress.current / progress.total) * 100) : 0
/**
 * Applies an inline edit made in the overlay.
 * A `null` text is ignored. Otherwise the cell's text is replaced and, when the
 * cell's column type maps to a vocabulary field, the same text is written to
 * that field of the entry at `rowIndex`.
 */
const handleCellEdit = (cellId: string, rowIndex: number, newText: string | null) => {
  if (newText === null) return
  setCells(prev => prev.map(c => (c.cell_id !== cellId ? c : { ...c, text: newText })))
  // Mirror the edit into vocabEntries only for cells that map to a known field.
  const edited = cells.find(c => c.cell_id === cellId)
  const field = edited ? COL_TYPE_TO_FIELD[edited.col_type] : undefined
  if (!field) return
  setVocabEntries(prev =>
    prev.map((entry, i) => (i !== rowIndex ? entry : { ...entry, [field]: newText })),
  )
}
// --- Ready / Running / Done: 2-column layout ---
return (
<div className="space-y-4">
{/* Header */}
<div className="flex items-center justify-between">
<div>
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
Schritt 6: Korrektur
</h3>
<p className="text-xs text-gray-400 mt-0.5">
{status === 'ready' && `${vocabEntries.length} Eintraege bereit zur Pruefung`}
{status === 'running' && meta && `${meta.model} · ${meta.to_review} zu pruefen, ${meta.skipped} uebersprungen`}
{status === 'done' && (
<>
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
{meta && <> · {meta.skipped} uebersprungen</>}
{' '}· {totalDuration}ms · {meta?.model}
</>
)}
</p>
</div>
<div className="flex items-center gap-2">
{status === 'ready' && (
<button onClick={runReview}
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium">
Korrektur starten
</button>
)}
{status === 'running' && (
<div className="flex items-center gap-2 text-sm text-teal-600 dark:text-teal-400">
<div className="animate-spin rounded-full h-4 w-4 border-b-2 border-teal-500" />
{progress ? `${progress.current}/${progress.total}` : 'Startet...'}
</div>
)}
{status === 'done' && changes.length > 0 && (
<button onClick={toggleAll}
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
{accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
</button>
)}
</div>
</div>
{/* Progress bar (while running) */}
{status === 'running' && progress && (
<div className="space-y-1">
<div className="flex justify-between text-xs text-gray-400">
<span>{progress.current} / {progress.total} Eintraege geprueft</span>
<span>{pct}%</span>
</div>
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-2">
<div className="bg-teal-500 h-2 rounded-full transition-all duration-500" style={{ width: `${pct}%` }} />
</div>
</div>
)}
{/* View mode toggle */}
<div className="flex items-center gap-1">
<button
onClick={() => setViewMode('table')}
className={`px-3 py-1.5 text-xs rounded-l-lg border transition-colors ${
viewMode === 'table'
? 'bg-teal-600 text-white border-teal-600'
: 'bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
}`}
>
Tabelle
</button>
<button
onClick={() => setViewMode('overlay')}
className={`px-3 py-1.5 text-xs rounded-r-lg border transition-colors ${
viewMode === 'overlay'
? 'bg-teal-600 text-white border-teal-600'
: 'bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
}`}
>
Overlay
</button>
</div>
{/* Overlay toolbar */}
{viewMode === 'overlay' && (
<div className="flex items-center gap-4 flex-wrap bg-gray-50 dark:bg-gray-800/50 rounded-lg px-3 py-2">
<label className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
Schrift
<input
type="range" min={30} max={120} value={Math.round(fontScale * 100)}
onChange={e => setFontScale(Number(e.target.value) / 100)}
className="w-24 h-1 accent-teal-600"
/>
<span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
</label>
<label className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
Einrueckung
<input
type="range" min={0} max={20} step={0.5} value={leftPaddingPct}
onChange={e => setLeftPaddingPct(Number(e.target.value))}
className="w-24 h-1 accent-teal-600"
/>
<span className="w-8 text-right font-mono">{leftPaddingPct}%</span>
</label>
<button
onClick={() => setGlobalBold(b => !b)}
className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
globalBold
? 'bg-teal-600 text-white border-teal-600'
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
}`}
>
B
</button>
</div>
)}
{/* 2-column layout: Image + Table/Overlay */}
<div className={`grid gap-4 ${viewMode === 'overlay' ? 'grid-cols-2' : 'grid-cols-3'}`}>
{/* Left: Dewarped Image with highlight overlay */}
<div className="col-span-1">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Originalbild
</div>
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative sticky top-4">
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={dewarpedUrl}
alt="Dewarped"
className="w-full h-auto"
onLoad={(e) => {
const img = e.target as HTMLImageElement
setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
}}
/>
{/* Highlight overlay for active row */}
{activeEntry?.bbox && (
<div
className="absolute border-2 border-yellow-400 bg-yellow-400/20 pointer-events-none animate-pulse"
style={{
left: `${activeEntry.bbox.x}%`,
top: `${activeEntry.bbox.y}%`,
width: `${activeEntry.bbox.w}%`,
height: `${activeEntry.bbox.h}%`,
}}
/>
)}
</div>
</div>
{/* Right: Table or Overlay */}
<div className={viewMode === 'table' ? 'col-span-2' : 'col-span-1'} ref={tableRef}>
{viewMode === 'table' ? (
<>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
{columnsUsed.length === 1 && columnsUsed[0]?.type === 'column_text' ? 'Tabelle' : 'Vokabeltabelle'} ({vocabEntries.length} Eintraege)
</div>
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
<div className="max-h-[70vh] overflow-y-auto">
<table className="w-full text-sm">
<thead className="sticky top-0 z-10">
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium w-10">#</th>
{columnsUsed.length > 0 ? (
columnsUsed.map((col, i) => {
const field = COL_TYPE_TO_FIELD[col.type]
if (!field) return null
return (
<th key={i} className={`px-2 py-2 text-left font-medium ${COL_TYPE_COLOR[col.type] || 'text-gray-500 dark:text-gray-400'}`}>
{FIELD_LABELS[field] || field}
</th>
)
})
) : (
<>
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">EN</th>
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">DE</th>
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Beispiel</th>
</>
)}
<th className="px-2 py-2 text-center text-gray-500 dark:text-gray-400 font-medium w-16">Status</th>
</tr>
</thead>
<tbody>
{vocabEntries.map((entry, idx) => {
const rowStatus = getRowStatus(idx)
const rowChanges = correctedMap.get(idx)
const rowBg = {
pending: '',
active: 'bg-yellow-50 dark:bg-yellow-900/20',
reviewed: '',
corrected: 'bg-teal-50/50 dark:bg-teal-900/10',
skipped: 'bg-gray-50 dark:bg-gray-800/50',
}[rowStatus]
return (
<tr
key={idx}
ref={rowStatus === 'active' ? activeRowRef : undefined}
className={`border-b border-gray-100 dark:border-gray-700/50 ${rowBg} ${
rowStatus === 'active' ? 'ring-1 ring-yellow-400 ring-inset' : ''
}`}
>
<td className="px-2 py-1.5 text-gray-400 font-mono text-xs">{idx}</td>
{columnsUsed.length > 0 ? (
columnsUsed.map((col, i) => {
const field = COL_TYPE_TO_FIELD[col.type]
if (!field) return null
const text = (entry as Record<string, unknown>)[field] as string || ''
return (
<td key={i} className="px-2 py-1.5 text-xs">
<CellContent text={text} field={field} rowChanges={rowChanges} />
</td>
)
})
) : (
<>
<td className="px-2 py-1.5">
<CellContent text={entry.english} field="english" rowChanges={rowChanges} />
</td>
<td className="px-2 py-1.5">
<CellContent text={entry.german} field="german" rowChanges={rowChanges} />
</td>
<td className="px-2 py-1.5 text-xs">
<CellContent text={entry.example} field="example" rowChanges={rowChanges} />
</td>
</>
)}
<td className="px-2 py-1.5 text-center">
<StatusIcon status={rowStatus} />
</td>
</tr>
)
})}
</tbody>
</table>
</div>
</div>
</>
) : (
<>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Text-Rekonstruktion ({cells.filter(c => c.text).length} Zellen)
</div>
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-white dark:bg-white">
<div
ref={reconRef}
className="relative"
style={{
aspectRatio: imageNaturalSize ? `${imageNaturalSize.w} / ${imageNaturalSize.h}` : '3 / 4',
}}
>
{cells.map(cell => {
if (!cell.bbox_pct || !cell.text) return null
const col = colPositions.get(cell.col_index)
const cellX = col?.x ?? cell.bbox_pct.x
const cellW = col?.w ?? cell.bbox_pct.w
const aspect = imageNaturalSize ? imageNaturalSize.h / imageNaturalSize.w : 4 / 3
const containerH = reconWidth * aspect
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
const wordPos = cellWordPositions.get(cell.cell_id)
// Pixel-analysed: render word-groups at detected positions
if (wordPos) {
return wordPos.map((wp, i) => {
// Auto font-size from pixel analysis, scaled by user slider
const autoFontPx = cellHeightPx * wp.fontRatio * fontScale
const fs = Math.max(6, autoFontPx)
return (
<span
key={`${cell.cell_id}_${i}`}
className="absolute leading-none pointer-events-none select-none"
style={{
left: `${wp.xPct}%`,
top: `${cell.bbox_pct.y}%`,
width: `${wp.wPct}%`,
height: `${cell.bbox_pct.h}%`,
fontSize: `${fs}px`,
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
fontFamily: "'Liberation Sans', Arial, sans-serif",
display: 'flex',
alignItems: 'center',
whiteSpace: 'nowrap',
overflow: 'visible',
color: '#1a1a1a',
}}
>
{wp.text}
</span>
)
})
}
// Fallback: no pixel data — single span for entire cell
const fontSize = Math.max(6, cellHeightPx * fontScale)
return (
<span
key={cell.cell_id}
className="absolute leading-none pointer-events-none select-none"
style={{
left: `${cellX}%`,
top: `${cell.bbox_pct.y}%`,
width: `${cellW}%`,
height: `${cell.bbox_pct.h}%`,
fontSize: `${fontSize}px`,
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
paddingLeft: `${leftPaddingPct}%`,
fontFamily: "'Liberation Sans', Arial, sans-serif",
display: 'flex',
alignItems: 'center',
whiteSpace: 'pre',
overflow: 'visible',
color: '#1a1a1a',
}}
>
{cell.text}
</span>
)
})}
</div>
</div>
</>
)}
</div>
</div>
{/* Done state: summary + actions */}
{status === 'done' && (
<div className="space-y-4">
{/* Summary */}
<div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-3 text-xs text-gray-500 dark:text-gray-400">
{changes.length === 0 ? (
<span>Keine Korrekturen noetig alle Eintraege sind korrekt.</span>
) : (
<span>
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden ·{' '}
{accepted.size} ausgewaehlt ·{' '}
{meta?.skipped || 0} uebersprungen (Lautschrift) ·{' '}
{totalDuration}ms
</span>
)}
</div>
{/* Corrections detail list (if any) */}
{changes.length > 0 && (
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
<div className="bg-gray-50 dark:bg-gray-800 px-3 py-2 border-b border-gray-200 dark:border-gray-700">
<span className="text-xs font-medium text-gray-600 dark:text-gray-400">
Korrekturvorschlaege ({accepted.size}/{changes.length} ausgewaehlt)
</span>
</div>
<table className="w-full text-sm">
<thead>
<tr className="bg-gray-50/50 dark:bg-gray-800/50 border-b border-gray-200 dark:border-gray-700">
<th className="w-10 px-3 py-1.5 text-center">
<input type="checkbox" checked={accepted.size === changes.length} onChange={toggleAll}
className="rounded border-gray-300 dark:border-gray-600" />
</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Zeile</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Feld</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Vorher</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Nachher</th>
</tr>
</thead>
<tbody>
{changes.map((change, idx) => (
<tr key={idx} className={`border-b border-gray-100 dark:border-gray-700/50 ${
accepted.has(idx) ? 'bg-teal-50/50 dark:bg-teal-900/10' : ''
}`}>
<td className="px-3 py-1.5 text-center">
<input type="checkbox" checked={accepted.has(idx)} onChange={() => toggleChange(idx)}
className="rounded border-gray-300 dark:border-gray-600" />
</td>
<td className="px-2 py-1.5 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
<td className="px-2 py-1.5">
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
{FIELD_LABELS[change.field] || change.field}
</span>
</td>
<td className="px-2 py-1.5"><span className="line-through text-red-500 dark:text-red-400 text-xs">{change.old}</span></td>
<td className="px-2 py-1.5"><span className="text-green-600 dark:text-green-400 font-medium text-xs">{change.new}</span></td>
</tr>
))}
</tbody>
</table>
</div>
)}
{/* Actions */}
<div className="flex items-center justify-between pt-2">
<p className="text-xs text-gray-400">
{changes.length > 0 ? `${accepted.size} von ${changes.length} ausgewaehlt` : ''}
</p>
<div className="flex gap-3">
{changes.length > 0 && (
<button onClick={onNext}
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
Alle ablehnen
</button>
)}
{changes.length > 0 ? (
<button onClick={applyChanges} disabled={applying || accepted.size === 0}
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium">
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
</button>
) : (
<button onClick={onNext}
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
Weiter
</button>
)}
</div>
</div>
</div>
)}
</div>
)
}
/**
 * Renders one table cell.
 * When the row has a change for this field, the old value is shown struck
 * through next to the new one; an empty cell without a change shows a dash;
 * otherwise the plain text is shown.
 */
function CellContent({ text, field, rowChanges }: {
  text: string
  field: string
  rowChanges?: LlmChange[]
}) {
  // Only the first change recorded for this field is displayed.
  const change = rowChanges?.find(c => c.field === field)
  if (change) {
    return (
      <span>
        <span className="line-through text-red-400 dark:text-red-500 text-xs mr-1">{change.old}</span>
        <span className="text-green-600 dark:text-green-400 font-medium text-xs">{change.new}</span>
      </span>
    )
  }
  if (!text) {
    return <span className="text-gray-300 dark:text-gray-600">&mdash;</span>
  }
  return <span className="text-gray-700 dark:text-gray-300 text-xs">{text}</span>
}
/** Small per-row indicator; one visual per row status, looked up by status key. */
function StatusIcon({ status }: { status: RowStatus }) {
  const icons = {
    pending: <span className="text-gray-300 dark:text-gray-600 text-xs"></span>,
    active: (
      <span className="inline-block w-3 h-3 rounded-full bg-yellow-400 animate-pulse" title="Wird geprueft" />
    ),
    reviewed: (
      <svg className="w-4 h-4 text-green-500 inline-block" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
        <path strokeLinecap="round" strokeLinejoin="round" d="M5 13l4 4L19 7" />
      </svg>
    ),
    corrected: (
      <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-teal-100 dark:bg-teal-900/30 text-teal-700 dark:text-teal-400">
        korr.
      </span>
    ),
    skipped: (
      <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400">
        skip
      </span>
    ),
  }
  return icons[status]
}

View File

@@ -1,247 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
import { ImageCompareView } from './ImageCompareView'
const KLAUSUR_API = '/klausur-api'
/** Props of the orientation step. */
interface StepOrientationProps {
// Existing session to reload when navigating back; when omitted or null the upload form is shown.
sessionId?: string | null
// Invoked with the active session's id when the user clicks the "Weiter" button.
onNext: (sessionId: string) => void
}
/**
 * Pipeline step: upload a scan and show the result of the automatic
 * orientation detection.
 *
 * Without a session the component renders an optional session-name field and
 * an upload area (drag & drop or file picker). After a successful upload the
 * orientation endpoint is called automatically and the original image is shown
 * next to the oriented one. When an existing session id is passed in, the
 * session and any stored orientation result are reloaded instead.
 *
 * @param sessionId existing session to reload (optional)
 * @param onNext    called with the session id when the user continues
 */
export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [orientationResult, setOrientationResult] = useState<OrientationResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)
  const [sessionName, setSessionName] = useState('')

  // Reload session data when navigating back
  useEffect(() => {
    if (!existingSessionId || session) return
    // Set by the cleanup below. A response that arrives after the component was
    // unmounted, or after a session was set by other means (e.g. a fresh upload),
    // must not overwrite the current state.
    let cancelled = false
    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`)
        if (!res.ok) return
        const data = await res.json()
        if (cancelled) return
        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          // The API returns a path; prefix it with the API base used by this app.
          original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
        }
        setSession(sessionInfo)
        if (data.orientation_result) {
          setOrientationResult(data.orientation_result)
        }
      } catch (e) {
        console.error('Failed to reload session:', e)
      }
    }
    loadSession()
    return () => {
      cancelled = true
    }
  }, [existingSessionId, session])

  // Uploads the file as a new session, then immediately runs orientation detection on it.
  const handleUpload = useCallback(async (file: File) => {
    setUploading(true)
    setError(null)
    setOrientationResult(null)
    try {
      const formData = new FormData()
      formData.append('file', file)
      if (sessionName.trim()) {
        formData.append('name', sessionName.trim())
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Upload fehlgeschlagen')
      }
      const data: SessionInfo = await res.json()
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)
      // Auto-trigger orientation detection
      setDetecting(true)
      const orientRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/orientation`, {
        method: 'POST',
      })
      if (!orientRes.ok) {
        // Surface the server's detail like the upload path does; fall back to the generic text.
        const err = await orientRes.json().catch(() => ({}))
        throw new Error(err.detail || 'Orientierungserkennung fehlgeschlagen')
      }
      const orientData = await orientRes.json()
      setOrientationResult({
        orientation_degrees: orientData.orientation_degrees,
        corrected: orientData.corrected,
        duration_seconds: orientData.duration_seconds,
      })
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setUploading(false)
      setDetecting(false)
    }
  }, [sessionName])

  // Drag & drop: only the first dropped file is uploaded.
  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragOver(false)
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    // Clear the input so that picking the same file again (e.g. after a failed
    // upload) still fires a change event.
    e.target.value = ''
    if (file) handleUpload(file)
  }, [handleUpload])

  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        {/* Session name input */}
        <div>
          <label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
            Session-Name (optional)
          </label>
          <input
            type="text"
            value={sessionName}
            onChange={(e) => setSessionName(e.target.value)}
            placeholder="z.B. Unit 3 Seite 42"
            className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
          />
        </div>
        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>
        {error && (
          <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
            {error}
          </div>
        )}
      </div>
    )
  }

  // Session active: show orientation result
  const orientedUrl = orientationResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/image/oriented`
    : null
  return (
    <div className="space-y-4">
      {/* Filename */}
      <div className="text-sm text-gray-500 dark:text-gray-400">
        Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
        {' '}({session.image_width} x {session.image_height} px)
      </div>
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Orientierung wird erkannt...
        </div>
      )}
      {/* Image comparison */}
      <ImageCompareView
        originalUrl={session.original_image_url}
        deskewedUrl={orientedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Original"
        rightLabel="Orientiert"
      />
      {/* Orientation result badge */}
      {orientationResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex items-center gap-3 text-sm">
            {orientationResult.corrected ? (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                🔄 {orientationResult.orientation_degrees}° korrigiert
              </span>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                0° (keine Drehung noetig)
              </span>
            )}
            <span className="text-gray-400 text-xs">
              {orientationResult.duration_seconds}s
            </span>
          </div>
        </div>
      )}
      {/* Next button */}
      {orientationResult && (
        <div className="flex justify-end">
          <button
            onClick={() => onNext(session.session_id)}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter &rarr;
          </button>
        </div>
      )}
      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,263 +0,0 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { RowResult, RowGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
/** Props of the row-detection step. */
interface StepRowDetectionProps {
// Session whose rows are detected; with null the step only shows a hint to finish steps 1-3 first.
sessionId: string | null
// Callback without arguments — NOTE(review): presumably advances to the next step; its call site is not visible here, confirm.
onNext: () => void
}
export function StepRowDetection({ sessionId, onNext }: StepRowDetectionProps) {
const [rowResult, setRowResult] = useState<RowResult | null>(null)
const [detecting, setDetecting] = useState(false)
const [error, setError] = useState<string | null>(null)
const [gtNotes, setGtNotes] = useState('')
const [gtSaved, setGtSaved] = useState(false)
// Whenever the session changes: reuse the row result stored on the session if
// there is one, otherwise start automatic detection. A failed lookup is logged
// and also falls through to automatic detection.
useEffect(() => {
  if (!sessionId) return
  const restoreOrDetect = async () => {
    let cached: RowResult | null = null
    try {
      const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (response.ok) {
        const info = await response.json()
        cached = info.row_result
      }
    } catch (e) {
      console.error('Failed to fetch session info:', e)
    }
    if (cached) {
      setRowResult(cached)
      return
    }
    // No cached result — run auto
    runAutoDetection()
  }
  restoreOrDetect()
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
/**
 * Requests row detection for the current session and stores the result.
 * `detecting` is true while the request is in flight; a failure ends up in
 * `error` (server `detail` when available, otherwise a generic message).
 */
const runAutoDetection = useCallback(async () => {
  if (!sessionId) return
  setDetecting(true)
  setError(null)
  const endpoint = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rows`
  try {
    const response = await fetch(endpoint, { method: 'POST' })
    if (response.ok) {
      const detected: RowResult = await response.json()
      setRowResult(detected)
    } else {
      // The error body may not be JSON; fall back to the HTTP status text.
      const body = await response.json().catch(() => ({ detail: response.statusText }))
      throw new Error(body.detail || 'Zeilenerkennung fehlgeschlagen')
    }
  } catch (e) {
    const message = e instanceof Error ? e.message : 'Unbekannter Fehler'
    setError(message)
  } finally {
    setDetecting(false)
  }
}, [sessionId])
/**
 * Stores the reviewer's verdict on the detected rows as ground truth.
 *
 * @param isCorrect whether the detected rows were judged correct
 *
 * The current notes are sent along when non-empty. `gtSaved` is only set once
 * the server has answered with a success status; an HTTP error is logged like
 * a network failure and leaves `gtSaved` untouched.
 */
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
  if (!sessionId) return
  const gt: RowGroundTruth = {
    is_correct: isCorrect,
    notes: gtNotes || undefined,
  }
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/rows`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(gt),
    })
    // fetch only rejects on network errors; a 4xx/5xx answer must not count as saved.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`)
    }
    setGtSaved(true)
  } catch (e) {
    console.error('Ground truth save failed:', e)
  }
}, [sessionId, gtNotes])
if (!sessionId) {
return (
<div className="flex flex-col items-center justify-center py-16 text-center">
<div className="text-5xl mb-4">📏</div>
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
Schritt 4: Zeilenerkennung
</h3>
<p className="text-gray-500 dark:text-gray-400 max-w-md">
Bitte zuerst Schritte 1-3 abschliessen.
</p>
</div>
)
}
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/rows-overlay`
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
const rowTypeColors: Record<string, string> = {
header: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
content: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300',
footer: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
}
return (
<div className="space-y-4">
{/* Loading */}
{detecting && (
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
Zeilenerkennung laeuft...
</div>
)}
{/* Images: overlay vs clean */}
<div className="grid grid-cols-2 gap-4">
<div>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Mit Zeilen-Overlay
</div>
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
{rowResult ? (
// eslint-disable-next-line @next/next/no-img-element
<img
src={`${overlayUrl}?t=${Date.now()}`}
alt="Zeilen-Overlay"
className="w-full h-auto"
/>
) : (
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
{detecting ? 'Erkenne Zeilen...' : 'Keine Daten'}
</div>
)}
</div>
</div>
<div>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Entzerrtes Bild
</div>
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={dewarpedUrl}
alt="Entzerrt"
className="w-full h-auto"
/>
</div>
</div>
</div>
{/* Row summary */}
{rowResult && (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
<div className="flex items-center justify-between">
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Ergebnis: {rowResult.total_rows} Zeilen erkannt
</h4>
<span className="text-xs text-gray-400">
{rowResult.duration_seconds}s
</span>
</div>
{/* Type summary badges */}
<div className="flex gap-2">
{Object.entries(rowResult.summary).map(([type, count]) => (
<span
key={type}
className={`px-2 py-0.5 rounded text-xs font-medium ${rowTypeColors[type] || 'bg-gray-100 text-gray-600'}`}
>
{type}: {count}
</span>
))}
</div>
{/* Row list */}
<div className="max-h-64 overflow-y-auto space-y-1">
{rowResult.rows.map((row) => (
<div
key={row.index}
className={`flex items-center gap-3 px-3 py-1.5 rounded text-xs font-mono ${
row.row_type === 'header' || row.row_type === 'footer'
? 'bg-gray-50 dark:bg-gray-700/50 text-gray-500'
: 'text-gray-600 dark:text-gray-400'
}`}
>
<span className="w-8 text-right text-gray-400">R{row.index}</span>
<span className={`px-1.5 py-0.5 rounded text-[10px] uppercase font-semibold ${rowTypeColors[row.row_type] || ''}`}>
{row.row_type}
</span>
<span>y={row.y}</span>
<span>h={row.height}px</span>
<span>{row.word_count} Woerter</span>
{row.gap_before > 0 && (
<span className="text-gray-400">gap={row.gap_before}px</span>
)}
</div>
))}
</div>
</div>
)}
{/* Controls */}
{rowResult && (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
<div className="flex items-center gap-3">
<button
onClick={() => runAutoDetection()}
disabled={detecting}
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
>
Erneut erkennen
</button>
<div className="flex-1" />
{/* Ground truth */}
{!gtSaved ? (
<>
<input
type="text"
placeholder="Notizen (optional)"
value={gtNotes}
onChange={(e) => setGtNotes(e.target.value)}
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
/>
<button
onClick={() => handleGroundTruth(true)}
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
>
Korrekt
</button>
<button
onClick={() => handleGroundTruth(false)}
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
>
Fehlerhaft
</button>
</>
) : (
<span className="text-xs text-green-600 dark:text-green-400">
Ground Truth gespeichert
</span>
)}
<button
onClick={onNext}
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
>
Weiter
</button>
</div>
</div>
)}
{error && (
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
{error}
</div>
)}
</div>
)
}

View File

@@ -1,339 +0,0 @@
'use client'
import { useEffect, useState } from 'react'
import type { StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
/** Props accepted by StepStructureDetection. */
interface StepStructureDetectionProps {
/** OCR pipeline session to analyse; when null the component only renders a "no session selected" hint. */
sessionId: string | null
/** Called when the user clicks the "Weiter" button shown once a result is available. */
onNext: () => void
}
// Swatch colours for the "Erkannte Farben" list: maps a colour name (a key of
// `color_pixel_counts`) to the hex value used as the dot's background.
// Names missing from this table fall back to '#6b7280' at the use site.
const COLOR_HEX: Record<string, string> = {
red: '#dc2626',
orange: '#ea580c',
yellow: '#ca8a04',
green: '#16a34a',
blue: '#2563eb',
purple: '#9333ea',
}
/**
 * Returns the structure result already stored on the session, or null when the
 * session request is not OK or the session has no `structure_result` yet.
 */
async function loadCachedStructure(sessionId: string): Promise<StructureResult | null> {
  const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
  if (!res.ok) return null
  const sessionData = await res.json()
  return sessionData.structure_result || null
}

/**
 * Runs structure detection on the backend (POST .../detect-structure) and
 * returns the parsed response. Throws `Error(failureMessage)` on a non-OK status.
 */
async function requestStructureDetection(sessionId: string, failureMessage: string): Promise<StructureResult> {
  const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
    method: 'POST',
  })
  if (!res.ok) throw new Error(failureMessage)
  return res.json()
}

/**
 * Wizard step that shows the detected document structure (zones, boxes,
 * graphics, colours) next to the cropped page image.
 *
 * Whenever `sessionId` changes, the stored `structure_result` of the session is
 * reused if present; otherwise detection is triggered. "Erneut erkennen" always
 * forces a fresh detection.
 *
 * @param sessionId OCR pipeline session; null renders a placeholder only.
 * @param onNext    Invoked when the user clicks "Weiter".
 */
export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) {
  const [result, setResult] = useState<StructureResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  // Cache-buster for the overlay image; bumped after every successful detection.
  const [overlayTs, setOverlayTs] = useState(0)

  // Load (or compute) the structure result for the current session.
  useEffect(() => {
    if (!sessionId) return
    // Set by the cleanup so a response that arrives after the session changed
    // (or the component unmounted) cannot overwrite newer state.
    let cancelled = false

    const runDetection = async () => {
      // Drop the previous session's result so it is never shown next to the
      // new session's images.
      setResult(null)
      setOverlayTs(0)
      setDetecting(true)
      setError(null)
      try {
        const cached = await loadCachedStructure(sessionId)
        const data = cached ?? await requestStructureDetection(sessionId, 'Strukturerkennung fehlgeschlagen')
        if (cancelled) return
        setResult(data)
        setOverlayTs(Date.now())
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        if (!cancelled) setDetecting(false)
      }
    }

    runDetection()
    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Forces a fresh detection, bypassing the result stored on the session.
  const handleRerun = async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const data = await requestStructureDetection(sessionId, 'Erneute Erkennung fehlgeschlagen')
      setResult(data)
      setOverlayTs(Date.now())
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}`

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Dokumentstruktur wird analysiert...
        </div>
      )}
      {/* Two-column image comparison */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
        {/* Left: Original document */}
        <div className="space-y-2">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
            Original
          </div>
          <div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
            {/* Keyed by URL: onError hides the element via an inline style, so a
                new URL must mount a fresh <img> to become visible again. */}
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              key={croppedUrl}
              src={croppedUrl}
              alt="Originaldokument"
              className="w-full h-full object-contain"
              onError={(e) => {
                (e.target as HTMLImageElement).style.display = 'none'
              }}
            />
          </div>
        </div>
        {/* Right: Structure overlay */}
        <div className="space-y-2">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
            Erkannte Struktur
          </div>
          <div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
            {/* Keyed by URL: if the overlay failed to load before detection
                finished, the cache-busted URL mounts a fresh, visible <img>. */}
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              key={overlayUrl}
              src={overlayUrl}
              alt="Strukturerkennung"
              className="w-full h-full object-contain"
              onError={(e) => {
                (e.target as HTMLImageElement).style.display = 'none'
              }}
            />
          </div>
        </div>
      </div>
      {/* Result info */}
      {result && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4 space-y-3">
          {/* Summary badges */}
          <div className="flex flex-wrap items-center gap-3 text-sm">
            <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-teal-50 dark:bg-teal-900/20 text-teal-700 dark:text-teal-400 text-xs font-medium">
              {result.zones.length} Zone(n)
            </span>
            <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
              {result.boxes.length} Box(en)
            </span>
            {result.graphics && result.graphics.length > 0 && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-400 text-xs font-medium">
                {result.graphics.length} Grafik(en)
              </span>
            )}
            {result.has_words && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
                {result.word_count} Woerter
              </span>
            )}
            {(result.border_ghosts_removed ?? 0) > 0 && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
                {result.border_ghosts_removed} Rahmenlinien entfernt
              </span>
            )}
            <span className="text-gray-400 text-xs ml-auto">
              {result.image_width}x{result.image_height}px | {result.duration_seconds}s
            </span>
          </div>
          {/* Boxes detail */}
          {result.boxes.length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Boxen</h4>
              <div className="space-y-1.5">
                {result.boxes.map((box, i) => (
                  <div key={i} className="flex items-center gap-3 text-xs">
                    <span
                      className="w-3 h-3 rounded-sm flex-shrink-0 border border-gray-300 dark:border-gray-600"
                      style={{ backgroundColor: box.bg_color_hex || '#6b7280' }}
                    />
                    <span className="text-gray-600 dark:text-gray-400">
                      Box {i + 1}:
                    </span>
                    <span className="font-mono text-gray-500">
                      {box.w}x{box.h}px @ ({box.x}, {box.y})
                    </span>
                    {box.bg_color_name && box.bg_color_name !== 'unknown' && box.bg_color_name !== 'white' && (
                      <span className="px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-500">
                        {box.bg_color_name}
                      </span>
                    )}
                    {box.border_thickness > 0 && (
                      <span className="text-gray-400">
                        Rahmen: {box.border_thickness}px
                      </span>
                    )}
                    <span className="text-gray-400">
                      {Math.round(box.confidence * 100)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}
          {/* Zones detail */}
          <div>
            <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Seitenzonen</h4>
            <div className="flex flex-wrap gap-2">
              {result.zones.map((zone) => (
                <span
                  key={zone.index}
                  className={`inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] font-medium ${
                    zone.zone_type === 'box'
                      ? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
                      : 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
                  }`}
                >
                  {zone.zone_type === 'box' ? 'Box' : 'Inhalt'} {zone.index}
                  <span className="text-[10px] font-normal opacity-70">
                    ({zone.w}x{zone.h})
                  </span>
                </span>
              ))}
            </div>
          </div>
          {/* Graphics / visual elements */}
          {result.graphics && result.graphics.length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
                Graphische Elemente ({result.graphics.length})
              </h4>
              {/* Summary by shape */}
              {(() => {
                const shapeCounts: Record<string, number> = {}
                for (const g of result.graphics) {
                  shapeCounts[g.shape] = (shapeCounts[g.shape] || 0) + 1
                }
                return (
                  <div className="flex flex-wrap gap-2 mb-2">
                    {Object.entries(shapeCounts)
                      .sort(([, a], [, b]) => b - a)
                      .map(([shape, count]) => (
                        <span
                          key={shape}
                          className="inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-300 border border-purple-200 dark:border-purple-800"
                        >
                          {shape === 'arrow' ? '→' : shape === 'circle' ? '●' : shape === 'line' ? '─' : shape === 'exclamation' ? '❗' : shape === 'dot' ? '•' : shape === 'illustration' ? '🖼' : '◆'}
                          {' '}{shape} <span className="font-semibold">×{count}</span>
                        </span>
                      ))}
                  </div>
                )
              })()}
              {/* Individual graphics list */}
              <div className="space-y-1.5 max-h-40 overflow-y-auto">
                {result.graphics.map((g, i) => (
                  <div key={i} className="flex items-center gap-3 text-xs">
                    <span
                      className="w-3 h-3 rounded-full flex-shrink-0 border border-gray-300 dark:border-gray-600"
                      style={{ backgroundColor: g.color_hex || '#6b7280' }}
                    />
                    <span className="text-gray-600 dark:text-gray-400 font-medium min-w-[60px]">
                      {g.shape}
                    </span>
                    <span className="font-mono text-gray-500">
                      {g.w}x{g.h}px @ ({g.x}, {g.y})
                    </span>
                    <span className="text-gray-400">
                      {g.color_name}
                    </span>
                    <span className="text-gray-400">
                      {Math.round(g.confidence * 100)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}
          {/* Color regions */}
          {Object.keys(result.color_pixel_counts).length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Farben</h4>
              <div className="flex flex-wrap gap-2">
                {Object.entries(result.color_pixel_counts)
                  .sort(([, a], [, b]) => b - a)
                  .map(([name, count]) => (
                    <span key={name} className="inline-flex items-center gap-1.5 px-2 py-1 rounded text-[11px] bg-gray-50 dark:bg-gray-800 border border-gray-200 dark:border-gray-700">
                      <span
                        className="w-2.5 h-2.5 rounded-full"
                        style={{ backgroundColor: COLOR_HEX[name] || '#6b7280' }}
                      />
                      <span className="text-gray-600 dark:text-gray-400">{name}</span>
                      <span className="text-gray-400 text-[10px]">{count.toLocaleString()}px</span>
                    </span>
                  ))}
              </div>
            </div>
          )}
        </div>
      )}
      {/* Action buttons */}
      {result && (
        <div className="flex justify-between">
          <button
            onClick={handleRerun}
            disabled={detecting}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors disabled:opacity-50"
          >
            Erneut erkennen
          </button>
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter &rarr;
          </button>
        </div>
      )}
      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}

View File

@@ -1,936 +0,0 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
/**
 * Renders `text` with every "\n" turned into a <br />.
 * Empty text is shown as a muted em dash; single-line text is returned as-is.
 */
function MultilineText({ text }: { text: string }) {
  if (!text) {
    return <span className="text-gray-300 dark:text-gray-600">&mdash;</span>
  }
  const segments = text.split('\n')
  if (segments.length === 1) {
    return <>{text}</>
  }
  const lastIdx = segments.length - 1
  return (
    <>
      {segments.map((segment, idx) => (
        <span key={idx}>
          {segment}
          {idx < lastIdx && <br />}
        </span>
      ))}
    </>
  )
}
/**
 * Column type → human-readable header.
 *
 * Known types map to a fixed label; any other type is returned with the first
 * occurrence of "column_" removed.
 *
 * @param colType Column type identifier, e.g. "column_en" or "page_ref".
 * @returns The label to display for that column.
 */
function colTypeLabel(colType: string): string {
  const labels: Record<string, string> = {
    column_en: 'English',
    column_de: 'Deutsch',
    column_example: 'Example',
    column_text: 'Text',
    column_marker: 'Marker',
    page_ref: 'Seite',
  }
  // Own-property check: a plain `labels[colType]` also resolves members
  // inherited from Object.prototype (e.g. "constructor", "toString") and would
  // return a function instead of a string.
  if (Object.prototype.hasOwnProperty.call(labels, colType)) {
    return labels[colType]
  }
  return colType.replace('column_', '')
}
/**
 * Column type → text colour classes.
 *
 * @param colType Column type identifier, e.g. "column_en".
 * @returns The class string for known types, otherwise the neutral gray default.
 */
function colTypeColor(colType: string): string {
  const colors: Record<string, string> = {
    column_en: 'text-blue-600 dark:text-blue-400',
    column_de: 'text-green-600 dark:text-green-400',
    column_example: 'text-orange-600 dark:text-orange-400',
    column_text: 'text-purple-600 dark:text-purple-400',
    column_marker: 'text-gray-500 dark:text-gray-400',
  }
  // Own-property check so inherited Object.prototype members (e.g.
  // "constructor") never leak into a className string.
  if (Object.prototype.hasOwnProperty.call(colors, colType)) {
    return colors[colType]
  }
  return 'text-gray-600 dark:text-gray-400'
}
/** Props accepted by StepWordRecognition. */
interface StepWordRecognitionProps {
/** OCR pipeline session to operate on; while null the step only renders a "finish steps 1-4 first" placeholder. */
sessionId: string | null
/** Callback supplied by the parent; presumably advances to the next wizard step (call site not visible here — confirm). */
onNext: () => void
/** Callback taking a step number; presumably jumps to that wizard step (call site not visible here — confirm). */
goToStep: (step: number) => void
/**
 * When true, `skip_heal_gaps=true` is added to the word-recognition request so
 * the backend skips _heal_row_gaps in the cell grid (better overlay
 * positioning). Defaults to false.
 */
skipHealGaps?: boolean
}
export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps = false }: StepWordRecognitionProps) {
const [gridResult, setGridResult] = useState<GridResult | null>(null)
const [detecting, setDetecting] = useState(false)
const [error, setError] = useState<string | null>(null)
const [gtNotes, setGtNotes] = useState('')
const [gtSaved, setGtSaved] = useState(false)
// Step-through labeling state
const [activeIndex, setActiveIndex] = useState(0)
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
const [editedCells, setEditedCells] = useState<GridCell[]>([])
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid' | 'paddle'>('auto')
const [usedEngine, setUsedEngine] = useState<string>('')
const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')
const [gridMethod, setGridMethod] = useState<'v2' | 'words_first'>('v2')
// Streaming progress state
const [streamProgress, setStreamProgress] = useState<{ current: number; total: number } | null>(null)
const enRef = useRef<HTMLInputElement>(null)
const tableEndRef = useRef<HTMLDivElement>(null)
const isVocab = gridResult?.layout === 'vocab'
useEffect(() => {
if (!sessionId) return
// Always run fresh detection — word-lookup is fast (~0.03s)
// and avoids stale cached results from previous pipeline versions.
runAutoDetection()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
// Publishes a complete grid result into component state: the result itself,
// the engine that produced it, the vocab entries (vocab layout only) and the
// editable cell copies. Entries/cells without a status start as 'pending'.
const applyGridResult = (data: GridResult) => {
  const { layout, entries, cells } = data
  setGridResult(data)
  setUsedEngine(data.ocr_engine || '')
  if (layout === 'vocab' && entries) {
    initEntries(entries)
  }
  if (cells) {
    setEditedCells(cells.map((cell) => ({ ...cell, status: cell.status || 'pending' })))
  }
}
// Seeds the editable entry list (missing status defaults to 'pending') and
// moves the labeling cursor back to the first row.
const initEntries = (entries: WordEntry[]) => {
  setEditedEntries(
    entries.map((entry) => ({ ...entry, status: entry.status || 'pending' })),
  )
  setActiveIndex(0)
}
/**
 * Runs word/cell recognition for the current session (POST .../words) and
 * feeds the outcome into component state.
 *
 * - `engine` overrides the selected OCR engine for this run only.
 * - With grid method 'v2' the response is consumed as an SSE stream
 *   (meta → columns → cell* → complete) so the table fills in live.
 * - With 'words_first' (always used for the 'paddle' engine) the response is a
 *   single JSON GridResult.
 *
 * Failures end up in the `error` state; `detecting` is always cleared.
 */
const runAutoDetection = useCallback(async (engine?: string) => {
  if (!sessionId) return
  const eng = engine || ocrEngine
  setDetecting(true)
  setError(null)
  setStreamProgress(null)
  setEditedCells([])
  setEditedEntries([])
  setGridResult(null)
  try {
    // PP-OCRv5 forces words_first on the backend, so align frontend accordingly
    const effectiveGridMethod = eng === 'paddle' ? 'words_first' : gridMethod
    const useStream = effectiveGridMethod === 'v2'
    // Retry once if initial request fails (e.g. after container restart,
    // session cache may not be warm yet when navigating via wizard)
    let res: Response | null = null
    for (let attempt = 0; attempt < 2; attempt++) {
      res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=${useStream ? 'true' : 'false'}&engine=${eng}&pronunciation=${pronunciation}${skipHealGaps ? '&skip_heal_gaps=true' : ''}&grid_method=${effectiveGridMethod}`,
        { method: 'POST' },
      )
      if (res.ok) break
      if (attempt === 0 && (res.status === 400 || res.status === 404)) {
        // Wait briefly for cache to warm up, then retry
        await new Promise(r => setTimeout(r, 2000))
        continue
      }
      break
    }
    if (!res || !res.ok) {
      const err = await res?.json().catch(() => ({ detail: res?.statusText })) || { detail: 'Worterkennung fehlgeschlagen' }
      throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
    }
    // words_first / pp-ocrv5 returns plain JSON (no streaming)
    if (!useStream) {
      const data = await res.json() as GridResult
      applyGridResult(data)
      return
    }
    // Response.body is nullable; fail with a readable message instead of a
    // TypeError from dereferencing null.
    if (!res.body) {
      throw new Error('Worterkennung fehlgeschlagen')
    }
    const reader = res.body.getReader()
    const decoder = new TextDecoder()
    let buffer = ''
    // Accumulators for the streamed pieces; combined into the final GridResult
    // when the 'complete' event arrives.
    let streamLayout: string | null = null
    let streamColumnsUsed: GridResult['columns_used'] = []
    let streamGridShape: GridResult['grid_shape'] | null = null
    let streamCells: GridCell[] = []
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      buffer += decoder.decode(value, { stream: true })
      // Parse SSE events (separated by \n\n)
      while (buffer.includes('\n\n')) {
        const idx = buffer.indexOf('\n\n')
        const chunk = buffer.slice(0, idx).trim()
        buffer = buffer.slice(idx + 2)
        if (!chunk.startsWith('data: ')) continue
        const dataStr = chunk.slice(6) // strip "data: "
        let event: any
        try {
          event = JSON.parse(dataStr)
        } catch {
          // Skip events whose payload is not valid JSON
          continue
        }
        if (event.type === 'meta') {
          streamLayout = event.layout || 'generic'
          streamGridShape = event.grid_shape || null
          // Show partial grid result so UI renders structure
          setGridResult(prev => ({
            ...prev,
            layout: event.layout || 'generic',
            grid_shape: event.grid_shape,
            columns_used: [],
            cells: [],
            summary: { total_cells: event.grid_shape?.total_cells || 0, non_empty_cells: 0, low_confidence: 0 },
            duration_seconds: 0,
            ocr_engine: '',
          } as GridResult))
        }
        if (event.type === 'columns') {
          streamColumnsUsed = event.columns_used || []
          setGridResult(prev => prev ? { ...prev, columns_used: streamColumnsUsed } : prev)
        }
        if (event.type === 'cell') {
          const cell: GridCell = { ...event.cell, status: 'pending' }
          streamCells = [...streamCells, cell]
          setEditedCells(streamCells)
          setStreamProgress(event.progress)
          // Auto-scroll table to bottom
          setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
        }
        if (event.type === 'complete') {
          // Build final GridResult
          const finalResult: GridResult = {
            cells: streamCells,
            grid_shape: streamGridShape || { rows: 0, cols: 0, total_cells: streamCells.length },
            columns_used: streamColumnsUsed,
            layout: streamLayout || 'generic',
            image_width: 0,
            image_height: 0,
            duration_seconds: event.duration_seconds || 0,
            ocr_engine: event.ocr_engine || '',
            summary: event.summary || {},
          }
          // If vocab: apply post-processed entries from complete event
          if (event.vocab_entries) {
            finalResult.entries = event.vocab_entries
            finalResult.vocab_entries = event.vocab_entries
            finalResult.entry_count = event.vocab_entries.length
          }
          // applyGridResult also records finalResult.ocr_engine as the used engine
          applyGridResult(finalResult)
          setStreamProgress(null)
        }
      }
    }
  } catch (e) {
    setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
  } finally {
    setDetecting(false)
  }
  // skipHealGaps is read when building the request URL, so it must be a
  // dependency; otherwise a changed prop would keep using the stale value.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, ocrEngine, pronunciation, gridMethod, skipHealGaps])
// Saves the reviewer's verdict for the word recognition via
// POST .../ground-truth/words. For an incorrect vocab-layout result the edited
// entries are sent along as the correction. `gtSaved` is only set when the
// server accepted the request; failures are logged to the console.
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
  if (!sessionId) return
  const gt: WordGroundTruth = {
    is_correct: isCorrect,
    corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined),
    notes: gtNotes || undefined,
  }
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/words`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(gt),
    })
    // fetch() only rejects on network errors; an HTTP 4xx/5xx resolves normally.
    // Without this check the UI would report "saved" although nothing was stored.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`)
    }
    setGtSaved(true)
  } catch (e) {
    console.error('Ground truth save failed:', e)
  }
}, [sessionId, gtNotes, editedEntries, isVocab])
// Vocab mode: overwrite one text field of the entry at `index` and flag that
// entry as 'edited'; all other entries are kept unchanged.
const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
  setEditedEntries((current) =>
    current.map((entry, position) =>
      position === index ? { ...entry, [field]: value, status: 'edited' as const } : entry
    )
  )
}
// Generic mode: replace the text of the cell with the given id and flag that
// cell as 'edited'; all other cells are kept unchanged.
const updateCell = (cellId: string, value: string) => {
  setEditedCells((current) =>
    current.map((cell) =>
      cell.cell_id === cellId ? { ...cell, text: value, status: 'edited' as const } : cell
    )
  )
}
// Step-through: confirm every cell of the active row (cells already marked
// 'edited' keep that status), then advance the cursor unless it is already on
// the last row.
const confirmEntry = () => {
  const idsInActiveRow = new Set(getRowCells(activeIndex).map((cell) => cell.cell_id))
  setEditedCells((current) =>
    current.map((cell) =>
      idsInActiveRow.has(cell.cell_id)
        ? { ...cell, status: cell.status === 'edited' ? 'edited' : 'confirmed' }
        : cell
    )
  )
  const lastRowPosition = getUniqueRowCount() - 1
  if (activeIndex < lastRowPosition) {
    setActiveIndex(activeIndex + 1)
  }
}
// Step-through: mark every cell of the active row as 'skipped', then advance
// the cursor unless it is already on the last row.
const skipEntry = () => {
  const idsInActiveRow = new Set(getRowCells(activeIndex).map((cell) => cell.cell_id))
  setEditedCells((current) =>
    current.map((cell) =>
      idsInActiveRow.has(cell.cell_id) ? { ...cell, status: 'skipped' as const } : cell
    )
  )
  const lastRowPosition = getUniqueRowCount() - 1
  if (activeIndex < lastRowPosition) {
    setActiveIndex(activeIndex + 1)
  }
}
// Helper: number of distinct row_index values among the edited cells
// (0 when there are no cells).
const getUniqueRowCount = () => {
  const distinctRows = new Set<number>()
  for (const cell of editedCells) {
    distinctRows.add(cell.row_index)
  }
  return distinctRows.size
}
// Helper: all cells of the row found at `rowPosition` in the ascending list of
// distinct row_index values. An out-of-range position matches no cell.
const getRowCells = (rowPosition: number) => {
  const orderedRowIndices = Array.from(new Set(editedCells.map((cell) => cell.row_index)))
  orderedRowIndices.sort((lhs, rhs) => lhs - rhs)
  const targetRow = orderedRowIndices[rowPosition]
  return editedCells.filter((cell) => cell.row_index === targetRow)
}
// Focus english input when active entry changes in labeling mode
useEffect(() => {
if (mode === 'labeling' && enRef.current) {
enRef.current.focus()
}
}, [activeIndex, mode])
// Keyboard shortcuts in labeling mode
useEffect(() => {
if (mode !== 'labeling') return
const handler = (e: KeyboardEvent) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault()
confirmEntry()
} else if (e.key === 'ArrowDown' && e.ctrlKey) {
e.preventDefault()
skipEntry()
} else if (e.key === 'ArrowUp' && e.ctrlKey) {
e.preventDefault()
if (activeIndex > 0) setActiveIndex(activeIndex - 1)
}
}
window.addEventListener('keydown', handler)
return () => window.removeEventListener('keydown', handler)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [mode, activeIndex, editedEntries, editedCells])
if (!sessionId) {
return (
<div className="flex flex-col items-center justify-center py-16 text-center">
<div className="text-5xl mb-4">🔤</div>
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
Schritt 5: Worterkennung
</h3>
<p className="text-gray-500 dark:text-gray-400 max-w-md">
Bitte zuerst Schritte 1-4 abschliessen.
</p>
</div>
)
}
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
const confColor = (conf: number) => {
if (conf >= 70) return 'text-green-600 dark:text-green-400'
if (conf >= 50) return 'text-yellow-600 dark:text-yellow-400'
return 'text-red-600 dark:text-red-400'
}
const statusBadge = (status?: string) => {
const map: Record<string, string> = {
pending: 'bg-gray-100 dark:bg-gray-700 text-gray-500',
confirmed: 'bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-400',
edited: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-400',
skipped: 'bg-orange-100 dark:bg-orange-900/30 text-orange-700 dark:text-orange-400',
}
return map[status || 'pending'] || map.pending
}
const summary = gridResult?.summary
const columnsUsed = gridResult?.columns_used || []
const gridShape = gridResult?.grid_shape
// Counts for labeling progress (always cell-based)
const confirmedRowIds = new Set(
editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').map(c => c.row_index)
)
const confirmedCount = confirmedRowIds.size
const totalCount = getUniqueRowCount()
// Group cells by row for generic table display
const cellsByRow: Map<number, GridCell[]> = new Map()
for (const cell of editedCells) {
const existing = cellsByRow.get(cell.row_index) || []
existing.push(cell)
cellsByRow.set(cell.row_index, existing)
}
const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b)
return (
<div className="space-y-4">
{/* Loading with streaming progress */}
{detecting && (
<div className="space-y-1">
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
{streamProgress
? `Zelle ${streamProgress.current}/${streamProgress.total} erkannt...`
: 'Worterkennung startet...'}
</div>
{streamProgress && streamProgress.total > 0 && (
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-1.5">
<div
className="bg-teal-500 h-1.5 rounded-full transition-all duration-150"
style={{ width: `${(streamProgress.current / streamProgress.total) * 100}%` }}
/>
</div>
)}
</div>
)}
{/* Layout badge + Mode toggle */}
{gridResult && (
<div className="flex items-center gap-2">
{/* Layout badge */}
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
isVocab
? 'bg-indigo-100 dark:bg-indigo-900/30 text-indigo-700 dark:text-indigo-300'
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}>
{isVocab ? 'Vokabel-Layout' : 'Generisch'}
</span>
{gridShape && (
<span className="text-[10px] text-gray-400">
{gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen
</span>
)}
<div className="flex-1" />
<button
onClick={() => setMode('overview')}
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
mode === 'overview'
? 'bg-teal-600 text-white'
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
}`}
>
Uebersicht
</button>
<button
onClick={() => setMode('labeling')}
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
mode === 'labeling'
? 'bg-teal-600 text-white'
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
}`}
>
Labeling ({confirmedCount}/{totalCount})
</button>
</div>
)}
{/* Overview mode */}
{mode === 'overview' && (
<>
{/* Images: overlay vs clean */}
<div className="grid grid-cols-2 gap-4">
<div>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Mit Grid-Overlay
</div>
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
{gridResult ? (
// eslint-disable-next-line @next/next/no-img-element
<img
src={`${overlayUrl}?t=${Date.now()}`}
alt="Wort-Overlay"
className="w-full h-auto"
/>
) : (
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
{detecting ? 'Erkenne Woerter...' : 'Keine Daten'}
</div>
)}
</div>
</div>
<div>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Entzerrtes Bild
</div>
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={dewarpedUrl}
alt="Entzerrt"
className="w-full h-auto"
/>
</div>
</div>
</div>
{/* Result summary (only after streaming completes) */}
{gridResult && summary && !detecting && (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
<div className="flex items-center justify-between">
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Ergebnis: {summary.non_empty_cells}/{summary.total_cells} Zellen mit Text
({sortedRowIndices.length} Zeilen, {columnsUsed.length} Spalten)
</h4>
<span className="text-xs text-gray-400">
{gridResult.duration_seconds}s
</span>
</div>
{/* Summary badges */}
<div className="flex gap-2 flex-wrap">
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
Zellen: {summary.non_empty_cells}/{summary.total_cells}
</span>
{columnsUsed.map((col, i) => (
<span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
C{col.index}: {colTypeLabel(col.type)}
</span>
))}
{summary.low_confidence > 0 && (
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
Unsicher: {summary.low_confidence}
</span>
)}
</div>
{/* Entry/Cell table */}
<div className="max-h-80 overflow-y-auto">
{/* Unified dynamic table — columns driven by columns_used */}
<table className="w-full text-xs">
<thead className="sticky top-0 bg-white dark:bg-gray-800">
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
<th className="py-1 pr-2 w-12">Zeile</th>
{columnsUsed.map((col, i) => (
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
{colTypeLabel(col.type)}
</th>
))}
<th className="py-1 w-12 text-right">Conf</th>
</tr>
</thead>
<tbody>
{sortedRowIndices.map((rowIdx, posIdx) => {
const rowCells = cellsByRow.get(rowIdx) || []
const avgConf = rowCells.length
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
: 0
return (
<tr
key={rowIdx}
className={`border-b dark:border-gray-700/50 ${
posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
}`}
onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
>
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
R{String(rowIdx).padStart(2, '0')}
</td>
{columnsUsed.map((col) => {
const cell = rowCells.find(c => c.col_index === col.index)
return (
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
<MultilineText text={cell?.text || ''} />
</td>
)
})}
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
{avgConf}%
</td>
</tr>
)
})}
</tbody>
</table>
<div ref={tableEndRef} />
</div>
</div>
)}
{/* Streaming cell table (shown while detecting, before complete) */}
{detecting && editedCells.length > 0 && !gridResult?.summary?.non_empty_cells && (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Live: {editedCells.length} Zellen erkannt...
</h4>
<div className="max-h-80 overflow-y-auto">
<table className="w-full text-xs">
<thead className="sticky top-0 bg-white dark:bg-gray-800">
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
<th className="py-1 pr-2 w-12">Zelle</th>
{columnsUsed.map((col, i) => (
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
{colTypeLabel(col.type)}
</th>
))}
<th className="py-1 w-12 text-right">Conf</th>
</tr>
</thead>
<tbody>
{(() => {
const liveByRow: Map<number, GridCell[]> = new Map()
for (const cell of editedCells) {
const existing = liveByRow.get(cell.row_index) || []
existing.push(cell)
liveByRow.set(cell.row_index, existing)
}
const liveSorted = [...liveByRow.keys()].sort((a, b) => a - b)
return liveSorted.map(rowIdx => {
const rowCells = liveByRow.get(rowIdx) || []
const avgConf = rowCells.length
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
: 0
return (
<tr key={rowIdx} className="border-b dark:border-gray-700/50 animate-fade-in">
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
R{String(rowIdx).padStart(2, '0')}
</td>
{columnsUsed.map((col) => {
const cell = rowCells.find(c => c.col_index === col.index)
return (
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300">
<MultilineText text={cell?.text || ''} />
</td>
)
})}
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
{avgConf}%
</td>
</tr>
)
})
})()}
</tbody>
</table>
<div ref={tableEndRef} />
</div>
</div>
)}
</>
)}
{/* Labeling mode */}
{mode === 'labeling' && editedCells.length > 0 && (
<div className="grid grid-cols-3 gap-4">
{/* Left 2/3: Image with highlighted active row */}
<div className="col-span-2">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Zeile {activeIndex + 1} von {getUniqueRowCount()}
</div>
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={`${overlayUrl}?t=${Date.now()}`}
alt="Wort-Overlay"
className="w-full h-auto"
/>
{/* Highlight overlay for active row */}
{(() => {
const rowCells = getRowCells(activeIndex)
return rowCells.map(cell => (
<div
key={cell.cell_id}
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
style={{
left: `${cell.bbox_pct.x}%`,
top: `${cell.bbox_pct.y}%`,
width: `${cell.bbox_pct.w}%`,
height: `${cell.bbox_pct.h}%`,
}}
/>
))
})()}
</div>
</div>
{/* Right 1/3: Editable fields */}
<div className="space-y-3">
{/* Navigation */}
<div className="flex items-center justify-between">
<button
onClick={() => setActiveIndex(Math.max(0, activeIndex - 1))}
disabled={activeIndex === 0}
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
>
Zurueck
</button>
<span className="text-xs text-gray-500">
{activeIndex + 1} / {getUniqueRowCount()}
</span>
<button
onClick={() => setActiveIndex(Math.min(
getUniqueRowCount() - 1,
activeIndex + 1
))}
disabled={activeIndex >= getUniqueRowCount() - 1}
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
>
Weiter
</button>
</div>
{/* Status badge */}
<div className="flex items-center gap-2">
{(() => {
const rowCells = getRowCells(activeIndex)
const avgConf = rowCells.length
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
: 0
return (
<span className={`text-xs font-mono ${confColor(avgConf)}`}>
{avgConf}% Konfidenz
</span>
)
})()}
</div>
{/* Editable fields — one per column, driven by columns_used */}
<div className="space-y-2">
{(() => {
const rowCells = getRowCells(activeIndex)
return columnsUsed.map((col, colIdx) => {
const cell = rowCells.find(c => c.col_index === col.index)
if (!cell) return null
return (
<div key={col.index}>
<div className="flex items-center gap-1 mb-0.5">
<label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
{colTypeLabel(col.type)}
</label>
<span className="text-[9px] text-gray-400">{cell.cell_id}</span>
</div>
{/* Cell crop */}
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
<CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
</div>
<textarea
ref={colIdx === 0 ? enRef as any : undefined}
rows={Math.max(1, (cell.text || '').split('\n').length)}
value={cell.text || ''}
onChange={(e) => updateCell(cell.cell_id, e.target.value)}
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
/>
</div>
)
})
})()}
</div>
{/* Action buttons */}
<div className="flex gap-2">
<button
onClick={confirmEntry}
className="flex-1 px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700 font-medium"
>
Bestaetigen (Enter)
</button>
<button
onClick={skipEntry}
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600"
>
Skip
</button>
</div>
{/* Shortcuts hint */}
<div className="text-[10px] text-gray-400 space-y-0.5">
<div>Enter = Bestaetigen & weiter</div>
<div>Ctrl+Down = Ueberspringen</div>
<div>Ctrl+Up = Zurueck</div>
</div>
{/* Row list (compact) */}
<div className="border-t dark:border-gray-700 pt-2 mt-2">
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
Alle Zeilen
</div>
<div className="max-h-48 overflow-y-auto space-y-0.5">
{sortedRowIndices.map((rowIdx, posIdx) => {
const rowCells = cellsByRow.get(rowIdx) || []
const textParts = rowCells.filter(c => c.text).map(c => c.text.replace(/\n/g, ' '))
return (
<div
key={rowIdx}
onClick={() => setActiveIndex(posIdx)}
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
posIdx === activeIndex
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
}`}
>
<span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
{textParts.join(' \u2192 ') || '\u2014'}
</span>
</div>
)
})}
</div>
</div>
</div>
</div>
)}
{/* Controls */}
{gridResult && (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
<div className="flex items-center gap-3 flex-wrap">
{/* Grid method selector */}
<select
value={gridMethod}
onChange={(e) => setGridMethod(e.target.value as 'v2' | 'words_first')}
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
>
<option value="v2">Standard (v2)</option>
<option value="words_first">Words-First</option>
</select>
{/* OCR Engine selector */}
<select
value={ocrEngine}
onChange={(e) => setOcrEngine(e.target.value as 'auto' | 'tesseract' | 'rapid' | 'paddle')}
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
>
<option value="auto">Auto (RapidOCR wenn verfuegbar)</option>
<option value="rapid">RapidOCR (ONNX)</option>
<option value="tesseract">Tesseract</option>
<option value="paddle">PP-OCRv5 (lokal)</option>
</select>
{/* Pronunciation selector (only for vocab) */}
{isVocab && (
<select
value={pronunciation}
onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
>
<option value="british">Britisch (RP)</option>
<option value="american">Amerikanisch</option>
</select>
)}
<button
onClick={() => runAutoDetection()}
disabled={detecting}
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
>
Erneut erkennen
</button>
{/* Show which engine was used */}
{usedEngine && (
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
usedEngine === 'rapid' || usedEngine === 'paddle'
? 'bg-purple-100 dark:bg-purple-900/30 text-purple-700 dark:text-purple-300'
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}>
{usedEngine === 'paddle' ? 'pp-ocrv5' : usedEngine}
</span>
)}
<button
onClick={() => goToStep(3)}
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 text-orange-600 dark:text-orange-400 border-orange-300 dark:border-orange-700"
>
Zeilen korrigieren (Step 4)
</button>
<div className="flex-1" />
{/* Ground truth */}
{!gtSaved ? (
<>
<input
type="text"
placeholder="Notizen (optional)"
value={gtNotes}
onChange={(e) => setGtNotes(e.target.value)}
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
/>
<button
onClick={() => handleGroundTruth(true)}
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
>
Korrekt
</button>
<button
onClick={() => handleGroundTruth(false)}
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
>
Fehlerhaft
</button>
</>
) : (
<span className="text-xs text-green-600 dark:text-green-400">
Ground Truth gespeichert
</span>
)}
<button
onClick={onNext}
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
>
Weiter
</button>
</div>
</div>
)}
{error && (
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
{error}
</div>
)}
</div>
)
}
/**
 * CellCrop: shows a zoomed-in crop of the dewarped image for a single cell.
 *
 * `bbox` is the cell's bounding box in percent of the image
 * (x/y = top-left corner, w/h = size).
 *
 * The image is rendered `scale` times as wide as the container and then moved
 * with a CSS transform so that the cell's top-left corner lands on the
 * container's top-left corner; the container clips everything else.
 *
 * Why an <img> + translate() instead of background-position: percentage values
 * of `background-position` are resolved against (container size - image size),
 * so a value of `-bbox.x * scale` percent does not shift the image by bbox.x
 * percent of its own width (for an image larger than the container it even
 * shifts it in the opposite direction). Percentages inside `translate()` refer
 * to the element's own box, which is exactly the unit `bbox` is expressed in,
 * on both axes and without needing the image's aspect ratio.
 */
function CellCrop({ imageUrl, bbox }: { imageUrl: string; bbox: { x: number; y: number; w: number; h: number } }) {
  // Zoom factor derived from the cell size, capped at 8x.
  // A zero-sized bbox produces Infinity here and therefore falls back to the cap.
  const scaleX = 100 / bbox.w
  const scaleY = 100 / bbox.h
  const scale = Math.min(scaleX, scaleY, 8)
  return (
    <div className="w-full h-full relative overflow-hidden">
      {/* eslint-disable-next-line @next/next/no-img-element */}
      <img
        src={imageUrl}
        alt=""
        aria-hidden="true"
        draggable={false}
        style={{
          position: 'absolute',
          top: 0,
          left: 0,
          // Width relative to the container; height follows the aspect ratio.
          width: `${scale * 100}%`,
          maxWidth: 'none',
          height: 'auto',
          // Percentages are relative to the image's own rendered size.
          transform: `translate(${-bbox.x}%, ${-bbox.y}%)`,
        }}
      />
    </div>
  )
}

View File

@@ -1,176 +0,0 @@
/**
* Tests for useSlideWordPositions hook.
*
* The hook computes word positions from OCR word_boxes or pixel projection.
* Since Canvas/Image are not available in jsdom, we test the pure computation
* logic by extracting and verifying the WordPosition interface contract.
*/
import { describe, it, expect } from 'vitest'
// ---------------------------------------------------------------------------
// WordPosition interface (mirrored from useSlideWordPositions.ts)
// ---------------------------------------------------------------------------
/**
 * Geometry and text of one positioned word, as produced by the helpers in this file.
 * All *Pct values are percentages: wordBoxToPosition derives them as
 * pixel value / image dimension * 100, fallbackPositions takes them from the
 * percent-based cell bbox it is given.
 */
interface WordPosition {
// Left edge of the word (horizontal axis).
xPct: number
// Width of the word (horizontal axis).
wPct: number
// Top edge of the word (vertical axis).
yPct: number
// Height of the word (vertical axis).
hPct: number
// The word / token text.
text: string
// Always 1.0 in the helpers of this file; its meaning is defined by the hook, which is not visible here.
fontRatio: number
}
// ---------------------------------------------------------------------------
// Pure computation functions extracted from the hook for testing
// ---------------------------------------------------------------------------
/**
 * Word-box path: turn one OCR word_box into a WordPosition.
 *
 * The box is given in pixels; the result expresses the same rectangle in
 * percent of the image size. Replicates the word_boxes.map() logic in
 * useSlideWordPositions.
 *
 * @param box  OCR word box: text plus left/top/width/height in pixels
 * @param imgW image width in pixels
 * @param imgH image height in pixels
 * @returns the word's rectangle in percent, with fontRatio fixed at 1.0
 */
function wordBoxToPosition(
  box: { text: string; left: number; top: number; width: number; height: number },
  imgW: number,
  imgH: number,
): WordPosition {
  const { text, left, top, width, height } = box
  // Divide first, then scale to percent (keeps the floating-point results unchanged).
  const toPercent = (pixels: number, dimension: number): number => (pixels / dimension) * 100
  const position: WordPosition = {
    xPct: toPercent(left, imgW),
    wPct: toPercent(width, imgW),
    yPct: toPercent(top, imgH),
    hPct: toPercent(height, imgH),
    text,
    fontRatio: 1.0,
  }
  return position
}
/**
 * Fallback path (no word_boxes): give every token an equally wide slot inside
 * the cell bbox, left to right. Replicates the fallback logic in
 * useSlideWordPositions.
 *
 * @param tokens  the words of the cell, in reading order
 * @param bboxPct cell bounding box in percent (x/y = top-left, w/h = size)
 * @returns one WordPosition per token; all share the cell's y and height,
 *          and fontRatio is fixed at 1.0
 */
function fallbackPositions(
  tokens: string[],
  bboxPct: { x: number; y: number; w: number; h: number },
): WordPosition[] {
  const { x: cellLeft, y: cellTop, w: cellWidth, h: cellHeight } = bboxPct
  // Width of one slot; only read when there is at least one token.
  const slotWidth = cellWidth / tokens.length
  return tokens.map((token, slot): WordPosition => {
    const slotLeft = cellLeft + slot * slotWidth
    return {
      xPct: slotLeft,
      wPct: slotWidth,
      yPct: cellTop,
      hPct: cellHeight,
      text: token,
      fontRatio: 1.0,
    }
  })
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('wordBoxToPosition (word-box path)', () => {
  it('should compute percentage positions from pixel coordinates', () => {
    // 1000 x 2000 px image; every expectation is pixel / dimension * 100.
    const result = wordBoxToPosition(
      { text: 'hello', left: 100, top: 200, width: 80, height: 20 },
      1000,
      2000,
    )
    expect(result.xPct).toBeCloseTo(10, 1)
    expect(result.wPct).toBeCloseTo(8, 1)
    expect(result.yPct).toBeCloseTo(10, 1)
    expect(result.hPct).toBeCloseTo(1, 1)
    expect(result.text).toBe('hello')
    expect(result.fontRatio).toBe(1.0)
  })

  it('should produce different yPct for words on different lines', () => {
    // Two boxes that differ only in their text and vertical offset.
    const wordAt = (top: number, text: string) =>
      wordBoxToPosition({ text, left: 50, top, width: 60, height: 20 }, 1000, 2000)
    const upper = wordAt(100, 'line1')
    const lower = wordAt(130, 'line2')
    expect(upper.yPct).not.toEqual(lower.yPct)
    expect(lower.yPct).toBeGreaterThan(upper.yPct)
  })

  it('should handle word at origin', () => {
    const { xPct, yPct, wPct, hPct } = wordBoxToPosition(
      { text: 'a', left: 0, top: 0, width: 50, height: 25 },
      500,
      500,
    )
    expect(xPct).toBe(0)
    expect(yPct).toBe(0)
    expect(wPct).toBeCloseTo(10, 1)
    expect(hPct).toBeCloseTo(5, 1)
  })

  it('should handle word at bottom-right corner', () => {
    // The box ends exactly at the image's right and bottom edge.
    const { xPct, yPct, wPct, hPct } = wordBoxToPosition(
      { text: 'z', left: 900, top: 1900, width: 100, height: 100 },
      1000,
      2000,
    )
    expect(xPct).toBe(90)
    expect(yPct).toBe(95)
    expect(wPct).toBe(10)
    expect(hPct).toBe(5)
  })
})
describe('fallbackPositions (no word_boxes)', () => {
  it('should spread tokens evenly across cell width', () => {
    const bbox = { x: 10, y: 20, w: 60, h: 5 }
    const positions = fallbackPositions(['apple', 'Apfel'], bbox)
    expect(positions.length).toBe(2)
    expect(positions[0].xPct).toBeCloseTo(10, 1)
    expect(positions[1].xPct).toBeCloseTo(40, 1) // 10 + 30
    expect(positions[0].wPct).toBeCloseTo(30, 1)
    expect(positions[1].wPct).toBeCloseTo(30, 1)
  })

  it('should use cell bbox for Y position (all words same Y)', () => {
    const bbox = { x: 5, y: 30, w: 80, h: 4 }
    const positions = fallbackPositions(['a', 'b', 'c'], bbox)
    for (const wp of positions) {
      expect(wp.yPct).toBe(30)
      expect(wp.hPct).toBe(4)
    }
  })

  it('should handle single token', () => {
    const bbox = { x: 15, y: 25, w: 50, h: 6 }
    const positions = fallbackPositions(['word'], bbox)
    expect(positions.length).toBe(1)
    expect(positions[0].xPct).toBe(15)
    expect(positions[0].wPct).toBe(50)
    expect(positions[0].yPct).toBe(25)
    expect(positions[0].hPct).toBe(6)
  })

  it('should return no positions for an empty token list', () => {
    // The slot width is w / 0 here, but it is never read because there is
    // nothing to map over; the result must simply be empty.
    const bbox = { x: 15, y: 25, w: 50, h: 6 }
    const positions = fallbackPositions([], bbox)
    expect(positions).toEqual([])
  })
})
describe('WordPosition yPct/hPct contract', () => {
  it('word-box path: yPct comes from box.top, not cell bbox', () => {
    // Key property: words of a multi-line cell keep their own vertical
    // position instead of all being placed at the cell's position.
    const cellBbox = { x: 10, y: 20, w: 60, h: 10 } // cell covers y=20% .. y=30%
    // Same cell, two lines: the boxes differ only in text and `top`.
    const wordOnLine = (text: string, top: number) =>
      wordBoxToPosition({ text, left: 100, top, width: 80, height: 20 }, 1000, 1000)
    const first = wordOnLine('line1', 200)
    const second = wordOnLine('line2', 260)

    // Expected: 20% and 26%, i.e. not both at cellBbox.y (20%).
    expect(first.yPct).toBeCloseTo(20, 1)
    expect(second.yPct).toBeCloseTo(26, 1)
    expect(first.yPct).not.toEqual(second.yPct)

    // Heights come from the individual boxes (2%), not from the cell (10%).
    expect(first.hPct).toBeCloseTo(2, 1)
    expect(second.hPct).toBeCloseTo(2, 1)
    expect(first.hPct).toBeLessThan(cellBbox.h)
  })

  it('fallback path: yPct equals cell bbox.y (no per-word data)', () => {
    const bbox = { x: 10, y: 45, w: 30, h: 8 }
    const [left, right] = fallbackPositions(['a', 'b'], bbox)
    // No word_boxes available: every word inherits the cell's Y and height.
    expect(left.yPct).toBe(bbox.y)
    expect(right.yPct).toBe(bbox.y)
    expect(left.hPct).toBe(bbox.h)
    expect(right.hPct).toBe(bbox.h)
  })
})

View File

@@ -1,198 +0,0 @@
import { useEffect, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
/**
 * Horizontal placement of one word group inside a cell, as computed by
 * usePixelWordPositions. xPct/wPct use the same unit as cell.bbox_pct
 * (the hook converts bbox_pct to pixels via `/ 100 * image size`, i.e. percent).
 */
export interface WordPosition {
// Left edge: cell.bbox_pct.x plus the cluster's offset within the cell.
xPct: number
// Width of the matched dark-pixel cluster (or merged clusters).
wPct: number
// Text of the word group (or the whole trimmed cell text for a single group).
text: string
// Fitted font size relative to the cell height, capped at 1.0; the hook
// finally replaces it with the most common ratio across all cells.
fontRatio: number
}
/**
 * Shared hook: analyse dark-pixel clusters on an image to determine
 * the exact horizontal position & auto-font-size of word groups in each cell.
 *
 * When rotation=180, the image is rotated 180° before pixel analysis.
 * Cell coordinates are transformed to the rotated space for reading,
 * and cluster positions are mirrored back to the original coordinate system.
 *
 * The analysis runs once the image has loaded. If the hook's inputs change or
 * the component unmounts before that, the pending load is cancelled so that a
 * stale image can never overwrite newer results or set state after unmount.
 *
 * @param imageUrl URL of the image to analyse
 * @param cells    grid cells; cells without bbox_pct or text are ignored
 * @param active   when false, no analysis is started
 * @param rotation 0 (default) or 180 degrees
 * @returns a Map<cell_id, WordPosition[]>; cells for which no usable clusters
 *          were found have no entry
 */
export function usePixelWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Flipped by the cleanup below; guards against a late onload of an
    // outdated image (dependency change) or of an unmounted component.
    let cancelled = false

    const img = new Image()
    // NOTE(review): presumably requested CORS-enabled so the canvas pixels stay readable.
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight
      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        // Draw image rotated 180°
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        ctx.setTransform(1, 0, 0, 1, 0, 0) // reset transform for measureText
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Reference font used only for measuring text widths.
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        // Split by 3+ whitespace into word-groups
        const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)

        // Cell pixel region — when rotated 180°, transform coordinates
        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
        if (rotation === 180) {
          // In rotated image: (x,y) maps to (W-x-w, H-y-h)
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }
        if (cw <= 0 || ch <= 0) continue

        // Clamp the origin to the image; cells that still extend past the
        // right/bottom edge are skipped rather than read out of bounds.
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue

        const imageData = ctx.getImageData(cx, cy, cw, ch)

        // Vertical projection: count dark pixels per column
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            // Weighted RGB luminance; anything below 128 counts as dark.
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // Find dark-pixel clusters (word groups on the image).
        // A column is "dark" with at least 3% of the cell height in dark pixels
        // (min. 1); a cluster ends after more than minGap non-dark columns.
        const threshold = Math.max(1, ch * 0.03)
        const minGap = Math.max(5, Math.round(cw * 0.02))
        let clusters: { start: number; end: number }[] = []
        let inCluster = false
        let clStart = 0
        let gap = 0
        for (let x = 0; x < cw; x++) {
          if (proj[x] >= threshold) {
            if (!inCluster) { clStart = x; inCluster = true }
            gap = 0
          } else if (inCluster) {
            gap++
            if (gap > minGap) {
              // x - gap is the last dark column before the gap started
              clusters.push({ start: clStart, end: x - gap })
              inCluster = false
              gap = 0
            }
          }
        }
        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
        if (clusters.length === 0) continue

        // When rotated 180°, mirror clusters back to original coordinate system
        // A cluster at (start, end) in rotated space = (cw-1-end, cw-1-start) in original
        if (rotation === 180) {
          clusters = clusters.map(c => ({
            start: cw - 1 - c.end,
            end: cw - 1 - c.start,
          })).reverse() // reverse to restore left-to-right order in original space
        }

        const wordPos: WordPosition[] = []
        if (groups.length <= 1) {
          // Single group: position at first cluster, merge all clusters for width
          const firstCl = clusters[0]
          const lastCl = clusters[clusters.length - 1]
          const clusterW = lastCl.end - firstCl.start + 1
          const measured = ctx.measureText(cell.text.trim())
          // Font size at which the text would be as wide as the cluster span
          const autoFontPx = refFontSize * (clusterW / measured.width)
          const fontRatio = Math.min(autoFontPx / ch, 1.0)
          wordPos.push({
            xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
            wPct: (clusterW / cw) * cell.bbox_pct.w,
            text: cell.text.trim(),
            fontRatio,
          })
        } else if (clusters.length >= groups.length) {
          // Multiple groups: match to clusters left-to-right
          for (let i = 0; i < groups.length; i++) {
            const cl = clusters[i]
            const clusterW = cl.end - cl.start + 1
            const measured = ctx.measureText(groups[i])
            const autoFontPx = refFontSize * (clusterW / measured.width)
            const fontRatio = Math.min(autoFontPx / ch, 1.0)
            wordPos.push({
              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
              wPct: (clusterW / cw) * cell.bbox_pct.w,
              text: groups[i],
              fontRatio,
            })
          }
        } else {
          continue // fewer clusters than groups — skip
        }
        positions.set(cell.cell_id, wordPos)
      }

      // Normalise: find the most common fontRatio (mode) and apply it to all
      const allRatios: number[] = []
      for (const wps of positions.values()) {
        for (const wp of wps) allRatios.push(wp.fontRatio)
      }
      if (allRatios.length > 0) {
        // Bucket ratios in steps of 0.02, find mode
        const buckets = new Map<number, number>()
        for (const r of allRatios) {
          const key = Math.round(r * 50) / 50 // round to nearest 0.02
          buckets.set(key, (buckets.get(key) || 0) + 1)
        }
        let modeRatio = allRatios[0]
        let modeCount = 0
        for (const [ratio, count] of buckets) {
          if (count > modeCount) { modeRatio = ratio; modeCount = count }
        }
        // Apply mode to all word positions
        for (const wps of positions.values()) {
          for (const wp of wps) wp.fontRatio = modeRatio
        }
      }

      setCellWordPositions(positions)
    }
    img.src = imageUrl

    return () => {
      // Inputs changed or component unmounted: drop the pending result.
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return cellWordPositions
}

View File

@@ -234,6 +234,28 @@ export const MODULE_REGISTRY: BackendModule[] = [
},
priority: 'high'
},
{
id: 'llm-compare',
name: 'LLM Vergleich',
description: 'Vergleich verschiedener KI-Modelle und Provider',
category: 'ai',
backend: {
service: 'python-backend',
port: 8000,
basePath: '/api/llm',
endpoints: [
{ path: '/providers', method: 'GET', description: 'Verfuegbare Provider' },
{ path: '/compare', method: 'POST', description: 'Modelle vergleichen' },
{ path: '/benchmark', method: 'POST', description: 'Benchmark ausfuehren' },
]
},
frontend: {
adminV2Page: '/ai/llm-compare',
oldAdminPage: '/admin/llm-compare',
status: 'connected'
},
priority: 'medium'
},
{
id: 'magic-help',
name: 'Magic Help (TrOCR)',

View File

@@ -5,7 +5,7 @@
* All DSGVO and Compliance modules are now consolidated under the SDK.
*/
export type CategoryId = 'communication' | 'ai' | 'education' | 'website' | 'sdk-docs'
export type CategoryId = 'compliance-sdk' | 'ai' | 'education' | 'website' | 'sdk-docs'
export interface NavModule {
id: string
@@ -31,47 +31,23 @@ export interface NavCategory {
export const navigation: NavCategory[] = [
// =========================================================================
// Kommunikation — Video, Voice, Alerts
// Compliance SDK - Alle Datenschutz-, Compliance- und SDK-Module
// =========================================================================
{
id: 'communication',
name: 'Kommunikation',
icon: 'mail',
color: '#f59e0b', // Amber-500
colorClass: 'communication',
description: 'Video & Chat, Voice Service, E-Mail, Alerts',
id: 'compliance-sdk',
name: 'Compliance SDK',
icon: 'shield',
color: '#8b5cf6', // Violet-500
colorClass: 'compliance-sdk',
description: 'DSGVO, Audit, GRC & SDK-Werkzeuge',
modules: [
{
id: 'mail',
name: 'Unified Inbox',
href: '/communication/mail',
description: 'E-Mail-Konten & KI-Analyse',
purpose: 'E-Mail-Konten verwalten und KI-Kategorisierung nutzen. IMAP/SMTP Konfiguration, Vorlagen und Audit-Log.',
audience: ['Support', 'Admins'],
},
{
id: 'video-chat',
name: 'Video & Chat',
href: '/communication/video-chat',
description: 'Matrix & Jitsi Monitoring',
purpose: 'Dashboard fuer Matrix Synapse und Jitsi Meet. Service-Status, aktive Meetings, Traffic-Analyse und Ressourcen-Empfehlungen.',
audience: ['Admins', 'DevOps'],
},
{
id: 'voice-service',
name: 'Voice Service',
href: '/communication/matrix',
description: 'PersonaPlex-7B & TaskOrchestrator',
purpose: 'Voice-First Interface Konfiguration und Architektur-Dokumentation. Live Demo, Task States, Intents und DSGVO-Informationen.',
audience: ['Entwickler', 'Admins'],
},
{
id: 'alerts',
name: 'Alerts Monitoring',
href: '/communication/alerts',
description: 'Google Alerts & Feed-Ueberwachung',
purpose: 'Google Alerts und RSS-Feeds fuer relevante Neuigkeiten ueberwachen. Topics, Regeln, Relevanz-Profil und Digest-Generierung.',
audience: ['Marketing', 'Admins'],
id: 'catalog-manager',
name: 'Katalogverwaltung',
href: '/dashboard/catalog-manager',
description: 'SDK-Kataloge & Auswahltabellen',
purpose: 'Zentrale Verwaltung aller Dropdown- und Auswahltabellen im SDK. Systemkataloge (Risiken, Massnahmen, Vorlagen) anzeigen und benutzerdefinierte Eintraege ergaenzen, bearbeiten und loeschen.',
audience: ['DSB', 'Compliance Officer', 'Administratoren'],
},
],
},
@@ -132,6 +108,16 @@ export const navigation: NavCategory[] = [
// -----------------------------------------------------------------------
// KI-Werkzeuge: Standalone-Tools fuer Entwicklung & QA
// -----------------------------------------------------------------------
{
id: 'llm-compare',
name: 'LLM Vergleich',
href: '/ai/llm-compare',
description: 'KI-Provider Vergleich',
purpose: 'Vergleichen Sie verschiedene LLM-Anbieter (Ollama, OpenAI, Anthropic) hinsichtlich Qualitaet, Geschwindigkeit und Kosten. Standalone-Werkzeug fuer Modell-Evaluation.',
audience: ['Entwickler', 'Data Scientists'],
oldAdminPath: '/admin/llm-compare',
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-compare',
name: 'OCR Vergleich',
@@ -141,24 +127,6 @@ export const navigation: NavCategory[] = [
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-pipeline',
name: 'OCR Pipeline',
href: '/ai/ocr-pipeline',
description: 'Schrittweise Seitenrekonstruktion',
purpose: 'Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. 6-Schritt-Pipeline mit Ground Truth Validierung.',
audience: ['Entwickler', 'Data Scientists'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-overlay',
name: 'OCR Overlay',
href: '/ai/ocr-overlay',
description: 'Ganzseitige Overlay-Rekonstruktion',
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
audience: ['Entwickler'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'test-quality',
name: 'Test Quality (BQAS)',

View File

@@ -23,7 +23,7 @@ export const roles: Role[] = [
name: 'Entwickler',
description: 'Voller Zugriff auf alle Bereiche',
icon: 'code',
visibleCategories: ['communication', 'ai', 'education', 'website'],
visibleCategories: ['compliance-sdk', 'ai', 'education', 'website'],
color: 'bg-primary-100 border-primary-300 text-primary-700',
},
{
@@ -31,7 +31,7 @@ export const roles: Role[] = [
name: 'Manager',
description: 'Executive Uebersicht',
icon: 'chart',
visibleCategories: ['communication', 'website'],
visibleCategories: ['compliance-sdk', 'website'],
color: 'bg-blue-100 border-blue-300 text-blue-700',
},
{
@@ -39,7 +39,7 @@ export const roles: Role[] = [
name: 'Auditor',
description: 'Compliance Pruefung',
icon: 'clipboard',
visibleCategories: ['communication'],
visibleCategories: ['compliance-sdk'],
color: 'bg-amber-100 border-amber-300 text-amber-700',
},
{
@@ -47,7 +47,7 @@ export const roles: Role[] = [
name: 'DSB',
description: 'Datenschutzbeauftragter',
icon: 'shield',
visibleCategories: ['communication'],
visibleCategories: ['compliance-sdk'],
color: 'bg-purple-100 border-purple-300 text-purple-700',
},
]

View File

@@ -2,8 +2,6 @@
const nextConfig = {
output: 'standalone',
reactStrictMode: true,
// Force unique build ID to bust browser caches on each deploy
generateBuildId: () => `build-${Date.now()}`,
// TODO: Remove after fixing type incompatibilities from restore
typescript: {
ignoreBuildErrors: true,

View File

@@ -8,7 +8,6 @@
"name": "breakpilot-admin-v2",
"version": "1.0.0",
"dependencies": {
"bpmn-js": "^18.0.1",
"jspdf": "^4.1.0",
"jszip": "^3.10.1",
"lucide-react": "^0.468.0",
@@ -16,7 +15,6 @@
"react": "^18.3.1",
"react-dom": "^18.3.1",
"reactflow": "^11.11.4",
"recharts": "^2.15.0",
"uuid": "^13.0.0"
},
"devDependencies": {
@@ -430,16 +428,6 @@
"node": ">=6.9.0"
}
},
"node_modules/@bpmn-io/diagram-js-ui": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@bpmn-io/diagram-js-ui/-/diagram-js-ui-0.2.3.tgz",
"integrity": "sha512-OGyjZKvGK8tHSZ0l7RfeKhilGoOGtFDcoqSGYkX0uhFlo99OVZ9Jn1K7TJGzcE9BdKwvA5Y5kGqHEhdTxHvFfw==",
"license": "MIT",
"dependencies": {
"htm": "^3.1.1",
"preact": "^10.11.2"
}
},
"node_modules/@csstools/color-helpers": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
@@ -3008,39 +2996,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/bpmn-js": {
"version": "18.12.0",
"resolved": "https://registry.npmjs.org/bpmn-js/-/bpmn-js-18.12.0.tgz",
"integrity": "sha512-Dg2O+r7jpBwLgWGpManc7P4ZfZQfxTVi2xNtXR3Q2G5Hx1RVYVFoNsQED8+FPCgjy6m7ZQbxKP1sjCJt5rbtBg==",
"license": "SEE LICENSE IN LICENSE",
"dependencies": {
"bpmn-moddle": "^10.0.0",
"diagram-js": "^15.9.0",
"diagram-js-direct-editing": "^3.3.0",
"ids": "^3.0.0",
"inherits-browser": "^0.1.0",
"min-dash": "^5.0.0",
"min-dom": "^5.2.0",
"tiny-svg": "^4.1.4"
},
"engines": {
"node": "*"
}
},
"node_modules/bpmn-moddle": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/bpmn-moddle/-/bpmn-moddle-10.0.0.tgz",
"integrity": "sha512-vXePD5jkatcILmM3zwJG/m6IIHIghTGB7WvgcdEraEw8E8VdJHrTgrvBUhbzqaXJpnsGQz15QS936xeBY6l9aA==",
"license": "MIT",
"dependencies": {
"min-dash": "^5.0.0",
"moddle": "^8.0.0",
"moddle-xml": "^12.0.0"
},
"engines": {
"node": ">= 20.12"
}
},
"node_modules/braces": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
@@ -3198,15 +3153,6 @@
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==",
"license": "MIT"
},
"node_modules/clsx": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz",
"integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/commander": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz",
@@ -3316,20 +3262,9 @@
"version": "3.2.3",
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
"integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
"devOptional": true,
"license": "MIT"
},
"node_modules/d3-array": {
"version": "3.2.4",
"resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz",
"integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==",
"license": "ISC",
"dependencies": {
"internmap": "1 - 2"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-color": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz",
@@ -3370,15 +3305,6 @@
"node": ">=12"
}
},
"node_modules/d3-format": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz",
"integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/d3-interpolate": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz",
@@ -3391,31 +3317,6 @@
"node": ">=12"
}
},
"node_modules/d3-path": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
"integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/d3-scale": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
"integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==",
"license": "ISC",
"dependencies": {
"d3-array": "2.10.0 - 3",
"d3-format": "1 - 3",
"d3-interpolate": "1.2.0 - 3",
"d3-time": "2.1.1 - 3",
"d3-time-format": "2 - 4"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-selection": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
@@ -3425,42 +3326,6 @@
"node": ">=12"
}
},
"node_modules/d3-shape": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz",
"integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==",
"license": "ISC",
"dependencies": {
"d3-path": "^3.1.0"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-time": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz",
"integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==",
"license": "ISC",
"dependencies": {
"d3-array": "2 - 3"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-time-format": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz",
"integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==",
"license": "ISC",
"dependencies": {
"d3-time": "1 - 3"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-timer": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz",
@@ -3544,12 +3409,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/decimal.js-light": {
"version": "2.5.1",
"resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz",
"integrity": "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==",
"license": "MIT"
},
"node_modules/dequal": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
@@ -3570,51 +3429,6 @@
"node": ">=8"
}
},
"node_modules/diagram-js": {
"version": "15.9.1",
"resolved": "https://registry.npmjs.org/diagram-js/-/diagram-js-15.9.1.tgz",
"integrity": "sha512-2JsGmyeTo6o39beq2e/UkTfMopQSM27eXBUzbYQ+1m5VhEnQDkcjcrnRCjcObLMzzXSE/LSJyYhji90sqBFodQ==",
"license": "MIT",
"dependencies": {
"@bpmn-io/diagram-js-ui": "^0.2.3",
"clsx": "^2.1.1",
"didi": "^11.0.0",
"inherits-browser": "^0.1.0",
"min-dash": "^5.0.0",
"min-dom": "^5.2.0",
"object-refs": "^0.4.0",
"path-intersection": "^4.1.0",
"tiny-svg": "^4.1.4"
},
"engines": {
"node": "*"
}
},
"node_modules/diagram-js-direct-editing": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/diagram-js-direct-editing/-/diagram-js-direct-editing-3.3.0.tgz",
"integrity": "sha512-EjXYb35J3qBU8lLz5U81hn7wNykVmF7U5DXZ7BvPok2IX7rmPz+ZyaI5AEMiqaC6lpSnHqPxFcPgKEiJcAiv5w==",
"license": "MIT",
"dependencies": {
"min-dash": "^5.0.0",
"min-dom": "^5.2.0"
},
"engines": {
"node": "*"
},
"peerDependencies": {
"diagram-js": "*"
}
},
"node_modules/didi": {
"version": "11.0.0",
"resolved": "https://registry.npmjs.org/didi/-/didi-11.0.0.tgz",
"integrity": "sha512-PzCfRzQttvFpVcYMbSF7h8EsWjeJpVjWH4qDhB5LkMi1ILvHq4Ob0vhM2wLFziPkbUBi+PAo7ODbe2sacR7nJQ==",
"license": "MIT",
"engines": {
"node": ">= 20.12"
}
},
"node_modules/didyoumean": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
@@ -3637,28 +3451,6 @@
"license": "MIT",
"peer": true
},
"node_modules/dom-helpers": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz",
"integrity": "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.8.7",
"csstype": "^3.0.2"
}
},
"node_modules/domify": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/domify/-/domify-3.0.0.tgz",
"integrity": "sha512-bs2yO68JDFOm6rKv8f0EnrM2cENduhRkpqOtt/s5l5JBA/eqGBZCzLPmdYoHtJ6utgLGgcBajFsEQbl12pT0lQ==",
"license": "MIT",
"engines": {
"node": ">=20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/dompurify": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.1.tgz",
@@ -3758,12 +3550,6 @@
"@types/estree": "^1.0.0"
}
},
"node_modules/eventemitter3": {
"version": "4.0.7",
"resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==",
"license": "MIT"
},
"node_modules/expect-type": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz",
@@ -3774,15 +3560,6 @@
"node": ">=12.0.0"
}
},
"node_modules/fast-equals": {
"version": "5.4.0",
"resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-5.4.0.tgz",
"integrity": "sha512-jt2DW/aNFNwke7AUd+Z+e6pz39KO5rzdbbFCg2sGafS4mk13MI7Z8O5z9cADNn5lhGODIgLwug6TZO2ctf7kcw==",
"license": "MIT",
"engines": {
"node": ">=6.0.0"
}
},
"node_modules/fast-glob": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
@@ -3928,12 +3705,6 @@
"node": ">= 0.4"
}
},
"node_modules/htm": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/htm/-/htm-3.1.1.tgz",
"integrity": "sha512-983Vyg8NwUE7JkZ6NmOqpCZ+sh1bKv2iYTlUkzlWmA5JD2acKoxd4KVxbMmxX/85mtfdnDmTFoNKcg5DGAvxNQ==",
"license": "Apache-2.0"
},
"node_modules/html-encoding-sniffer": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz",
@@ -3989,15 +3760,6 @@
"node": ">= 14"
}
},
"node_modules/ids": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/ids/-/ids-3.0.1.tgz",
"integrity": "sha512-mr0zAgpgA/hzCrHB0DnoTG6xZjNC3ABs4eaksXrpVtfaDatA2SVdDb1ZPLjmKjqzp4kexQRuHXwDWQILVK8FZQ==",
"license": "MIT",
"engines": {
"node": ">= 20.12"
}
},
"node_modules/immediate": {
"version": "3.0.6",
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
@@ -4020,21 +3782,6 @@
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
"license": "ISC"
},
"node_modules/inherits-browser": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/inherits-browser/-/inherits-browser-0.1.0.tgz",
"integrity": "sha512-CJHHvW3jQ6q7lzsXPpapLdMx5hDpSF3FSh45pwsj6bKxJJ8Nl8v43i5yXnr3BdfOimGHKyniewQtnAIp3vyJJw==",
"license": "ISC"
},
"node_modules/internmap": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz",
"integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/iobuffer": {
"version": "5.4.0",
"resolved": "https://registry.npmjs.org/iobuffer/-/iobuffer-5.4.0.tgz",
@@ -4262,12 +4009,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/lodash": {
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
"license": "MIT"
},
"node_modules/loose-envify": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@@ -4351,22 +4092,6 @@
"node": ">=8.6"
}
},
"node_modules/min-dash": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/min-dash/-/min-dash-5.0.0.tgz",
"integrity": "sha512-EGuoBnVL7/Fnv2sqakpX5WGmZehZ3YMmLayT7sM8E9DRU74kkeyMg4Rik1lsOkR2GbFNeBca4/L+UfU6gF0Edw==",
"license": "MIT"
},
"node_modules/min-dom": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/min-dom/-/min-dom-5.3.0.tgz",
"integrity": "sha512-0w5FEBgPAyHhmFojW3zxd7we3D+m5XYS3E/06OyvxmbHJoiQVa4Nagj6RWvoAKYRw5xth6cP5TMePc5cR1M9hA==",
"license": "MIT",
"dependencies": {
"domify": "^3.0.0",
"min-dash": "^5.0.0"
}
},
"node_modules/min-indent": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz",
@@ -4377,31 +4102,6 @@
"node": ">=4"
}
},
"node_modules/moddle": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/moddle/-/moddle-8.1.0.tgz",
"integrity": "sha512-dBddc1CNuZHgro8nQWwfPZ2BkyLWdnxoNpPu9d+XKPN96DAiiBOeBw527ft++ebDuFez5PMdaR3pgUgoOaUGrA==",
"license": "MIT",
"dependencies": {
"min-dash": "^5.0.0"
}
},
"node_modules/moddle-xml": {
"version": "12.0.0",
"resolved": "https://registry.npmjs.org/moddle-xml/-/moddle-xml-12.0.0.tgz",
"integrity": "sha512-NJc2+sCe4tvuGlaUBcoZcYf6j9f+z+qxHOyGm/LB3ZrlJXVPPHoBTg/KXgDRCufdBJhJ3AheFs3QU/abABNzRg==",
"license": "MIT",
"dependencies": {
"min-dash": "^5.0.0",
"saxen": "^11.0.2"
},
"engines": {
"node": ">= 18"
},
"peerDependencies": {
"moddle": ">= 6.2.0"
}
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -4540,6 +4240,7 @@
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
@@ -4555,15 +4256,6 @@
"node": ">= 6"
}
},
"node_modules/object-refs": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/object-refs/-/object-refs-0.4.0.tgz",
"integrity": "sha512-6kJqKWryKZmtte6QYvouas0/EIJKPI1/MMIuRsiBlNuhIMfqYTggzX2F1AJ2+cDs288xyi9GL7FyasHINR98BQ==",
"license": "MIT",
"engines": {
"node": "*"
}
},
"node_modules/obug": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz",
@@ -4594,15 +4286,6 @@
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/path-intersection": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/path-intersection/-/path-intersection-4.1.0.tgz",
"integrity": "sha512-urUP6WvhnxbHPdHYl6L7Yrc6+1ny6uOFKPCzPxTSUSYGHG0o94RmI7SvMMaScNAM5RtTf08bg4skc6/kjfne3A==",
"license": "MIT",
"engines": {
"node": ">= 14.20"
}
},
"node_modules/path-parse": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
@@ -4872,16 +4555,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/preact": {
"version": "10.28.4",
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.4.tgz",
"integrity": "sha512-uKFfOHWuSNpRFVTnljsCluEFq57OKT+0QdOiQo8XWnQ/pSvg7OpX5eNOejELXJMWy+BwM2nobz0FkvzmnpCNsQ==",
"license": "MIT",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/preact"
}
},
"node_modules/pretty-format": {
"version": "27.5.1",
"resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz",
@@ -4904,23 +4577,6 @@
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
"license": "MIT"
},
"node_modules/prop-types": {
"version": "15.8.1",
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
"license": "MIT",
"dependencies": {
"loose-envify": "^1.4.0",
"object-assign": "^4.1.1",
"react-is": "^16.13.1"
}
},
"node_modules/prop-types/node_modules/react-is": {
"version": "16.13.1",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
"license": "MIT"
},
"node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
@@ -5005,37 +4661,6 @@
"node": ">=0.10.0"
}
},
"node_modules/react-smooth": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.4.tgz",
"integrity": "sha512-gnGKTpYwqL0Iii09gHobNolvX4Kiq4PKx6eWBCYYix+8cdw+cGo3do906l1NBPKkSWx1DghC1dlWG9L2uGd61Q==",
"license": "MIT",
"dependencies": {
"fast-equals": "^5.0.1",
"prop-types": "^15.8.1",
"react-transition-group": "^4.4.5"
},
"peerDependencies": {
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
"node_modules/react-transition-group": {
"version": "4.4.5",
"resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz",
"integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==",
"license": "BSD-3-Clause",
"dependencies": {
"@babel/runtime": "^7.5.5",
"dom-helpers": "^5.0.1",
"loose-envify": "^1.4.0",
"prop-types": "^15.6.2"
},
"peerDependencies": {
"react": ">=16.6.0",
"react-dom": ">=16.6.0"
}
},
"node_modules/reactflow": {
"version": "11.11.4",
"resolved": "https://registry.npmjs.org/reactflow/-/reactflow-11.11.4.tgz",
@@ -5092,44 +4717,6 @@
"node": ">=8.10.0"
}
},
"node_modules/recharts": {
"version": "2.15.4",
"resolved": "https://registry.npmjs.org/recharts/-/recharts-2.15.4.tgz",
"integrity": "sha512-UT/q6fwS3c1dHbXv2uFgYJ9BMFHu3fwnd7AYZaEQhXuYQ4hgsxLvsUXzGdKeZrW5xopzDCvuA2N41WJ88I7zIw==",
"license": "MIT",
"dependencies": {
"clsx": "^2.0.0",
"eventemitter3": "^4.0.1",
"lodash": "^4.17.21",
"react-is": "^18.3.1",
"react-smooth": "^4.0.4",
"recharts-scale": "^0.4.4",
"tiny-invariant": "^1.3.1",
"victory-vendor": "^36.6.8"
},
"engines": {
"node": ">=14"
},
"peerDependencies": {
"react": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
"node_modules/recharts-scale": {
"version": "0.4.5",
"resolved": "https://registry.npmjs.org/recharts-scale/-/recharts-scale-0.4.5.tgz",
"integrity": "sha512-kivNFO+0OcUNu7jQquLXAxz1FIwZj8nrj+YkOKc5694NbjCvcT6aSZiIzNzd2Kul4o4rTto8QVR9lMNtxD4G1w==",
"license": "MIT",
"dependencies": {
"decimal.js-light": "^2.4.1"
}
},
"node_modules/recharts/node_modules/react-is": {
"version": "18.3.1",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz",
"integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==",
"license": "MIT"
},
"node_modules/redent": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz",
@@ -5278,15 +4865,6 @@
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
"license": "MIT"
},
"node_modules/saxen": {
"version": "11.0.2",
"resolved": "https://registry.npmjs.org/saxen/-/saxen-11.0.2.tgz",
"integrity": "sha512-WDb4gqac8uiJzOdOdVpr9NWh9NrJMm7Brn5GX2Poj+mjE/QTXqYQENr8T/mom54dDDgbd3QjwTg23TRHYiWXRA==",
"license": "MIT",
"engines": {
"node": ">= 20.12"
}
},
"node_modules/saxes": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
@@ -5582,21 +5160,6 @@
"node": ">=0.8"
}
},
"node_modules/tiny-invariant": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
"license": "MIT"
},
"node_modules/tiny-svg": {
"version": "4.1.4",
"resolved": "https://registry.npmjs.org/tiny-svg/-/tiny-svg-4.1.4.tgz",
"integrity": "sha512-cBaEACCbouYrQc9RG+eTXnPYosX1Ijqty/I6DdXovwDd89Pwu4jcmpOR7BuFEF9YCcd7/AWwasE0207WMK7hdw==",
"license": "MIT",
"engines": {
"node": ">= 20"
}
},
"node_modules/tinybench": {
"version": "2.9.0",
"resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
@@ -5844,28 +5407,6 @@
"uuid": "dist-node/bin/uuid"
}
},
"node_modules/victory-vendor": {
"version": "36.9.2",
"resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.9.2.tgz",
"integrity": "sha512-PnpQQMuxlwYdocC8fIJqVXvkeViHYzotI+NJrCuav0ZYFoq912ZHBk3mCeuj+5/VpodOjPe1z0Fk2ihgzlXqjQ==",
"license": "MIT AND ISC",
"dependencies": {
"@types/d3-array": "^3.0.3",
"@types/d3-ease": "^3.0.0",
"@types/d3-interpolate": "^3.0.1",
"@types/d3-scale": "^4.0.2",
"@types/d3-shape": "^3.1.0",
"@types/d3-time": "^3.0.0",
"@types/d3-timer": "^3.0.0",
"d3-array": "^3.1.6",
"d3-ease": "^3.0.1",
"d3-interpolate": "^3.0.1",
"d3-scale": "^4.0.2",
"d3-shape": "^3.1.0",
"d3-time": "^3.0.0",
"d3-timer": "^3.0.1"
}
},
"node_modules/vite": {
"version": "7.3.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz",

View File

@@ -18,6 +18,7 @@
"test:all": "vitest run && playwright test --project=chromium"
},
"dependencies": {
"bpmn-js": "^18.0.1",
"jspdf": "^4.1.0",
"jszip": "^3.10.1",
"lucide-react": "^0.468.0",
@@ -26,7 +27,6 @@
"react-dom": "^18.3.1",
"reactflow": "^11.11.4",
"recharts": "^2.15.0",
"fabric": "^6.0.0",
"uuid": "^13.0.0"
},
"devDependencies": {

File diff suppressed because one or more lines are too long

View File

@@ -119,6 +119,13 @@ export const AI_PIPELINE_MODULES: AIModuleLink[] = [
* Kein direkter Datenfluss zur Pipeline.
*/
export const AI_TOOLS_MODULES: AIModuleLink[] = [
{
id: 'llm-compare',
name: 'LLM Vergleich',
href: '/ai/llm-compare',
description: 'KI-Provider Vergleich & Evaluation',
icon: '⚖️',
},
{
id: 'test-quality',
name: 'Test Quality (BQAS)',
@@ -205,7 +212,27 @@ export const AI_MODULE_RELATIONS: Record<string, AIModuleLink[]> = {
},
],
// KI-Werkzeuge Relations (Standalone-Tools)
'llm-compare': [
{
id: 'test-quality',
name: 'Test Quality (BQAS)',
href: '/ai/test-quality',
description: 'Golden Suite & Synthetic Tests',
},
{
id: 'agents',
name: 'Agent Management',
href: '/ai/agents',
description: 'Multi-Agent System',
},
],
'test-quality': [
{
id: 'llm-compare',
name: 'LLM Vergleich',
href: '/ai/llm-compare',
description: 'KI-Provider vergleichen',
},
{
id: 'klausur-korrektur',
name: 'Klausur-Korrektur',

323
docker-compose.coolify.yml Normal file
View File

@@ -0,0 +1,323 @@
# =========================================================
# BreakPilot Lehrer — KI-Lehrerplattform (Coolify)
# =========================================================
# Requires: breakpilot-core must be running
# Deployed via Coolify. SSL termination handled by Traefik.
# External services (managed separately in Coolify):
# - PostgreSQL, Qdrant, S3-compatible storage
# =========================================================
# Shared network: declared external, so it must already exist under the name
# "breakpilot-network"; this file attaches to it rather than creating it.
networks:
breakpilot-network:
external: true
name: breakpilot-network
# Named volumes referenced by the services below.
# klausur_uploads, eh_uploads, ocr_labeling, paddle_models -> klausur-service
# lehrer_backend_data -> backend-lehrer and admin-lehrer (both mount it at /app/data)
# opensearch_data -> opensearch
volumes:
klausur_uploads:
eh_uploads:
ocr_labeling:
paddle_models:
lehrer_backend_data:
opensearch_data:
services:
# =========================================================
# FRONTEND
# =========================================================
# admin-lehrer: frontend built from ./admin-lehrer, reachable through Traefik at
# admin-lehrer.breakpilot.ai and forwarded to container port 3000.
admin-lehrer:
build:
context: ./admin-lehrer
dockerfile: Dockerfile
# Build arguments; each uses the value after ":-" when the variable is unset or empty.
# NOTE(review): this file defines no service for voice.breakpilot.ai; the default
# presumably targets a separately deployed voice service - confirm.
args:
NEXT_PUBLIC_API_URL: ${NEXT_PUBLIC_API_URL:-https://api-lehrer.breakpilot.ai}
NEXT_PUBLIC_OLD_ADMIN_URL: ${NEXT_PUBLIC_OLD_ADMIN_URL:-}
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
container_name: bp-lehrer-admin
# "expose" makes the port reachable from other containers only; no host port is published.
expose:
- "3000"
# Same named volume that backend-lehrer mounts at /app/data.
volumes:
- lehrer_backend_data:/app/data
# Runtime configuration: sibling services are addressed by their compose service name,
# the consent service by its bp-core-* host name on the shared network.
environment:
NODE_ENV: production
BACKEND_URL: http://backend-lehrer:8001
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
KLAUSUR_SERVICE_URL: http://klausur-service:8086
OLLAMA_URL: ${OLLAMA_URL:-}
# Long-form dependency: waits only for backend-lehrer to be started, not for it to be healthy.
depends_on:
backend-lehrer:
condition: service_started
# Traefik: HTTPS router for the host below, certificate via the "letsencrypt" resolver.
labels:
- "traefik.enable=true"
- "traefik.http.routers.admin-lehrer.rule=Host(`admin-lehrer.breakpilot.ai`)"
- "traefik.http.routers.admin-lehrer.entrypoints=https"
- "traefik.http.routers.admin-lehrer.tls=true"
- "traefik.http.routers.admin-lehrer.tls.certresolver=letsencrypt"
- "traefik.http.services.admin-lehrer.loadbalancer.server.port=3000"
restart: unless-stopped
networks:
- breakpilot-network
# studio-v2: frontend built from ./studio-v2, reachable through Traefik at
# app.breakpilot.ai and forwarded to container port 3001.
studio-v2:
build:
context: ./studio-v2
dockerfile: Dockerfile
# Build arguments; each uses the value after ":-" when the variable is unset or empty.
args:
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
container_name: bp-lehrer-studio-v2
# Container-internal port only; no host port is published.
expose:
- "3001"
environment:
NODE_ENV: production
BACKEND_URL: http://backend-lehrer:8001
# Short-form dependency (admin-lehrer uses the long form with an explicit condition).
depends_on:
- backend-lehrer
# Traefik: the router and service are named "studio", not "studio-v2".
labels:
- "traefik.enable=true"
- "traefik.http.routers.studio.rule=Host(`app.breakpilot.ai`)"
- "traefik.http.routers.studio.entrypoints=https"
- "traefik.http.routers.studio.tls=true"
- "traefik.http.routers.studio.tls.certresolver=letsencrypt"
- "traefik.http.services.studio.loadbalancer.server.port=3001"
restart: unless-stopped
networks:
- breakpilot-network
# website: public site built from ./website, reachable through Traefik at
# www.breakpilot.ai and forwarded to container port 3000.
website:
build:
context: ./website
dockerfile: Dockerfile
# Build arguments; each uses the value after ":-" when the variable is unset or empty.
# The Stripe publishable key defaults to an empty string.
args:
NEXT_PUBLIC_BILLING_API_URL: ${NEXT_PUBLIC_BILLING_API_URL:-https://api-core.breakpilot.ai}
NEXT_PUBLIC_APP_URL: ${NEXT_PUBLIC_APP_URL:-https://app.breakpilot.ai}
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: ${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY:-}
container_name: bp-lehrer-website
# Container-internal port only; no host port is published.
expose:
- "3000"
# Runtime configuration; API keys and the edu-search URL default to empty strings.
# NOTE(review): EDU_SEARCH_URL is not defaulted to the edu-search-service defined
# in this file (port 8088) - confirm it is set in the deployment environment.
environment:
NODE_ENV: production
VAST_API_KEY: ${VAST_API_KEY:-}
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
BACKEND_URL: http://backend-lehrer:8001
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
EDU_SEARCH_URL: ${EDU_SEARCH_URL:-}
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
depends_on:
- backend-lehrer
# Traefik: HTTPS router for the host below, certificate via the "letsencrypt" resolver.
labels:
- "traefik.enable=true"
- "traefik.http.routers.website.rule=Host(`www.breakpilot.ai`)"
- "traefik.http.routers.website.entrypoints=https"
- "traefik.http.routers.website.tls=true"
- "traefik.http.routers.website.tls.certresolver=letsencrypt"
- "traefik.http.services.website.loadbalancer.server.port=3000"
restart: unless-stopped
networks:
- breakpilot-network
# =========================================================
# BACKEND
# =========================================================
# backend-lehrer: API built from ./backend-lehrer, reachable through Traefik at
# api-lehrer.breakpilot.ai and forwarded to container port 8001. The three
# frontends depend on it and school-service uses it as its LLM gateway.
backend-lehrer:
build:
context: ./backend-lehrer
dockerfile: Dockerfile
container_name: bp-lehrer-backend
# NOTE(review): runs as uid 0 / gid 0 (root), presumably so it can write to the
# /app/data volume - confirm whether a non-root user with fixed ownership would work.
user: "0:0"
# Container-internal port only; no host port is published.
expose:
- "8001"
volumes:
- lehrer_backend_data:/app/data
environment:
PORT: 8001
# External Postgres via the asyncpg driver. "%3D" is a URL-encoded "=", so the
# options parameter reads "-csearch_path=lehrer,core,public".
# POSTGRES_USER/PASSWORD/HOST/DB have no fallback here; only the port defaults to 5432.
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}?options=-csearch_path%3Dlehrer,core,public
# No fallback: an unset JWT_SECRET is substituted as an empty string.
JWT_SECRET: ${JWT_SECRET}
ENVIRONMENT: production
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
KLAUSUR_SERVICE_URL: http://klausur-service:8086
# Optional integrations; both default to an empty string.
TROCR_SERVICE_URL: ${TROCR_SERVICE_URL:-}
CAMUNDA_URL: ${CAMUNDA_URL:-}
VALKEY_URL: redis://bp-core-valkey:6379/0
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
DEBUG: "false"
ALERTS_AGENT_ENABLED: ${ALERTS_AGENT_ENABLED:-false}
VAST_API_KEY: ${VAST_API_KEY:-}
VAST_INSTANCE_ID: ${VAST_INSTANCE_ID:-}
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
# Ollama is off by default; URL and model names default to empty strings.
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
OLLAMA_TIMEOUT: ${OLLAMA_TIMEOUT:-120}
# Game feature flags; database, auth and billing all default to "true".
GAME_USE_DATABASE: ${GAME_USE_DATABASE:-true}
GAME_REQUIRE_AUTH: ${GAME_REQUIRE_AUTH:-true}
GAME_REQUIRE_BILLING: ${GAME_REQUIRE_BILLING:-true}
GAME_LLM_MODEL: ${GAME_LLM_MODEL:-}
# SMTP host, username and password have no fallback and must come from the environment.
SMTP_HOST: ${SMTP_HOST}
SMTP_PORT: ${SMTP_PORT:-587}
SMTP_USERNAME: ${SMTP_USERNAME}
SMTP_PASSWORD: ${SMTP_PASSWORD}
SMTP_FROM_NAME: ${SMTP_FROM_NAME:-BreakPilot}
SMTP_FROM_ADDR: ${SMTP_FROM_ADDR:-noreply@breakpilot.ai}
RAG_SERVICE_URL: http://bp-core-rag-service:8097
# NOTE(review): no healthcheck is defined for this service, so dependents can only
# wait for "started" - consider adding one if the backend exposes a health endpoint.
# Traefik: HTTPS router for the host below, certificate via the "letsencrypt" resolver.
labels:
- "traefik.enable=true"
- "traefik.http.routers.backend-lehrer.rule=Host(`api-lehrer.breakpilot.ai`)"
- "traefik.http.routers.backend-lehrer.entrypoints=https"
- "traefik.http.routers.backend-lehrer.tls=true"
- "traefik.http.routers.backend-lehrer.tls.certresolver=letsencrypt"
- "traefik.http.services.backend-lehrer.loadbalancer.server.port=8001"
restart: unless-stopped
networks:
- breakpilot-network
# =========================================================
# MICROSERVICES
# =========================================================
# klausur-service: built from ./klausur-service, reachable through Traefik at
# klausur.breakpilot.ai and forwarded to container port 8086.
klausur-service:
build:
context: ./klausur-service
dockerfile: Dockerfile
container_name: bp-lehrer-klausur-service
# Container-internal port only; no host port is published.
expose:
- "8086"
# Persistent storage for uploads, OCR labeling data and the PaddleX directory under /root.
volumes:
- klausur_uploads:/app/uploads
- eh_uploads:/app/eh-uploads
- ocr_labeling:/app/ocr-labeling
- paddle_models:/root/.paddlex
environment:
# No fallback: an unset JWT_SECRET is substituted as an empty string.
JWT_SECRET: ${JWT_SECRET}
BACKEND_URL: http://backend-lehrer:8001
SCHOOL_SERVICE_URL: http://school-service:8084
ENVIRONMENT: production
# NOTE(review): unlike backend-lehrer, this URL sets no search_path option - confirm
# that the service's tables are expected in the default schema.
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
EMBEDDING_SERVICE_URL: http://bp-core-embedding-service:8087
# External Qdrant; no fallback value.
QDRANT_URL: ${QDRANT_URL}
# The service reads MINIO_* names, filled here from the generic S3_* variables.
# Endpoint and keys have no fallback; the bucket defaults to "breakpilot-rag"
# and MINIO_SECURE defaults to "true".
MINIO_ENDPOINT: ${S3_ENDPOINT}
MINIO_ACCESS_KEY: ${S3_ACCESS_KEY}
MINIO_SECRET_KEY: ${S3_SECRET_KEY}
MINIO_BUCKET: ${S3_BUCKET:-breakpilot-rag}
MINIO_SECURE: ${S3_SECURE:-true}
PADDLEOCR_SERVICE_URL: ${PADDLEOCR_SERVICE_URL:-}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
# Ollama is off by default; URL and model names default to empty strings.
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
RAG_SERVICE_URL: http://bp-core-rag-service:8097
# Waits only for school-service to be started (school-service defines no healthcheck).
depends_on:
school-service:
condition: service_started
# Probes GET /health on the container's own port with curl, which therefore has to
# exist in the image.
# NOTE(review): the 30s timeout equals the 30s interval, and start_period is only 10s
# - confirm these values suit the service's real startup time.
healthcheck:
test: ["CMD", "curl", "-f", "http://127.0.0.1:8086/health"]
interval: 30s
timeout: 30s
retries: 3
start_period: 10s
# Traefik: the router and service are named "klausur".
labels:
- "traefik.enable=true"
- "traefik.http.routers.klausur.rule=Host(`klausur.breakpilot.ai`)"
- "traefik.http.routers.klausur.entrypoints=https"
- "traefik.http.routers.klausur.tls=true"
- "traefik.http.routers.klausur.tls.certresolver=letsencrypt"
- "traefik.http.services.klausur.loadbalancer.server.port=8086"
restart: unless-stopped
networks:
- breakpilot-network
# school-service: built from ./school-service and listening on container port 8084.
# It carries no Traefik labels, so it is only reachable from other containers on
# breakpilot-network (klausur-service addresses it as http://school-service:8084).
school-service:
  build:
    context: ./school-service
    dockerfile: Dockerfile
  container_name: bp-lehrer-school-service
  # Container-internal port only; no host port is published.
  expose:
    - "8084"
  environment:
    # External Postgres; user, password, host and database have no fallback,
    # only the port defaults to 5432.
    DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
    # No fallback: an unset JWT_SECRET is substituted as an empty string.
    JWT_SECRET: ${JWT_SECRET}
    PORT: 8084
    ENVIRONMENT: production
    # CORS origins were hard-coded to the wildcard "*" although ENVIRONMENT is
    # "production". The value can now be narrowed per deployment through
    # SCHOOL_ALLOWED_ORIGINS; when that variable is unset or empty the previous
    # wildcard is kept, so existing deployments behave exactly as before.
    # NOTE(review): the list format the service expects is not visible here - confirm
    # it before setting more than one origin.
    ALLOWED_ORIGINS: "${SCHOOL_ALLOWED_ORIGINS:-*}"
    # LLM requests are routed through backend-lehrer.
    LLM_GATEWAY_URL: http://backend-lehrer:8001/llm
  restart: unless-stopped
  networks:
    - breakpilot-network
# =========================================================
# EDU SEARCH
# =========================================================
# opensearch: single-node OpenSearch 2.11.1 backing edu-search-service. It has no
# Traefik labels and no expose/ports entry; edu-search-service reaches it at
# http://opensearch:9200 on breakpilot-network.
opensearch:
image: opensearchproject/opensearch:2.11.1
container_name: bp-lehrer-opensearch
environment:
- cluster.name=edu-search-cluster
- node.name=opensearch-node1
- discovery.type=single-node
# Memory locking is requested here and permitted by the unlimited memlock ulimit below.
- bootstrap.memory_lock=true
# Fixed 512 MB JVM heap (min = max).
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
# NOTE(review): falls back to the literal password "Admin123!" when OPENSEARCH_PASSWORD
# is unset or empty - consider making the variable mandatory.
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD:-Admin123!}
# NOTE(review): the security plugin is disabled, so the node presumably accepts
# unauthenticated plain-HTTP requests from anything on breakpilot-network - confirm
# this is acceptable for the shared external network.
- plugins.security.disabled=true
ulimits:
memlock:
soft: -1
hard: -1
nofile:
soft: 65536
hard: 65536
volumes:
- opensearch_data:/usr/share/opensearch/data
# Healthy once plain HTTP on port 9200 answers; edu-search-service waits for this
# (condition: service_healthy). Allows 60s of startup before failures count.
healthcheck:
test: ["CMD-SHELL", "curl -s http://localhost:9200 >/dev/null || exit 1"]
interval: 30s
timeout: 10s
retries: 5
start_period: 60s
restart: unless-stopped
networks:
- breakpilot-network
# edu-search-service: crawler/search API built from ./edu-search-service, listening
# on container port 8088. It carries no Traefik labels, so it is only reachable from
# other containers on breakpilot-network.
edu-search-service:
  build:
    context: ./edu-search-service
    dockerfile: Dockerfile
  container_name: bp-lehrer-edu-search
  # Container-internal port only; no host port is published.
  expose:
    - "8088"
  environment:
    PORT: 8088
    # Talks to the opensearch service of this file over plain HTTP.
    OPENSEARCH_URL: http://opensearch:9200
    OPENSEARCH_USERNAME: admin
    # NOTE(review): falls back to the literal password "Admin123!" when
    # OPENSEARCH_PASSWORD is unset or empty - consider making the variable mandatory.
    OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-Admin123!}
    INDEX_NAME: bp_documents_v1
    EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
    # Crawler identity and limits, passed as strings.
    USER_AGENT: "BreakpilotEduCrawler/1.0 (+contact: security@breakpilot.com)"
    RATE_LIMIT_PER_SEC: "0.2"
    MAX_DEPTH: "4"
    MAX_PAGES_PER_RUN: "500"
    # External Postgres, passed as discrete fields instead of a URL.
    # Host, user, password and database have no fallback; the port defaults to 5432.
    DB_HOST: ${POSTGRES_HOST}
    DB_PORT: ${POSTGRES_PORT:-5432}
    DB_USER: ${POSTGRES_USER}
    DB_PASSWORD: ${POSTGRES_PASSWORD}
    DB_NAME: ${POSTGRES_DB}
    # TLS to Postgres was hard-coded to "disable" although the database is now an
    # external host. POSTGRES_SSLMODE lets a deployment switch it on; when the
    # variable is unset or empty the previous value "disable" is kept.
    # NOTE(review): accepted values depend on the service's Postgres driver
    # (presumably the libpq sslmode names) - confirm before changing.
    DB_SSLMODE: ${POSTGRES_SSLMODE:-disable}
    STAFF_CRAWLER_EMAIL: crawler@breakpilot.de
  # Starts only after the opensearch healthcheck reports healthy.
  depends_on:
    opensearch:
      condition: service_healthy
  # Probes /v1/health with wget, which therefore has to exist in the image.
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8088/v1/health"]
    interval: 30s
    timeout: 3s
    start_period: 10s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network

View File

@@ -15,24 +15,11 @@ volumes:
eh_uploads:
ocr_labeling:
paddle_models:
lighton_models:
paddleocr_models:
transcription_models:
transcription_temp:
lehrer_backend_data:
opensearch_data:
# Communication (Jitsi + Matrix)
synapse_data:
synapse_db_data:
jitsi_web_config:
jitsi_web_crontabs:
jitsi_transcripts:
jitsi_prosody_config:
jitsi_prosody_plugins:
jitsi_jicofo_config:
jitsi_jvb_config:
# Voice
voice_session_data:
services:
@@ -167,6 +154,7 @@ services:
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
KLAUSUR_SERVICE_URL: http://klausur-service:8086
TROCR_SERVICE_URL: http://paddleocr-service:8095
CAMUNDA_URL: http://bp-core-camunda:8080
VALKEY_URL: redis://bp-core-valkey:6379/0
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
@@ -221,7 +209,6 @@ services:
- eh_uploads:/app/eh-uploads
- ocr_labeling:/app/ocr-labeling
- paddle_models:/root/.paddlex
- lighton_models:/root/.cache/huggingface
environment:
JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
BACKEND_URL: http://backend-lehrer:8001
@@ -236,8 +223,6 @@ services:
MINIO_BUCKET: ${MINIO_BUCKET:-breakpilot-rag}
MINIO_SECURE: "false"
PADDLEOCR_SERVICE_URL: http://paddleocr-service:8095
PADDLEOCR_REMOTE_URL: ${PADDLEOCR_REMOTE_URL:-https://hetzner.meghsakha.com:8095}
PADDLEOCR_API_KEY: ${PADDLEOCR_API_KEY:-}
VAULT_ADDR: http://bp-core-vault:8200
VAULT_TOKEN: ${VAULT_TOKEN:-breakpilot-dev-token}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
@@ -246,12 +231,6 @@ services:
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-llama3.2}
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-llama3.2-vision}
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-llama3.2}
OLLAMA_REVIEW_MODEL: ${OLLAMA_REVIEW_MODEL:-qwen3:0.6b}
OLLAMA_REVIEW_BATCH_SIZE: ${OLLAMA_REVIEW_BATCH_SIZE:-20}
REVIEW_ENGINE: ${REVIEW_ENGINE:-spell}
OCR_ENGINE: ${OCR_ENGINE:-auto}
OLLAMA_HTR_MODEL: ${OLLAMA_HTR_MODEL:-qwen2.5vl:32b}
HTR_FALLBACK_MODEL: ${HTR_FALLBACK_MODEL:-trocr-large}
RAG_SERVICE_URL: http://bp-core-rag-service:8097
extra_hosts:
- "host.docker.internal:host-gateway"
@@ -394,216 +373,6 @@ services:
networks:
- breakpilot-network
# =========================================================
# VOICE SERVICE
# =========================================================
voice-service:
build:
context: ./voice-service
dockerfile: Dockerfile
container_name: bp-lehrer-voice-service
platform: linux/arm64
expose:
- "8091"
volumes:
- voice_session_data:/app/data/sessions
environment:
PORT: 8091
DATABASE_URL: postgresql://${POSTGRES_USER:-breakpilot}:${POSTGRES_PASSWORD:-breakpilot123}@bp-core-postgres:5432/${POSTGRES_DB:-breakpilot_db}
VALKEY_URL: redis://bp-core-valkey:6379/0
KLAUSUR_SERVICE_URL: http://klausur-service:8086
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
OLLAMA_VOICE_MODEL: ${OLLAMA_VOICE_MODEL:-llama3.2}
ENVIRONMENT: ${ENVIRONMENT:-development}
JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on:
core-health-check:
condition: service_completed_successfully
healthcheck:
test: ["CMD", "curl", "-f", "http://127.0.0.1:8091/health"]
interval: 30s
timeout: 10s
start_period: 60s
retries: 3
restart: unless-stopped
networks:
- breakpilot-network
# =========================================================
# COMMUNICATION: Jitsi Meet
# =========================================================
jitsi-web:
image: jitsi/web:stable-9823
container_name: bp-lehrer-jitsi-web
expose:
- "80"
volumes:
- jitsi_web_config:/config
- jitsi_web_crontabs:/var/spool/cron/crontabs
- jitsi_transcripts:/usr/share/jitsi-meet/transcripts
environment:
ENABLE_XMPP_WEBSOCKET: "true"
ENABLE_COLIBRI_WEBSOCKET: "true"
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
XMPP_BOSH_URL_BASE: http://jitsi-xmpp:5280
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
XMPP_GUEST_DOMAIN: ${XMPP_GUEST_DOMAIN:-guest.meet.jitsi}
TZ: ${TZ:-Europe/Berlin}
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
JICOFO_AUTH_USER: focus
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
ENABLE_GUESTS: "true"
ENABLE_RECORDING: "true"
ENABLE_LIVESTREAMING: "false"
DISABLE_HTTPS: "true"
APP_NAME: "BreakPilot Meet"
NATIVE_APP_NAME: "BreakPilot Meet"
PROVIDER_NAME: "BreakPilot"
depends_on:
- jitsi-xmpp
networks:
breakpilot-network:
aliases:
- meet.jitsi
jitsi-xmpp:
image: jitsi/prosody:stable-9823
container_name: bp-lehrer-jitsi-xmpp
volumes:
- jitsi_prosody_config:/config
- jitsi_prosody_plugins:/prosody-plugins-custom
environment:
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
XMPP_GUEST_DOMAIN: ${XMPP_GUEST_DOMAIN:-guest.meet.jitsi}
XMPP_RECORDER_DOMAIN: ${XMPP_RECORDER_DOMAIN:-recorder.meet.jitsi}
XMPP_CROSS_DOMAIN: "true"
TZ: ${TZ:-Europe/Berlin}
JICOFO_AUTH_USER: focus
JICOFO_AUTH_PASSWORD: ${JICOFO_AUTH_PASSWORD:-jicofo_secret}
JVB_AUTH_USER: jvb
JVB_AUTH_PASSWORD: ${JVB_AUTH_PASSWORD:-jvb_secret}
JIBRI_XMPP_USER: jibri
JIBRI_XMPP_PASSWORD: ${JIBRI_XMPP_PASSWORD:-jibri_secret}
JIBRI_RECORDER_USER: recorder
JIBRI_RECORDER_PASSWORD: ${JIBRI_RECORDER_PASSWORD:-recorder_secret}
LOG_LEVEL: ${XMPP_LOG_LEVEL:-warn}
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
ENABLE_GUESTS: "true"
restart: unless-stopped
networks:
breakpilot-network:
aliases:
- xmpp.meet.jitsi
jitsi-jicofo:
image: jitsi/jicofo:stable-9823
container_name: bp-lehrer-jitsi-jicofo
volumes:
- jitsi_jicofo_config:/config
environment:
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
XMPP_SERVER: jitsi-xmpp
JICOFO_AUTH_USER: focus
JICOFO_AUTH_PASSWORD: ${JICOFO_AUTH_PASSWORD:-jicofo_secret}
TZ: ${TZ:-Europe/Berlin}
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
AUTH_TYPE: internal
ENABLE_AUTO_OWNER: "true"
depends_on:
- jitsi-xmpp
restart: unless-stopped
networks:
- breakpilot-network
jitsi-jvb:
image: jitsi/jvb:stable-9823
container_name: bp-lehrer-jitsi-jvb
ports:
- "10000:10000/udp"
- "8080:8080"
volumes:
- jitsi_jvb_config:/config
environment:
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
XMPP_SERVER: jitsi-xmpp
JVB_AUTH_USER: jvb
JVB_AUTH_PASSWORD: ${JVB_AUTH_PASSWORD:-jvb_secret}
JVB_PORT: 10000
JVB_STUN_SERVERS: ${JVB_STUN_SERVERS:-stun.l.google.com:19302}
TZ: ${TZ:-Europe/Berlin}
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
COLIBRI_REST_ENABLED: "true"
ENABLE_COLIBRI_WEBSOCKET: "true"
depends_on:
- jitsi-xmpp
restart: unless-stopped
networks:
- breakpilot-network
# =========================================================
# COMMUNICATION: Matrix/Synapse
# =========================================================
synapse-db:
image: postgres:16-alpine
container_name: bp-lehrer-synapse-db
profiles: [chat]
environment:
POSTGRES_USER: synapse
POSTGRES_PASSWORD: ${SYNAPSE_DB_PASSWORD:-synapse_secret}
POSTGRES_DB: synapse
POSTGRES_INITDB_ARGS: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
volumes:
- synapse_db_data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U synapse"]
interval: 5s
timeout: 5s
retries: 5
restart: unless-stopped
networks:
- breakpilot-network
synapse:
image: matrixdotorg/synapse:latest
container_name: bp-lehrer-synapse
profiles: [chat]
ports:
- "8008:8008"
- "8448:8448"
volumes:
- synapse_data:/data
environment:
SYNAPSE_SERVER_NAME: ${SYNAPSE_SERVER_NAME:-macmini}
SYNAPSE_REPORT_STATS: "no"
SYNAPSE_NO_TLS: "true"
SYNAPSE_ENABLE_REGISTRATION: ${SYNAPSE_ENABLE_REGISTRATION:-true}
SYNAPSE_LOG_LEVEL: ${SYNAPSE_LOG_LEVEL:-WARNING}
UID: "1000"
GID: "1000"
healthcheck:
test: ["CMD", "curl", "-f", "http://127.0.0.1:8008/health"]
interval: 30s
timeout: 10s
start_period: 30s
retries: 3
depends_on:
synapse-db:
condition: service_healthy
restart: unless-stopped
networks:
- breakpilot-network
# =========================================================
# EDU SEARCH
# =========================================================

View File

@@ -1,114 +0,0 @@
# Chunk-Browser
## Uebersicht
Der Chunk-Browser ermoeglicht das sequenzielle Durchblaettern aller Chunks in einer Qdrant-Collection. Er ist als Tab "Chunk-Browser" auf der RAG-Seite (`/ai/rag`) verfuegbar.
**URL:** `https://macmini:3002/ai/rag` → Tab "Chunk-Browser"
---
## Funktionen
### Collection-Auswahl
Dropdown mit allen verfuegbaren Compliance-Collections:
- `bp_compliance_gesetze`
- `bp_compliance_ce`
- `bp_compliance_datenschutz`
- `bp_dsfa_corpus`
- `bp_compliance_recht`
- `bp_legal_templates`
- `bp_compliance_gdpr`
- `bp_compliance_schulrecht`
- `bp_dsfa_templates`
- `bp_dsfa_risks`
### Seitenweise Navigation
- 20 Chunks pro Seite
- Zurueck/Weiter-Buttons
- Seitennummer und Chunk-Zaehler
- Cursor-basierte Pagination via Qdrant Scroll API
### Textsuche
- Filtert Chunks auf der aktuell geladenen Seite
- Treffer werden gelb hervorgehoben
- Suche ueber den Chunk-Text (payload.text, payload.content, payload.chunk_text)
### Chunk-Details
- Klick auf einen Chunk klappt alle Metadaten aus
- Zeigt: regulation_code, article, language, source, licence, etc.
- Chunks haben eine fortlaufende Nummer (#1, #2, ...)
### Integration mit Regulierungen-Tab
Der Button "In Chunks suchen" bei jeder Regulierung wechselt zum Chunk-Browser mit:
- Vorauswahl der richtigen Collection
- Vorausgefuelltem Suchbegriff (Regulierungsname)
---
## API
### Scroll-Endpoint (API Proxy)
```
GET /api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=20&offset={cursor}
```
**Parameter:**
| Parameter | Typ | Beschreibung |
|-----------|-----|--------------|
| `collection` | string | Qdrant Collection Name |
| `limit` | number | Chunks pro Seite (max 100) |
| `offset` | string | Cursor fuer naechste Seite (optional) |
| `text_search` | string | Textsuche-Filter (optional) |
**Response:**
```json
{
"chunks": [
{
"id": "uuid",
"text": "...",
"regulation_code": "GDPR",
"article": "Art. 5",
"language": "de"
}
],
"next_offset": "uuid-or-null",
"total_in_page": 20
}
```
### Collection-Count-Endpoint
```
GET /api/legal-corpus?action=collection-count&collection=bp_compliance_ce
```
**Response:**
```json
{
"count": 12345
}
```
---
## Technische Details
- Der API-Proxy spricht direkt mit Qdrant (Port 6333) via dessen `POST /collections/{name}/points/scroll` Endpoint
- Kein Embedding oder rag-service erforderlich
- Textsuche ist client-seitig (kein Embedding noetig)
- Pagination ist cursor-basiert (Qdrant `next_page_offset`)
---
## Weitere Features auf der RAG-Seite
### Originalquelle-Links
Jede Regulierung in der Tabelle hat einen "Originalquelle" Link zum offiziellen Dokument (EUR-Lex, gesetze-im-internet.de, etc.). Definiert in `REGULATION_SOURCES` (88 Eintraege).
### Low-Chunk-Warnung
Regulierungen mit weniger als 10 Chunks aber einem erwarteten Wert >= 10 werden mit einem Amber-Warnsymbol markiert. Dies hilft, fehlgeschlagene oder unvollstaendige Ingestions zu erkennen.

File diff suppressed because it is too large Load Diff

View File

@@ -8,15 +8,24 @@ RUN npm install
COPY frontend/ ./
RUN npm run build
# Production stage — uses pre-built base with Tesseract + Python deps.
# Base image contains: python:3.11-slim + tesseract-ocr + all pip packages.
# Rebuild base only when requirements.txt or system deps change:
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
FROM klausur-base:latest
# Production stage
FROM python:3.11-slim
WORKDIR /app
# Copy backend code (this is the only layer that changes on code edits)
# Install system dependencies (incl. Tesseract OCR for bounding-box extraction)
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
tesseract-ocr \
tesseract-ocr-deu \
tesseract-ocr-eng \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend code
COPY backend/ ./
# Copy built frontend to the expected path

View File

@@ -1,27 +0,0 @@
# Base image with system dependencies + Python packages.
# These change rarely — build once, reuse on every --no-cache.
#
# Rebuild manually when requirements.txt or system deps change:
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
#
FROM python:3.11-slim
WORKDIR /app
# System dependencies (Tesseract OCR, curl for healthcheck)
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
tesseract-ocr \
tesseract-ocr-deu \
tesseract-ocr-eng \
libgl1 \
libglib2.0-0 \
fonts-liberation \
&& rm -rf /var/lib/apt/lists/*
# Python dependencies
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Clean up pip cache
RUN rm -rf /root/.cache/pip

View File

@@ -1,471 +0,0 @@
"""
Embedded box detection and page zone splitting for the CV vocabulary pipeline.
Detects boxes (grammar tips, exercises, etc.) that span the page width and
interrupt the normal column layout. Splits the page into vertical zones so
that column detection can run independently per zone.
Two-stage algorithm (both run, results merged):
1. Morphological line detection — finds bordered boxes via horizontal lines.
2. Background shading detection — finds shaded/colored boxes via median-blur
background analysis. Works for colored (blue, green) and grayscale
(gray shading on B/W scans) boxes.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from typing import List, Optional, Tuple
import cv2
import numpy as np
from cv_vocab_types import DetectedBox, PageZone
logger = logging.getLogger(__name__)
__all__ = [
"detect_boxes",
"split_page_into_zones",
]
# ---------------------------------------------------------------------------
# Stage 1: Morphological line detection
# ---------------------------------------------------------------------------
def _detect_boxes_by_lines(
    gray: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect bordered boxes from pairs of long horizontal rules.

    Dark pixels are isolated with a fixed threshold, opened with a wide
    1-pixel-high kernel so only long horizontal strokes survive, and the
    surviving rows are grouped into line segments. Each unused segment is
    then paired with the first later segment that yields a plausible box
    height.

    Args:
        gray: Grayscale image (full page).
        content_x, content_w: Horizontal content bounds; the row projection
            is restricted to these columns.
        content_y, content_h: Vertical content bounds. Only ``content_h`` is
            used (to cap the box height); rows are scanned over the whole
            image height.

    Returns:
        One DetectedBox per paired top/bottom line, spanning the full
        content width, with confidence 0.8.
    """
    page_h, _ = gray.shape[:2]

    # Dark pixels become foreground (white on black).
    _, ink = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # Opening with a kernel at least half the content width wide (min 50px)
    # removes everything that is not a long horizontal stroke.
    rule_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (max(50, content_w // 2), 1)
    )
    rules = cv2.morphologyEx(ink, cv2.MORPH_OPEN, rule_kernel)

    # A row counts as "line" when >= 30% of the content width is stroke.
    row_hits = np.sum(rules[:, content_x:content_x + content_w] > 0, axis=1)
    is_line_row = row_hits >= content_w * 0.30

    # Collapse consecutive line rows into (start, end) runs, end exclusive.
    runs: List[Tuple[int, int]] = []
    run_start: Optional[int] = None
    for row in range(page_h):
        if is_line_row[row]:
            if run_start is None:
                run_start = row
        elif run_start is not None:
            runs.append((run_start, row))
            run_start = None
    if run_start is not None:
        runs.append((run_start, page_h))

    if len(runs) < 2:
        return []

    # Accept boxes between 30px and 70% of the content height.
    shortest = 30
    tallest = int(content_h * 0.70)

    found: List[DetectedBox] = []
    paired = set()
    for top_idx, (top_y0, top_y1) in enumerate(runs):
        if top_idx in paired:
            continue
        for bottom_idx in range(top_idx + 1, len(runs)):
            if bottom_idx in paired:
                continue
            bottom_y0, bottom_y1 = runs[bottom_idx]
            span = bottom_y1 - top_y0
            if not (shortest <= span <= tallest):
                continue

            # Border thickness is taken from the thicker of the two rules.
            thickness = max(top_y1 - top_y0, bottom_y1 - bottom_y0)
            found.append(DetectedBox(
                x=content_x,
                y=top_y0,
                width=content_w,
                height=span,
                confidence=0.8,
                border_thickness=thickness,
            ))
            paired.update((top_idx, bottom_idx))
            break  # this top line is consumed; try the next candidate

    return found
# ---------------------------------------------------------------------------
# Stage 2: Background shading detection (color + grayscale)
# ---------------------------------------------------------------------------
def _detect_boxes_by_shading(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect borderless boxes that stand out by a shaded or tinted background.

    A large median blur wipes out text strokes so only the background
    remains. Pixels that are clearly darker than the page background, or
    that carry noticeable saturation, form a mask whose external contours
    are filtered by size, rectangularity and actual background difference.

    Args:
        img_bgr: BGR color image (full page).
        content_x, content_w: Horizontal content bounds (only ``content_w``
            is used, for the size thresholds).
        content_y, content_h: Vertical content bounds (only ``content_h``
            is used, to cap the box height).

    Returns:
        One DetectedBox per accepted region; confidence is 0.7 for tinted
        regions and 0.6 for gray-shaded ones, border_thickness is 0.
    """
    page_h, page_w = img_bgr.shape[:2]

    # 31px median blur: large enough to erase text, keeps the background.
    smooth = cv2.medianBlur(img_bgr, 31)
    smooth_gray = cv2.cvtColor(smooth, cv2.COLOR_BGR2GRAY)
    smooth_hsv = cv2.cvtColor(smooth, cv2.COLOR_BGR2HSV)
    saturation = smooth_hsv[:, :, 1]

    # Page background brightness = median of the two top corner patches.
    patch = max(20, min(page_h // 10, page_w // 10))
    background = float(np.median(np.concatenate([
        smooth_gray[:patch, :patch].ravel(),
        smooth_gray[:patch, -patch:].ravel(),
    ])))

    # Candidate mask: noticeably darker than the page OR visibly saturated.
    darker = (smooth_gray < max(background - 30, 150)).astype(np.uint8) * 255
    tinted = (saturation > 20).astype(np.uint8) * 255
    mask = cv2.bitwise_or(darker, tinted)

    # Close small gaps first, then open to drop isolated specks.
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT, (25, 10))
    )
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    )

    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Size limits; narrow boxes (down to 25% of content width) are allowed.
    area_floor = content_w * 30
    width_floor = content_w * 0.25
    height_floor = 25
    height_ceiling = int(content_h * 0.70)

    found: List[DetectedBox] = []
    for outline in outlines:
        filled = cv2.contourArea(outline)
        if filled < area_floor:
            continue

        left, top, wide, tall = cv2.boundingRect(outline)
        if wide < width_floor:
            continue
        if not (height_floor <= tall <= height_ceiling):
            continue

        # Rectangularity: the contour must fill at least 50% of its
        # bounding rectangle.
        bounding = wide * tall
        if bounding > 0 and filled / bounding < 0.5:
            continue

        region_gray = smooth_gray[top:top + tall, left:left + wide]
        if region_gray.size == 0:
            continue
        region_sat = saturation[top:top + tall, left:left + wide]

        # The region's own median must really differ from the page.
        shaded = float(np.median(region_gray)) < (background - 15)
        colored = float(np.median(region_sat)) > 15
        if not (shaded or colored):
            continue

        found.append(DetectedBox(
            x=left,
            y=top,
            width=wide,
            height=tall,
            confidence=0.7 if colored else 0.6,
            border_thickness=0,
        ))

    return found
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
def _validate_box(
box: DetectedBox,
gray: np.ndarray,
content_w: int,
content_h: int,
median_row_gap: int,
) -> bool:
"""Validate that a detected box is genuine (not a table-row separator etc.)."""
# Must span > 25% of content width (lowered from 60% to allow smaller boxes)
if box.width < content_w * 0.25:
return False
# Height constraints
if box.height < 25 or box.height > content_h * 0.70:
return False
# Must not be confused with a table-row separator:
# real boxes are at least 3x the median row gap
if median_row_gap > 0 and box.height < median_row_gap * 3:
return False
# Must contain some text (ink density check)
h, w = gray.shape[:2]
y1 = max(0, box.y)
y2 = min(h, box.y + box.height)
x1 = max(0, box.x)
x2 = min(w, box.x + box.width)
roi = gray[y1:y2, x1:x2]
if roi.size == 0:
return False
ink_ratio = np.sum(roi < 128) / roi.size
if ink_ratio < 0.002: # nearly empty → not a real content box
return False
return True
# ---------------------------------------------------------------------------
# Public API: detect_boxes
# ---------------------------------------------------------------------------
def _merge_overlapping_boxes(boxes: List[DetectedBox]) -> List[DetectedBox]:
"""Merge boxes that overlap significantly (IoU > 0.3 or one contains the other).
When two boxes overlap, keep the one with higher confidence (or the larger
one if confidences are equal).
"""
if len(boxes) <= 1:
return boxes
# Sort by area descending so larger boxes are processed first
boxes = sorted(boxes, key=lambda b: b.width * b.height, reverse=True)
keep = [True] * len(boxes)
for i in range(len(boxes)):
if not keep[i]:
continue
bi = boxes[i]
for j in range(i + 1, len(boxes)):
if not keep[j]:
continue
bj = boxes[j]
# Compute overlap
x1 = max(bi.x, bj.x)
y1 = max(bi.y, bj.y)
x2 = min(bi.x + bi.width, bj.x + bj.width)
y2 = min(bi.y + bi.height, bj.y + bj.height)
if x2 <= x1 or y2 <= y1:
continue # no overlap
inter = (x2 - x1) * (y2 - y1)
area_i = bi.width * bi.height
area_j = bj.width * bj.height
smaller_area = min(area_i, area_j)
# If overlap covers > 50% of the smaller box, merge (drop the weaker)
if smaller_area > 0 and inter / smaller_area > 0.50:
# Keep the one with higher confidence; if equal, keep larger
if bj.confidence > bi.confidence:
keep[i] = False
break
else:
keep[j] = False
return [b for b, k in zip(boxes, keep) if k]
def detect_boxes(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
    median_row_gap: int = 0,
) -> List[DetectedBox]:
    """Find embedded boxes on a page image.

    Both detectors (border lines and background shading) always run; their
    candidates are concatenated, de-duplicated, validated and returned in
    reading order.

    Args:
        img_bgr: BGR color image (full page or cropped).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.
        median_row_gap: Median row gap height, used to reject table-row
            separators. 0 disables that check.

    Returns:
        Validated DetectedBox instances sorted by ascending ``y``.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    bounds = (content_x, content_w, content_y, content_h)

    # Bordered boxes first, then shaded/colored ones.
    from_lines = _detect_boxes_by_lines(gray, *bounds)
    from_shading = _detect_boxes_by_shading(img_bgr, *bounds)

    logger.debug("BoxDetect: %d line-based, %d shading-based candidates",
                 len(from_lines), len(from_shading))

    unique = _merge_overlapping_boxes(from_lines + from_shading)

    accepted = sorted(
        (
            candidate for candidate in unique
            if _validate_box(candidate, gray, content_w, content_h, median_row_gap)
        ),
        key=lambda candidate: candidate.y,
    )

    if not accepted:
        logger.debug("BoxDetect: no boxes detected")
    else:
        logger.info("BoxDetect: %d box(es) detected (line=%d, shade=%d, merged=%d)",
                    len(accepted), len(from_lines), len(from_shading), len(unique))

    return accepted
# ---------------------------------------------------------------------------
# Zone Splitting
# ---------------------------------------------------------------------------
def split_page_into_zones(
    content_x: int,
    content_y: int,
    content_w: int,
    content_h: int,
    boxes: List[DetectedBox],
    min_zone_height: int = 40,
) -> List[PageZone]:
    """Split a page into vertical zones based on detected boxes.

    Regions above, between, and below boxes become 'content' zones;
    box regions become 'box' zones.

    Args:
        content_x, content_y, content_w, content_h: Content area bounds.
        boxes: Detected boxes. They are expected to be sorted by y position;
            an unsorted list is sorted here (stable) so the cursor logic
            still holds.
        min_zone_height: Minimum height for a content zone to be kept.

    Returns:
        List of PageZone, ordered top to bottom. With no boxes, a single
        content zone covering the whole content area is returned.
    """
    if not boxes:
        # Single zone: entire content area
        return [PageZone(
            index=0,
            zone_type='content',
            y=content_y,
            height=content_h,
            x=content_x,
            width=content_w,
        )]

    zones: List[PageZone] = []
    cursor_y = content_y
    content_bottom = content_y + content_h

    # Stable sort: identical order for input that is already sorted by y.
    for box in sorted(boxes, key=lambda b: b.y):
        # Content zone above this box (skipped when the box starts at or
        # above the cursor, e.g. next to a box that was already emitted).
        gap_above = box.y - cursor_y
        if gap_above >= min_zone_height:
            zones.append(PageZone(
                index=len(zones),
                zone_type='content',
                y=cursor_y,
                height=gap_above,
                x=content_x,
                width=content_w,
            ))

        # Box zone
        zones.append(PageZone(
            index=len(zones),
            zone_type='box',
            y=box.y,
            height=box.height,
            x=box.x,
            width=box.width,
            box=box,
        ))

        # Never move the cursor upward: a box that ends above an earlier
        # box (side-by-side boxes that were not merged) must not reopen
        # rows already covered, or the next content zone would overlap an
        # emitted box zone.
        cursor_y = max(cursor_y, box.y + box.height)

    # Content zone below last box
    remaining = content_bottom - cursor_y
    if remaining >= min_zone_height:
        zones.append(PageZone(
            index=len(zones),
            zone_type='content',
            y=cursor_y,
            height=remaining,
            x=content_x,
            width=content_w,
        ))

    logger.info(f"ZoneSplit: {len(zones)} zones from {len(boxes)} box(es): "
                f"{[z.zone_type for z in zones]}")

    return zones

File diff suppressed because it is too large Load Diff

View File

@@ -1,303 +0,0 @@
"""
Color detection for OCR word boxes.
Detects the text color of existing OCR words and recovers colored text
regions (e.g. red markers, blue headings) that standard OCR may have missed.
Standard OCR (Tesseract, PaddleOCR) binarises images before processing,
destroying all color information. This module adds it back by sampling
HSV pixel values at word-box positions and finding colored regions that
no word-box covers.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from typing import Any, Dict, List, Optional, Tuple
import cv2
import numpy as np
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# HSV color ranges (OpenCV: H 0-180, S 0-255, V 0-255)
# ---------------------------------------------------------------------------
_COLOR_RANGES: Dict[str, List[Tuple[np.ndarray, np.ndarray]]] = {
"red": [
(np.array([0, 70, 50]), np.array([10, 255, 255])),
(np.array([170, 70, 50]), np.array([180, 255, 255])),
],
"orange": [
(np.array([10, 70, 50]), np.array([25, 255, 255])),
],
"yellow": [
(np.array([25, 70, 50]), np.array([35, 255, 255])),
],
"green": [
(np.array([35, 70, 50]), np.array([85, 255, 255])),
],
"blue": [
(np.array([100, 70, 50]), np.array([130, 255, 255])),
],
"purple": [
(np.array([130, 70, 50]), np.array([170, 255, 255])),
],
}
_COLOR_HEX: Dict[str, str] = {
"black": "#000000",
"gray": "#6b7280",
"red": "#dc2626",
"orange": "#ea580c",
"yellow": "#ca8a04",
"green": "#16a34a",
"blue": "#2563eb",
"purple": "#9333ea",
}
def _hue_to_color_name(hue: float) -> str:
"""Map OpenCV hue (0-180) to a color name."""
if hue < 10 or hue > 170:
return "red"
if hue < 25:
return "orange"
if hue < 35:
return "yellow"
if hue < 85:
return "green"
if hue < 130:
return "blue"
return "purple"
# ---------------------------------------------------------------------------
# 1. Color annotation for existing word boxes
# ---------------------------------------------------------------------------
def detect_word_colors(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    sat_threshold: int = 70,
    min_sat_ratio: float = 0.25,
) -> None:
    """Write the detected text color into every word box (in place).

    Each dict in ``word_boxes`` receives a ``color`` (hex string) and a
    ``color_name`` (e.g. 'red', 'black') entry. Nothing happens when the
    image is None or the list is empty.

    Per word:
        1. Crop the word region (clipped to the image).
        2. Build a text mask from Otsu-dark pixels plus saturated pixels.
        3. Sample the background from the crop's 2px border ring.
        4. On a tinted background, discard text pixels whose hue is within
           20 of the background hue (unless that would discard all of them).
        5. Classify as colored only if the median saturation reaches
           ``sat_threshold`` and at least ``min_sat_ratio`` of the pixels
           are saturated; the name comes from the median hue of the
           saturated pixels. Everything else is black.

    Args:
        img_bgr: BGR page image.
        word_boxes: Dicts with ``left``, ``top``, ``width``, ``height``.
        sat_threshold: Saturation above which a pixel counts as colored.
        min_sat_ratio: Minimum share of saturated text pixels.
    """
    if img_bgr is None or not word_boxes:
        return

    page_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    page_h, page_w = img_bgr.shape[:2]

    def _classify(wb: Dict) -> Optional[str]:
        """Return the color name for one word, or None for black."""
        left = max(0, int(wb["left"]))
        top = max(0, int(wb["top"]))
        right = min(page_w, int(wb["left"] + wb["width"]))
        bottom = min(page_h, int(wb["top"] + wb["height"]))
        if right <= left or bottom <= top:
            return None  # degenerate or fully off-page box

        hsv = page_hsv[top:bottom, left:right]
        gray = cv2.cvtColor(img_bgr[top:bottom, left:right], cv2.COLOR_BGR2GRAY)
        crop_h, crop_w = hsv.shape[:2]

        # Text mask = Otsu-dark pixels OR strongly saturated pixels.
        _, otsu_dark = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
        )
        vivid_mask = (hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
        pixels = hsv[cv2.bitwise_or(otsu_dark, vivid_mask) > 0]
        if len(pixels) < 3:
            return None

        # Background estimate from the 2px frame of the crop; only done
        # when the crop is large enough to have an interior.
        if crop_h > 6 and crop_w > 6:
            ring = 2
            frame = np.vstack([
                hsv[:ring, :].reshape(-1, 3),
                hsv[-ring:, :].reshape(-1, 3),
                hsv[ring:-ring, :ring].reshape(-1, 3),
                hsv[ring:-ring, -ring:].reshape(-1, 3),
            ])
            frame_hue = float(np.median(frame[:, 0]))
            frame_sat = float(np.median(frame[:, 1]))

            # Tinted background: drop text pixels sharing its hue so the
            # background color does not leak into the result.
            if frame_sat > 15:
                raw_delta = np.abs(pixels[:, 0].astype(float) - frame_hue)
                # Hue is circular with period 180 in OpenCV.
                differs = np.minimum(raw_delta, 180.0 - raw_delta) > 20
                if np.any(differs):
                    pixels = pixels[differs]
                    if len(pixels) < 3:
                        return None

        saturation = pixels[:, 1]
        vivid = saturation > sat_threshold
        mostly_gray = float(np.median(saturation)) < sat_threshold
        too_few_vivid = int(np.sum(vivid)) / len(pixels) < min_sat_ratio
        if mostly_gray or too_few_vivid:
            return None

        # Median hue of the saturated pixels only gives a cleaner signal.
        return _hue_to_color_name(float(np.median(pixels[vivid][:, 0])))

    n_colored = 0
    for wb in word_boxes:
        name = _classify(wb)
        if name is None:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
        else:
            wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
            wb["color_name"] = name
            n_colored += 1

    if n_colored:
        logger.info("color annotation: %d / %d words are colored",
                    n_colored, len(word_boxes))
# ---------------------------------------------------------------------------
# 2. Recover colored text that OCR missed
# ---------------------------------------------------------------------------
def recover_colored_text(
    img_bgr: np.ndarray,
    existing_words: List[Dict],
    min_area: int = 40,
    max_regions: int = 60,
) -> List[Dict]:
    """Recover colored marks that no existing word box covers.

    For every color in ``_COLOR_RANGES`` a mask is built, areas already
    occupied by (padded) word boxes are removed, and the remaining blobs
    are filtered by size. Each surviving blob becomes a word dict whose
    ``text`` comes from ``_identify_shape``.

    Args:
        img_bgr: BGR page image; None yields an empty list.
        existing_words: Word dicts with ``left``, ``top``, ``width``,
            ``height``.
        min_area: Minimum contour area of a recovered region.
        max_regions: Maximum number of regions kept per color (largest
            first).

    Returns:
        Recovered word dicts with ``text``, geometry, ``conf`` (45),
        ``color``, ``color_name`` and ``recovered=True``.
    """
    if img_bgr is None:
        return []

    page_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    page_h, page_w = img_bgr.shape[:2]
    area_cap = int(page_h * page_w * 0.005)

    # Padding scales with the typical word height so colored fringes in
    # narrow inter-word gaps are not picked up as characters.
    known_heights = [wb["height"] for wb in existing_words if wb.get("height", 0) > 0]
    typical_h = int(np.median(known_heights)) if known_heights else 20
    margin = max(8, int(typical_h * 0.35))

    covered = np.zeros((page_h, page_w), dtype=np.uint8)
    for wb in existing_words:
        col_from = max(0, int(wb["left"]) - margin)
        row_from = max(0, int(wb["top"]) - margin)
        col_to = min(page_w, int(wb["left"] + wb["width"]) + margin)
        row_to = min(page_h, int(wb["top"] + wb["height"]) + margin)
        covered[row_from:row_to, col_from:col_to] = 255
    uncovered = cv2.bitwise_not(covered)

    # Tall closing kernel joins the stroke and dot of "!"; the small
    # opening kernel removes noise specks.
    join_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
    speck_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    # Wider than four typical word heights is not a single character.
    width_cap = typical_h * 4

    found: List[Dict] = []
    for color_name, hsv_ranges in _COLOR_RANGES.items():
        color_mask = np.zeros((page_h, page_w), dtype=np.uint8)
        for low, high in hsv_ranges:
            color_mask = cv2.bitwise_or(color_mask, cv2.inRange(page_hsv, low, high))

        color_mask = cv2.bitwise_and(color_mask, uncovered)
        color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_CLOSE, join_kernel)
        color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, speck_kernel)

        blobs, _ = cv2.findContours(
            color_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
        )

        sized = []
        for blob in blobs:
            blob_area = cv2.contourArea(blob)
            if not (min_area <= blob_area <= area_cap):
                continue
            bx, by, bw, bh = cv2.boundingRect(blob)
            if bh < 6 or bw > width_cap:
                continue
            sized.append((blob_area, bx, by, bw, bh))

        # Largest regions first, capped per color.
        largest = sorted(sized, key=lambda item: item[0], reverse=True)[:max_regions]
        hex_code = _COLOR_HEX.get(color_name, "#000000")
        for _, bx, by, bw, bh in largest:
            found.append({
                "text": _identify_shape(bw, bh),
                "left": bx,
                "top": by,
                "width": bw,
                "height": bh,
                "conf": 45,
                "color": hex_code,
                "color_name": color_name,
                "recovered": True,
            })

    if found:
        tally: Dict[str, int] = {}
        for entry in found:
            tally[entry["color_name"]] = tally.get(entry["color_name"], 0) + 1
        logger.info(
            "color recovery: %d colored regions found (%s)",
            len(found),
            ", ".join(f"{c}: {tally[c]}" for c in sorted(tally)),
        )

    return found
def _identify_shape(w: int, h: int) -> str:
"""Simple shape heuristic for common single-character text markers."""
aspect = w / h if h > 0 else 1.0
if aspect < 0.55 and h > 10:
# Tall, narrow — likely exclamation mark
return "!"
if 0.6 < aspect < 1.5 and max(w, h) < 25:
# Small, roughly square — bullet or dot
return ""
return "?"

View File

@@ -1,313 +0,0 @@
"""
Graphical element detection for OCR pages.
Region-based approach:
1. Build a color mask (saturation channel — black text is invisible).
2. Dilate heavily to merge nearby colored pixels into regions.
3. For each region, check overlap with OCR word boxes:
- High word overlap → colored text (skip)
- Low word overlap → colored graphic / image (keep)
4. Separately detect large black-ink illustrations via ink mask.
Boxes and text colors are handled by cv_box_detect / cv_color_detect.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import cv2
import numpy as np
logger = logging.getLogger(__name__)
__all__ = ["detect_graphic_elements", "GraphicElement"]
@dataclass
class GraphicElement:
    """A detected non-text graphical element.

    The position fields describe the element's bounding box in pixel
    coordinates of the analysed image.
    """
    x: int  # left edge of the bounding box
    y: int  # top edge of the bounding box
    width: int
    height: int
    area: int  # size measure used for sorting (colored-pixel count or contour area)
    shape: str  # image, illustration
    color_name: str  # dominant color or 'black'
    color_hex: str  # hex string belonging to color_name, e.g. "#dc2626"
    confidence: float
    # Contour the element was derived from; excluded from repr() output.
    contour: Any = field(default=None, repr=False)
# ---------------------------------------------------------------------------
# Color helpers
# ---------------------------------------------------------------------------
# Hex color string for each named color. Looked up by _dominant_color(), which
# also uses the "black" entry as its fallback value.
_COLOR_HEX = {
    "black": "#000000",
    "gray": "#6b7280",
    "red": "#dc2626",
    "orange": "#ea580c",
    "yellow": "#ca8a04",
    "green": "#16a34a",
    "blue": "#2563eb",
    "purple": "#9333ea",
}
def _dominant_color(hsv_roi: np.ndarray, sat_threshold: int = 40) -> tuple:
    """Classify the prevailing color of a block of HSV pixels.

    Args:
        hsv_roi: HSV pixel data; it is flattened to rows of three values.
        sat_threshold: Saturation a pixel must exceed to count as colored.

    Returns:
        ``(color_name, color_hex)``. ``"black"`` is returned when the input
        is empty, when fewer than 15 % of the pixels are saturated, or when
        fewer than three saturated pixels remain.
    """
    black = ("black", _COLOR_HEX["black"])
    if hsv_roi.size == 0:
        return black

    flat = hsv_roi.reshape(-1, 3)
    is_saturated = flat[:, 1] > sat_threshold
    total = len(flat)
    saturated_share = np.sum(is_saturated) / total if total > 0 else 0
    if saturated_share < 0.15:
        return black

    colored = flat[is_saturated]
    if len(colored) < 3:
        return black

    hue = float(np.median(colored[:, 0]))
    if hue < 10 or hue > 170:
        # Red sits at both ends of the hue scale.
        name = "red"
    else:
        # First upper bound the median hue stays below wins; otherwise purple.
        name = "purple"
        for upper_bound, label in (
            (25, "orange"),
            (35, "yellow"),
            (85, "green"),
            (130, "blue"),
        ):
            if hue < upper_bound:
                name = label
                break
    return name, _COLOR_HEX.get(name, _COLOR_HEX["black"])
# ---------------------------------------------------------------------------
# Main detection
# ---------------------------------------------------------------------------
def _paint_word_boxes(mask: np.ndarray, word_boxes: List[Dict], pad: int = 0) -> None:
    """Set ``mask`` to 255 inside every word box, grown by ``pad`` pixels.

    Boxes are clipped to the mask. A box that is empty after clipping (for
    example one lying entirely left of or above the image) is skipped:
    without that check its negative slice end would wrap around and paint an
    unrelated, much larger part of the mask.
    """
    mask_h, mask_w = mask.shape[:2]
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)) - pad)
        y1 = max(0, int(wb.get("top", 0)) - pad)
        x2 = min(mask_w, int(wb.get("left", 0) + wb.get("width", 0)) + pad)
        y2 = min(mask_h, int(wb.get("top", 0) + wb.get("height", 0)) + pad)
        if x2 > x1 and y2 > y1:
            mask[y1:y2, x1:x2] = 255


def detect_graphic_elements(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    detected_boxes: Optional[List[Dict]] = None,
    max_elements: int = 50,
) -> List[GraphicElement]:
    """Find non-text graphical regions on the page.

    Two passes produce candidates:

    1. Colored regions: pixels with saturation > 40 and value < 240 are
       dilated into regions. A region is kept when at most half of its
       bounding box is covered by OCR word boxes and it contains at least
       200 colored pixels.
    2. Black-ink illustrations: Otsu-thresholded dark pixels that lie outside
       padded word boxes, outside the interior of detected boxes and outside
       colored pixels. Contours with area >= 5000 and a short side >= 40 px
       are kept.

    Candidates are sorted by area (largest first); a candidate is dropped when
    its intersection with an already accepted element exceeds half of the
    smaller of the two bounding boxes.

    Args:
        img_bgr: BGR color image.
        word_boxes: List of OCR word dicts with left/top/width/height.
            ``None`` is treated like an empty list.
        detected_boxes: Optional list of detected box dicts (x/y/w/h).
        max_elements: Maximum number of elements to return.

    Returns:
        List of GraphicElement, sorted by area descending.
    """
    if img_bgr is None:
        return []
    word_boxes = word_boxes or []

    h, w = img_bgr.shape[:2]

    logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
                 w, h, len(word_boxes), len(detected_boxes or []))

    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    candidates: List[GraphicElement] = []

    # --- Build word mask (for overlap checking) ---
    word_mask = np.zeros((h, w), dtype=np.uint8)
    _paint_word_boxes(word_mask, word_boxes)

    # =====================================================================
    # PASS 1 — COLORED IMAGE REGIONS
    # =====================================================================
    # Color mask: saturated pixels (black text has sat ≈ 0 → invisible)
    sat_mask = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
    val_mask = (hsv[:, :, 2] < 240).astype(np.uint8) * 255
    color_pixels = cv2.bitwise_and(sat_mask, val_mask)

    # Remove tiny speckle
    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    color_pixels = cv2.morphologyEx(color_pixels, cv2.MORPH_OPEN, kernel_open)

    # Count raw colored pixels before dilation (for density check later)
    color_pixel_raw = color_pixels.copy()

    # Heavy dilation to merge nearby colored elements into regions.
    # A 25x25 kernel merges elements within ~12px of each other.
    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25))
    region_mask = cv2.dilate(color_pixels, kernel_dilate, iterations=1)

    contours_regions, _ = cv2.findContours(
        region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))

    for cnt in contours_regions:
        bx, by, bw, bh = cv2.boundingRect(cnt)

        # Skip tiny regions
        if bw < 15 or bh < 15:
            continue

        # Skip page-spanning regions
        if bw > w * 0.5 or bh > h * 0.5:
            logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
            continue

        bbox_area = bw * bh

        # Check: how much of this region's bounding box overlaps with words?
        roi_words = word_mask[by:by + bh, bx:bx + bw]
        word_pixel_count = int(np.sum(roi_words > 0))
        word_overlap = word_pixel_count / bbox_area if bbox_area > 0 else 0

        # Check: how many actual colored pixels are in this region?
        roi_color = color_pixel_raw[by:by + bh, bx:bx + bw]
        color_pixel_count = int(np.sum(roi_color > 0))

        # If most of the region is covered by word boxes → colored text, skip
        if word_overlap > 0.5:
            logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%",
                         bx, by, bw, bh, word_overlap * 100)
            continue

        # Need a minimum number of colored pixels (not just dilated area)
        if color_pixel_count < 200:
            continue

        # Determine dominant color from the actual colored pixels
        roi_hsv = hsv[by:by + bh, bx:bx + bw]
        color_px_mask = roi_color > 0
        if np.sum(color_px_mask) > 0:
            masked_hsv = roi_hsv[color_px_mask]
            color_name, color_hex = _dominant_color(masked_hsv)
        else:
            color_name, color_hex = "black", _COLOR_HEX["black"]

        # Confidence based on color density and low word overlap
        density = color_pixel_count / bbox_area if bbox_area > 0 else 0
        conf = min(0.95, 0.5 + density * 0.5)

        logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d overlap=%.0f%% %s",
                     bx, by, bw, bh, color_pixel_count, word_overlap * 100, color_name)
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=color_pixel_count,
            shape="image",
            color_name=color_name, color_hex=color_hex,
            confidence=round(conf, 2), contour=cnt,
        ))

    # =====================================================================
    # PASS 2 — LARGE BLACK-INK ILLUSTRATIONS
    # =====================================================================
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Exclude words and colored regions already found
    exclusion = np.zeros((h, w), dtype=np.uint8)
    word_pad = 5
    _paint_word_boxes(exclusion, word_boxes, pad=word_pad)

    if detected_boxes:
        for box in detected_boxes:
            bbx = int(box.get("x", 0))
            bby = int(box.get("y", 0))
            bbw = int(box.get("w", box.get("width", 0)))
            bbh = int(box.get("h", box.get("height", 0)))
            # Only the interior is excluded; the 8 px rim (the box border)
            # stays available to the ink mask.
            inset = 8
            x1 = max(0, bbx + inset)
            y1 = max(0, bby + inset)
            x2 = min(w, bbx + bbw - inset)
            y2 = min(h, bby + bbh - inset)
            if x2 > x1 and y2 > y1:
                exclusion[y1:y2, x1:x2] = 255

    ink_only = cv2.bitwise_and(dark_mask, cv2.bitwise_not(exclusion))
    ink_only = cv2.bitwise_and(ink_only, cv2.bitwise_not(color_pixels))

    contours_ink, _ = cv2.findContours(
        ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))

    for cnt in contours_ink:
        area = cv2.contourArea(cnt)
        bx, by, bw, bh = cv2.boundingRect(cnt)

        if area < 5000 or min(bw, bh) < 40:
            continue
        if bw > w * 0.8 or bh > h * 0.8:
            continue

        logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
                     bx, by, bw, bh, int(area))
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=int(area), shape="illustration",
            color_name="black", color_hex="#000000",
            confidence=0.5, contour=cnt,
        ))

    # =====================================================================
    # Deduplicate and return
    # =====================================================================
    candidates.sort(key=lambda g: g.area, reverse=True)

    final: List[GraphicElement] = []
    for c in candidates:
        overlap = False
        for f in final:
            ix1 = max(c.x, f.x)
            iy1 = max(c.y, f.y)
            ix2 = min(c.x + c.width, f.x + f.width)
            iy2 = min(c.y + c.height, f.y + f.height)
            if ix2 > ix1 and iy2 > iy1:
                inter = (ix2 - ix1) * (iy2 - iy1)
                smaller = min(c.width * c.height, f.width * f.height)
                if smaller > 0 and inter / smaller > 0.5:
                    overlap = True
                    break
        if not overlap:
            final.append(c)

    result = final[:max_elements]

    if result:
        shape_counts: Dict[str, int] = {}
        for g in result:
            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
        logger.info(
            "GraphicDetect: %d elements found (%s)",
            len(result),
            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
        )
    else:
        logger.info("GraphicDetect: no graphic elements found")

    return result

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,180 +0,0 @@
"""
Shared types, constants, and availability guards for the CV vocabulary pipeline.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import json
import logging
import os
import re # noqa: F401 — re-exported for downstream modules
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import numpy as np # noqa: F401
logger = logging.getLogger(__name__)
# --- Availability Guards ---
# Optional dependencies are imported defensively: when an import fails the
# module name is bound to None and the matching *_AVAILABLE flag is False.
try:
    import cv2  # noqa: F401
    CV2_AVAILABLE = True
except ImportError:
    cv2 = None  # type: ignore[assignment]
    CV2_AVAILABLE = False
    logger.warning("OpenCV not available — CV pipeline disabled")

try:
    import pytesseract  # noqa: F401
    from PIL import Image  # noqa: F401
    TESSERACT_AVAILABLE = True
except ImportError:
    pytesseract = None  # type: ignore[assignment]
    Image = None  # type: ignore[assignment,misc]
    TESSERACT_AVAILABLE = False
    logger.warning("pytesseract/Pillow not available — CV pipeline disabled")

# True only when both OpenCV and pytesseract/Pillow could be imported.
CV_PIPELINE_AVAILABLE = CV2_AVAILABLE and TESSERACT_AVAILABLE

# --- IPA Dictionary ---
# IPA_AVAILABLE becomes True as soon as either source below is usable.
IPA_AVAILABLE = False
_ipa_convert_american = None
_britfone_dict: Dict[str, str] = {}

try:
    import eng_to_ipa as _eng_to_ipa
    _ipa_convert_american = _eng_to_ipa.convert
    IPA_AVAILABLE = True
    logger.info("eng_to_ipa available — American IPA lookup enabled")
except ImportError:
    logger.info("eng_to_ipa not installed — American IPA disabled")

# Load Britfone dictionary (MIT license, ~15k British English IPA entries)
# from data/britfone_ipa.json next to this module. A read or parse failure is
# logged and leaves _britfone_dict empty.
_britfone_path = os.path.join(os.path.dirname(__file__), 'data', 'britfone_ipa.json')
if os.path.exists(_britfone_path):
    try:
        with open(_britfone_path, 'r', encoding='utf-8') as f:
            _britfone_dict = json.load(f)
        IPA_AVAILABLE = True
        logger.info(f"Britfone loaded — {len(_britfone_dict)} British IPA entries")
    except Exception as e:
        logger.warning(f"Failed to load Britfone: {e}")
else:
    logger.info("Britfone not found — British IPA disabled")
# --- Language Detection Constants ---
# Lower-case sets of frequent function words per language. German umlauts are
# spelled as ASCII digraphs here ('fuer', 'ueber').
# NOTE(review): the consumers are not visible in this module — presumably
# token counts against these sets decide the language; confirm at call sites.
GERMAN_FUNCTION_WORDS = {'der', 'die', 'das', 'und', 'ist', 'ein', 'eine', 'nicht',
    'von', 'zu', 'mit', 'auf', 'fuer', 'den', 'dem', 'sich', 'auch', 'wird',
    'nach', 'bei', 'aus', 'wie', 'oder', 'wenn', 'noch', 'aber', 'hat', 'nur',
    'ueber', 'kann', 'als', 'ich', 'er', 'sie', 'es', 'wir', 'ihr', 'haben',
    'sein', 'werden', 'war', 'sind', 'muss', 'soll', 'dieser', 'diese', 'diesem'}
ENGLISH_FUNCTION_WORDS = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'of',
    'and', 'in', 'that', 'it', 'for', 'on', 'with', 'as', 'at', 'by', 'from',
    'or', 'but', 'not', 'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
    'would', 'can', 'could', 'should', 'may', 'might', 'this', 'they', 'you', 'he',
    'she', 'we', 'my', 'your', 'his', 'her', 'its', 'our', 'their', 'which'}
# --- Data Classes ---
@dataclass
class PageRegion:
    """A detected region on the page.

    Holds the region's type label, its rectangle (x, y, width, height) and
    how that type was determined.
    """
    # One of: 'column_en', 'column_de', 'column_example', 'page_ref',
    # 'column_marker', 'column_text', 'header', 'footer', 'margin_top',
    # 'margin_bottom'
    type: str  # 'column_en', 'column_de', 'column_example', 'page_ref', 'column_marker', 'column_text', 'header', 'footer', 'margin_top', 'margin_bottom'
    x: int
    y: int
    width: int
    height: int
    classification_confidence: float = 1.0  # 0.0-1.0
    classification_method: str = ""  # 'content', 'position_enhanced', 'position_fallback'
@dataclass
class ColumnGeometry:
    """Geometrically detected column, before type classification."""
    index: int  # 0-based, left to right
    x: int
    y: int
    width: int
    height: int
    word_count: int
    words: List[Dict]  # word dicts from Tesseract (text, conf, left, top, ...)
    width_ratio: float  # width / content_width (0.0-1.0)
    is_sub_column: bool = False  # True if created by _detect_sub_columns() split
@dataclass
class RowGeometry:
    """Geometrically detected row with header/footer classification."""
    index: int  # 0-based, top to bottom
    x: int  # absolute left (= content left_x)
    y: int  # absolute y start
    width: int  # content width
    height: int  # row height in px
    word_count: int
    words: List[Dict]
    row_type: str = 'content'  # 'content' | 'header' | 'footer'
    gap_before: int = 0  # gap in px above this row
@dataclass
class VocabRow:
    """A single vocabulary entry assembled from multi-column OCR.

    Every field has a default, so ``VocabRow()`` yields an empty entry.
    """
    english: str = ""
    german: str = ""
    example: str = ""
    source_page: str = ""
    confidence: float = 0.0
    y_position: int = 0
@dataclass
class PipelineResult:
    """Complete result of the CV pipeline.

    Every field has a default. The list and dict fields use
    ``default_factory`` so each instance gets its own container.
    """
    vocabulary: List[Dict[str, Any]] = field(default_factory=list)
    word_count: int = 0
    columns_detected: int = 0
    duration_seconds: float = 0.0
    stages: Dict[str, float] = field(default_factory=dict)
    error: Optional[str] = None  # None by default
    image_width: int = 0
    image_height: int = 0
@dataclass
class DocumentTypeResult:
    """Result of automatic document type detection.

    ``doc_type``, ``confidence`` and ``pipeline`` are required; the remaining
    fields default to empty containers.
    """
    doc_type: str  # 'vocab_table' | 'full_text' | 'generic_table'
    confidence: float  # 0.0-1.0
    pipeline: str  # 'cell_first' | 'full_page'
    skip_steps: List[str] = field(default_factory=list)  # e.g. ['columns', 'rows']
    features: Dict[str, Any] = field(default_factory=dict)  # debug info
@dataclass
class DetectedBox:
    """An embedded box (e.g. grammar tip, exercise) detected on the page.

    The rectangle is given by ``x``, ``y``, ``width`` and ``height``.
    """
    x: int  # absolute pixel position
    y: int
    width: int
    height: int
    confidence: float  # 0.0-1.0
    border_thickness: int = 0
@dataclass
class PageZone:
    """A horizontal zone of the page — either normal content or a detected box.

    ``box`` defaults to None and ``columns`` to an empty list.
    """
    index: int  # 0-based, top to bottom
    zone_type: str  # 'content' | 'box'
    y: int  # absolute pixel y
    height: int
    x: int
    width: int
    box: Optional[DetectedBox] = None
    columns: List[ColumnGeometry] = field(default_factory=list)

View File

@@ -1,355 +0,0 @@
"""
Words-First Grid Builder (Bottom-Up).
Builds a cell grid from Tesseract word_boxes directly, without requiring
pre-detected columns or rows. Algorithm:
1. Cluster words into columns by X-gap analysis
2. Cluster words into rows by Y-proximity
3. Build cells at (column, row) intersections
Returns the same (cells, columns_meta) format as build_cell_grid_v2().
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
import re
import statistics
from typing import Any, Dict, List, Optional, Tuple
from cv_ocr_engines import (
_group_words_into_lines,
_words_to_reading_order_text,
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# 1. Column clustering
# ---------------------------------------------------------------------------
def _cluster_columns(
words: List[Dict],
img_w: int,
min_gap_pct: float = 3.0,
) -> List[Dict[str, Any]]:
"""Cluster words into columns by finding large horizontal gaps.
Returns a list of column dicts:
[{'index': 0, 'type': 'column_1', 'x_min': ..., 'x_max': ...}, ...]
sorted left-to-right.
"""
if not words:
return []
# Sort by X center
sorted_w = sorted(words, key=lambda w: w['left'] + w['width'] / 2)
# Collect word heights to compute adaptive threshold
heights = [w['height'] for w in sorted_w if w.get('height', 0) > 0]
median_h = statistics.median(heights) if heights else 30
# Adaptive gap threshold: 3× median word height, but at least min_gap_pct of image width
min_gap_px = max(median_h * 3, img_w * min_gap_pct / 100) if img_w > 0 else median_h * 3
# Find X-gap boundaries between consecutive words (sorted by X-center)
# For each word, compute right edge; for next word, compute left edge
boundaries: List[float] = [] # X positions where columns split
for i in range(len(sorted_w) - 1):
right_edge = sorted_w[i]['left'] + sorted_w[i]['width']
left_edge = sorted_w[i + 1]['left']
gap = left_edge - right_edge
if gap > min_gap_px:
# Split point is midway through the gap
boundaries.append((right_edge + left_edge) / 2)
# Build column ranges from boundaries
# Column ranges: (-inf, boundary[0]), (boundary[0], boundary[1]), ..., (boundary[-1], +inf)
col_edges = [0.0] + boundaries + [float(img_w)]
columns = []
for ci in range(len(col_edges) - 1):
columns.append({
'index': ci,
'type': f'column_{ci + 1}' if len(col_edges) > 2 else 'column_text',
'x_min': col_edges[ci],
'x_max': col_edges[ci + 1],
})
return columns
# ---------------------------------------------------------------------------
# 2. Row clustering
# ---------------------------------------------------------------------------
def _cluster_rows(
    words: List[Dict],
) -> List[Dict[str, Any]]:
    """Group words into visual rows and report each row's vertical extent.

    The grouping is delegated to ``_group_words_into_lines`` with a Y
    tolerance of half the median word height (at least 5 px; a height of 20
    is assumed when no word has a positive height).

    Returns:
        Row dicts ``{'index', 'y_min', 'y_max', 'y_center'}`` in the order the
        lines are returned by ``_group_words_into_lines``. Empty input gives
        an empty list.
    """
    if not words:
        return []

    valid_heights = [wd['height'] for wd in words if wd.get('height', 0) > 0]
    typical_h = statistics.median(valid_heights) if valid_heights else 20
    tolerance = max(typical_h * 0.5, 5)

    result = []
    grouped = _group_words_into_lines(words, y_tolerance_px=int(tolerance))
    for idx, members in enumerate(grouped):
        top = min(wd['top'] for wd in members)
        bottom = max(wd['top'] + wd['height'] for wd in members)
        result.append({
            'index': idx,
            'y_min': top,
            'y_max': bottom,
            'y_center': (top + bottom) / 2,
        })
    return result
# ---------------------------------------------------------------------------
# 3. Build cells
# ---------------------------------------------------------------------------
def _assign_word_to_column(word: Dict, columns: List[Dict]) -> int:
"""Return column index for a word based on its X-center."""
x_center = word['left'] + word['width'] / 2
for col in columns:
if col['x_min'] <= x_center < col['x_max']:
return col['index']
# Fallback: nearest column
return min(columns, key=lambda c: abs((c['x_min'] + c['x_max']) / 2 - x_center))['index']
def _assign_word_to_row(word: Dict, rows: List[Dict]) -> int:
"""Return row index for a word based on its Y-center.
When rows overlap (e.g. due to tall border-ghost characters inflating
a row's y_max), prefer the row whose y_center is closest.
"""
y_center = word['top'] + word['height'] / 2
# Find all rows whose y_range contains this word's center
matching = [r for r in rows if r['y_min'] <= y_center <= r['y_max']]
if matching:
return min(matching, key=lambda r: abs(r['y_center'] - y_center))['index']
# Fallback: nearest row by Y-center
return min(rows, key=lambda r: abs(r['y_center'] - y_center))['index']
def _split_word_into_token_boxes(word: Dict) -> List[Dict[str, Any]]:
    """Turn one OCR box into one box per token.

    The text is split at whitespace, in front of every "[" (IPA patterns like
    "badge[bxd3]") and between a "!" and a following letter ("!Betonung").
    A box with at most one token is returned unchanged (text stripped). For
    several tokens the box width is shared out in proportion to token length,
    with a gap of 2 % of the box width between neighbours.
    """
    stripped = word.get('text', '').strip()
    pieces = [
        p for p in re.split(r'\s+|(?=\[)|(?<=!)(?=[A-Za-z\u00c0-\u024f])', stripped)
        if p
    ]

    if len(pieces) <= 1:
        return [{
            'text': stripped,
            'left': word['left'],
            'top': word['top'],
            'width': word['width'],
            'height': word['height'],
            'conf': word.get('conf', 0),
        }]

    # At least two non-empty tokens here, so the character total is positive.
    char_total = sum(len(p) for p in pieces)
    gap = word['width'] * 0.02
    usable = word['width'] - gap * (len(pieces) - 1)

    boxes = []
    x_pos = word['left']
    for piece in pieces:
        piece_w = max(1, usable * len(piece) / char_total)
        boxes.append({
            'text': piece,
            'left': round(x_pos),
            'top': word['top'],
            'width': round(piece_w),
            'height': word['height'],
            'conf': word.get('conf', 0),
        })
        x_pos += piece_w + gap
    return boxes


def _build_cells(
    words: List[Dict],
    columns: List[Dict],
    rows: List[Dict],
    img_w: int,
    img_h: int,
) -> List[Dict[str, Any]]:
    """Create one cell dict per (column, row) pair that received words.

    Each word is assigned to a column and a row; words sharing both form a
    cell. Cells are emitted ordered by row index, then column index. A cell
    carries its reading-order text, the mean of the positive word
    confidences, a tight bounding box in pixels and in percent of the image
    size, and per-token word boxes in reading order.

    Returns:
        List of cell dicts; empty when ``columns`` or ``rows`` is empty.
    """
    if not columns or not rows:
        return []

    def _percent(value: float, total: int) -> float:
        return round(value / total * 100, 2) if total else 0

    # (col_idx, row_idx) -> words that fall into that cell
    grouped: Dict[Tuple[int, int], List[Dict]] = {}
    for word in words:
        key = (_assign_word_to_column(word, columns), _assign_word_to_row(word, rows))
        grouped.setdefault(key, []).append(word)

    result = []
    for ci, ri in sorted(grouped, key=lambda k: (k[1], k[0])):
        members = grouped[(ci, ri)]

        # Tight bounding box around the words actually in the cell.
        left = min(m['left'] for m in members)
        top = min(m['top'] for m in members)
        right = max(m['left'] + m['width'] for m in members)
        bottom = max(m['top'] + m['height'] for m in members)
        box_w = right - left
        box_h = bottom - top

        # The same Y tolerance drives the text assembly and the box ordering.
        line_tol = max(10, int(box_h * 0.4))
        cell_text = _words_to_reading_order_text(members, y_tolerance_px=line_tol)

        positive_confs = [m.get('conf', 0) for m in members if m.get('conf', 0) > 0]
        mean_conf = sum(positive_confs) / len(positive_confs) if positive_confs else 0.0

        # Reading order: group into visual lines first, because a plain
        # (top, left) sort misorders words whose tops differ by a few pixels.
        lines = _group_words_into_lines(members, y_tolerance_px=line_tol)
        token_boxes = [
            box
            for line in lines
            for member in line
            for box in _split_word_into_token_boxes(member)
        ]

        result.append({
            'cell_id': f"R{ri:02d}_C{ci}",
            'row_index': ri,
            'col_index': ci,
            'col_type': columns[ci]['type'],
            'text': cell_text,
            'confidence': round(mean_conf, 1),
            'bbox_px': {'x': left, 'y': top, 'w': box_w, 'h': box_h},
            'bbox_pct': {
                'x': _percent(left, img_w),
                'y': _percent(top, img_h),
                'w': _percent(box_w, img_w),
                'h': _percent(box_h, img_h),
            },
            'word_boxes': token_boxes,
            'ocr_engine': 'words_first',
            'is_bold': False,
        })

    return result
# ---------------------------------------------------------------------------
# 4. Public API
# ---------------------------------------------------------------------------
def build_grid_from_words(
    word_dicts: List[Dict],
    img_w: int,
    img_h: int,
    min_confidence: int = 30,
    box_rects: Optional[List[Dict]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Build a cell grid bottom-up from OCR word boxes.

    Steps: drop words below ``min_confidence`` or without text, drop words
    whose centre lies inside any of ``box_rects``, cluster the rest into
    columns and rows, then build the cells.

    Args:
        word_dicts: Flat list of word dicts with keys text, left, top, width,
            height, conf (absolute pixel coordinates).
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        min_confidence: Minimum OCR confidence to keep a word.
        box_rects: Optional list of box dicts with keys x, y, width, height.

    Returns:
        ``(cells, columns_meta)``. ``columns_meta`` holds one
        ``{'index', 'type', 'x', 'width'}`` dict per column. Both lists are
        empty when no word survives the filters.
    """
    if not word_dicts:
        logger.info("build_grid_from_words: no words — returning empty grid")
        return [], []

    kept = [
        wd for wd in word_dicts
        if wd.get('conf', 0) >= min_confidence and wd.get('text', '').strip()
    ]
    if not kept:
        logger.info("build_grid_from_words: all words filtered (conf < %d)", min_confidence)
        return [], []
    logger.info("build_grid_from_words: %d words (after confidence filter from %d)", len(kept), len(word_dicts))

    if box_rects:
        def _center_in_some_box(wd: Dict) -> bool:
            cx = wd['left'] + wd['width'] / 2
            cy = wd['top'] + wd['height'] / 2
            return any(
                b['x'] <= cx <= b['x'] + b['width']
                and b['y'] <= cy <= b['y'] + b['height']
                for b in box_rects
            )

        outside = [wd for wd in kept if not _center_in_some_box(wd)]
        removed = len(kept) - len(outside)
        if removed:
            logger.info("build_grid_from_words: excluded %d words inside %d box(es)",
                        removed, len(box_rects))
        kept = outside

    if not kept:
        logger.info("build_grid_from_words: all words inside boxes — returning empty grid")
        return [], []

    columns = _cluster_columns(kept, img_w)
    logger.info("build_grid_from_words: %d column(s) detected", len(columns))

    rows = _cluster_rows(kept)
    logger.info("build_grid_from_words: %d row(s) detected", len(rows))

    cells = _build_cells(kept, columns, rows, img_w, img_h)
    logger.info("build_grid_from_words: %d cells built", len(cells))

    # Integer x/width per column for the returned metadata.
    columns_meta = [
        {
            'index': col['index'],
            'type': col['type'],
            'x': int(col['x_min']),
            'width': int(col['x_max'] - col['x_min']),
        }
        for col in columns
    ]

    return cells, columns_meta

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,276 +0,0 @@
"""
Handwriting HTR API - Hochwertige Handschriftenerkennung (HTR) fuer Klausurkorrekturen.
Endpoints:
- POST /api/v1/htr/recognize - Bild hochladen → handgeschriebener Text
- POST /api/v1/htr/recognize-session - OCR-Pipeline Session als Quelle nutzen
Modell-Strategie:
1. qwen2.5vl:32b via Ollama (primaer, hoechste Qualitaet als VLM)
2. microsoft/trocr-large-handwritten (Fallback, offline, kein Ollama)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal auf dem Mac Mini.
"""
import io
import os
import logging
import time
import base64
from typing import Optional
import cv2
import numpy as np
from fastapi import APIRouter, HTTPException, Query, UploadFile, File
from pydantic import BaseModel
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/htr", tags=["HTR"])
# Settings read from the environment, each with a built-in default.
# Base URL of the Ollama HTTP API; "/api/generate" is appended for requests.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
# Model name sent in the "model" field of the Ollama request payload.
OLLAMA_HTR_MODEL = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")
# NOTE(review): not referenced in the visible part of this module — confirm
# whether anything still reads it.
HTR_FALLBACK_MODEL = os.getenv("HTR_FALLBACK_MODEL", "trocr-large")
# ---------------------------------------------------------------------------
# Pydantic Models
# ---------------------------------------------------------------------------
# Request body that selects an existing session as the image source.
# NOTE(review): presumably consumed by the /recognize-session endpoint named
# in the module docstring — the handler is not visible here; confirm.
class HTRSessionRequest(BaseModel):
    session_id: str
    model: str = "auto"  # "auto" | "qwen2.5vl" | "trocr-large"
    use_clean: bool = True  # Prefer clean_png (after handwriting removal)
# ---------------------------------------------------------------------------
# Preprocessing
# ---------------------------------------------------------------------------
def _preprocess_for_htr(img_bgr: np.ndarray) -> np.ndarray:
    """Boost contrast and, for small images, enlarge before recognition.

    The image is converted to grayscale and equalised with CLAHE
    (clip limit 2.0, 8x8 tiles). If the shorter side is below 800 px the
    result is scaled up with cubic interpolation so that side becomes 800 px.

    Args:
        img_bgr: BGR image.

    Returns:
        The enhanced grayscale image.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    result = equalizer.apply(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY))

    rows, cols = result.shape
    short_side = min(rows, cols)
    if short_side >= 800:
        return result

    factor = 800 / short_side
    return cv2.resize(
        result, None, fx=factor, fy=factor,
        interpolation=cv2.INTER_CUBIC
    )
def _bgr_to_png_bytes(img_bgr: np.ndarray) -> bytes:
    """Encode an image array as PNG and return the raw bytes.

    Raises:
        RuntimeError: If OpenCV reports that encoding failed.
    """
    ok, encoded = cv2.imencode(".png", img_bgr)
    if ok:
        return encoded.tobytes()
    raise RuntimeError("Failed to encode image to PNG")
def _preprocess_image_bytes(image_bytes: bytes) -> bytes:
    """Decode an encoded image, run the HTR preprocessing, re-encode as PNG.

    Raises:
        ValueError: If the bytes cannot be decoded as an image.
    """
    decoded = cv2.imdecode(np.frombuffer(image_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    if decoded is None:
        raise ValueError("Could not decode image")

    gray = _preprocess_for_htr(decoded)
    # The encoder helper is fed a 3-channel image, so expand the gray result.
    return _bgr_to_png_bytes(cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR))
# ---------------------------------------------------------------------------
# Backend: Ollama qwen2.5vl
# ---------------------------------------------------------------------------
async def _recognize_with_qwen_vl(image_bytes: bytes, language: str) -> Optional[str]:
    """Ask the Ollama vision model to transcribe the handwriting in an image.

    The image is sent base64-encoded to ``{OLLAMA_BASE_URL}/api/generate``
    together with a German prompt naming the expected language ("Deutsch" is
    used for unknown language codes). The request times out after 120 s.

    Returns:
        The stripped ``response`` text, or None if the request or response
        handling raised (the error is logged as a warning).
    """
    import httpx

    language_names = {
        "de": "Deutsch",
        "en": "Englisch",
        "de+en": "Deutsch und Englisch",
    }
    lang_hint = language_names.get(language, "Deutsch")

    prompt = (
        f"Du bist ein OCR-Experte fuer handgeschriebenen Text auf {lang_hint}. "
        "Lies den Text im Bild exakt ab — korrigiere KEINE Rechtschreibfehler. "
        "Antworte NUR mit dem erkannten Text, ohne Erklaerungen."
    )

    request_body = {
        "model": OLLAMA_HTR_MODEL,
        "prompt": prompt,
        "images": [base64.b64encode(image_bytes).decode("utf-8")],
        "stream": False,
    }

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(f"{OLLAMA_BASE_URL}/api/generate", json=request_body)
            resp.raise_for_status()
            return resp.json().get("response", "").strip()
    except Exception as e:
        logger.warning(f"Ollama qwen2.5vl HTR failed: {e}")
        return None
# ---------------------------------------------------------------------------
# Backend: TrOCR-large fallback
# ---------------------------------------------------------------------------
async def _recognize_with_trocr_large(image_bytes: bytes) -> Optional[str]:
    """Transcribe handwriting through ``services.trocr_service``.

    Returns:
        The stripped text, or None when TrOCR is reported unavailable, when
        it yields no text, or when anything raises (logged as a warning).
    """
    try:
        from services.trocr_service import run_trocr_ocr, _check_trocr_available

        if _check_trocr_available():
            # The confidence value returned alongside the text is not used.
            text, _confidence = await run_trocr_ocr(image_bytes, handwritten=True, size="large")
            return text.strip() if text else None

        logger.warning("TrOCR not available for HTR fallback")
        return None
    except Exception as e:
        logger.warning(f"TrOCR-large HTR failed: {e}")
        return None
# ---------------------------------------------------------------------------
# Core recognition logic
# ---------------------------------------------------------------------------
async def _do_recognize(
    image_bytes: bytes,
    model: str = "auto",
    preprocess: bool = True,
    language: str = "de",
) -> dict:
    """
    Core HTR logic: preprocess -> try Ollama -> fall back to TrOCR-large.

    Backend selection by ``model``:
        * "auto" / "qwen2.5vl": Ollama first, TrOCR-large if Ollama yields None.
        * "trocr-large": TrOCR-large only.
        * anything else: no backend is attempted.

    Args:
        image_bytes: Raw bytes of the image to transcribe.
        model: Backend selector, see above.
        preprocess: When True, run _preprocess_image_bytes first; if that
            raises, the raw image is used instead.
        language: Language hint forwarded to the Ollama backend.

    Returns:
        dict with keys "text", "model_used", "processing_time_ms",
        "language" and "preprocessed". When no backend produced a result,
        "text" is "" and "model_used" is "none (all backends failed)".
    """
    t0 = time.monotonic()

    if preprocess:
        try:
            image_bytes = _preprocess_image_bytes(image_bytes)
        except Exception as e:
            # Best effort: recognition still runs on the unprocessed image.
            logger.warning(f"HTR preprocessing failed, using raw image: {e}")

    text: Optional[str] = None
    model_used: str = "none"

    use_qwen = model in ("auto", "qwen2.5vl")
    # TrOCR is the primary backend for "trocr-large" and the fallback whenever
    # the Ollama backend was selected. Whether the fallback actually runs is
    # decided below, after the Ollama attempt, by checking `text is None`.
    use_trocr = use_qwen or model == "trocr-large"

    if use_qwen:
        text = await _recognize_with_qwen_vl(image_bytes, language)
        if text is not None:
            model_used = f"qwen2.5vl ({OLLAMA_HTR_MODEL})"

    if text is None and use_trocr:
        text = await _recognize_with_trocr_large(image_bytes)
        if text is not None:
            model_used = "trocr-large-handwritten"

    if text is None:
        text = ""
        model_used = "none (all backends failed)"

    elapsed_ms = int((time.monotonic() - t0) * 1000)
    return {
        "text": text,
        "model_used": model_used,
        "processing_time_ms": elapsed_ms,
        "language": language,
        "preprocessed": preprocess,
    }
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.post("/recognize")
async def recognize_handwriting(
    file: UploadFile = File(...),
    model: str = Query("auto", description="auto | qwen2.5vl | trocr-large"),
    preprocess: bool = Query(True, description="Apply CLAHE + upscale before recognition"),
    language: str = Query("de", description="de | en | de+en"),
):
    """
    Upload an image and get back the handwritten text as plain text.
    Tries qwen2.5vl:32b via Ollama first, falls back to TrOCR-large-handwritten.
    """
    # Reject unsupported query values before reading the upload.
    allowed_models = ("auto", "qwen2.5vl", "trocr-large")
    allowed_languages = ("de", "en", "de+en")

    if model not in allowed_models:
        raise HTTPException(
            status_code=400,
            detail="model must be one of: auto, qwen2.5vl, trocr-large",
        )
    if language not in allowed_languages:
        raise HTTPException(
            status_code=400,
            detail="language must be one of: de, en, de+en",
        )

    uploaded = await file.read()
    if not uploaded:
        raise HTTPException(status_code=400, detail="Empty file")

    # Hand over to the shared recognition routine; its result dict is the response.
    return await _do_recognize(
        uploaded,
        model=model,
        preprocess=preprocess,
        language=language,
    )
@router.post("/recognize-session")
async def recognize_from_session(req: HTRSessionRequest):
    """
    Use an OCR-Pipeline session as image source for HTR.
    Set use_clean=true to prefer the clean image (after handwriting removal step).
    This is useful when you want to do HTR on isolated handwriting regions.
    """
    from ocr_pipeline_session_store import get_session_db, get_session_image

    if not await get_session_db(req.session_id):
        raise HTTPException(status_code=404, detail=f"Session {req.session_id} not found")

    # Image variants in order of preference; "clean" is only tried on request.
    candidates = ["deskewed", "original"]
    if req.use_clean:
        candidates.insert(0, "clean")

    image_bytes: Optional[bytes] = None
    source_used: str = ""
    for image_kind in candidates:
        image_bytes = await get_session_image(req.session_id, image_kind)
        if image_bytes:
            # First variant that has data wins.
            source_used = image_kind
            break

    if not image_bytes:
        raise HTTPException(status_code=404, detail="No image available in session")

    # Annotate the recognition result with where the image came from.
    result = await _do_recognize(image_bytes, model=req.model)
    result["session_id"] = req.session_id
    result["source_image"] = source_used
    return result

View File

@@ -42,14 +42,6 @@ try:
except ImportError:
trocr_router = None
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache
from grid_editor_api import router as grid_editor_router
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
from ocr_pipeline_session_store import init_ocr_pipeline_tables
try:
from handwriting_htr_api import router as htr_router
except ImportError:
htr_router = None
try:
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
@@ -83,13 +75,6 @@ async def lifespan(app: FastAPI):
except Exception as e:
print(f"Warning: Vocab sessions database initialization failed: {e}")
# Initialize OCR Pipeline session tables
try:
await init_ocr_pipeline_tables()
print("OCR Pipeline session tables initialized")
except Exception as e:
print(f"Warning: OCR Pipeline tables initialization failed: {e}")
# Initialize database pool for DSFA RAG
dsfa_db_pool = None
if DSFA_DATABASE_URL and set_dsfa_db_pool:
@@ -119,19 +104,6 @@ async def lifespan(app: FastAPI):
# Ensure EH upload directory exists
os.makedirs(EH_UPLOAD_DIR, exist_ok=True)
# Preload LightOnOCR model if OCR_ENGINE=lighton (avoids cold-start on first request)
ocr_engine_env = os.getenv("OCR_ENGINE", "auto")
if ocr_engine_env == "lighton":
try:
import asyncio
from services.lighton_ocr_service import get_lighton_model
loop = asyncio.get_event_loop()
print("Preloading LightOnOCR-2-1B at startup (OCR_ENGINE=lighton)...")
await loop.run_in_executor(None, get_lighton_model)
print("LightOnOCR-2-1B preloaded")
except Exception as e:
print(f"Warning: LightOnOCR preload failed: {e}")
yield
print("Klausur-Service shutting down...")
@@ -178,12 +150,6 @@ app.include_router(mail_router) # Unified Inbox Mail
if trocr_router:
app.include_router(trocr_router) # TrOCR Handwriting OCR
app.include_router(vocab_router) # Vocabulary Worksheet Generator
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
app.include_router(grid_editor_router) # Grid Editor (Excel-like)
set_orientation_crop_cache(ocr_pipeline_cache)
app.include_router(orientation_crop_router) # OCR Pipeline: Orientation + Crop
if htr_router:
app.include_router(htr_router) # Handwriting HTR (Klausur)
if dsfa_rag_router:
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search

View File

@@ -1,28 +0,0 @@
-- OCR Pipeline Sessions: persistent session storage.
-- Applied automatically by ocr_pipeline_session_store.init_ocr_pipeline_tables().
-- Every statement uses IF NOT EXISTS, so the script can be re-run safely.

CREATE TABLE IF NOT EXISTS ocr_pipeline_sessions (
    id                  UUID         PRIMARY KEY,
    name                VARCHAR(255) NOT NULL,
    filename            VARCHAR(255),
    status              VARCHAR(50)  DEFAULT 'active',
    current_step        INTEGER      DEFAULT 1,

    -- Image payloads, stored as raw bytes.
    original_png        BYTEA,
    deskewed_png        BYTEA,
    binarized_png       BYTEA,
    dewarped_png        BYTEA,

    -- Result documents, stored as JSONB.
    deskew_result       JSONB,
    dewarp_result       JSONB,
    column_result       JSONB,
    ground_truth        JSONB        DEFAULT '{}',

    auto_shear_degrees  FLOAT,

    created_at          TIMESTAMP    DEFAULT NOW(),
    updated_at          TIMESTAMP    DEFAULT NOW()
);

-- Supports listing sessions newest-first.
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_created
    ON ocr_pipeline_sessions (created_at DESC);

-- Supports filtering sessions by status.
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_status
    ON ocr_pipeline_sessions (status);

View File

@@ -1,4 +0,0 @@
-- Migration 003: add the row_result column (row geometry detection).
-- Holds the detected row geometries, including header/footer classification.
-- Idempotent: ADD COLUMN IF NOT EXISTS makes re-running a no-op.
ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS row_result JSONB;

View File

@@ -1,4 +0,0 @@
-- Migration 004: add the word_result column (OCR Pipeline step 5).
-- Holds the word recognition grid result
-- (entries with english/german/example + bboxes).
-- Idempotent: ADD COLUMN IF NOT EXISTS makes re-running a no-op.
ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS word_result JSONB;

View File

@@ -1,7 +0,0 @@
-- Migration 005: add document type detection columns.
-- Both columns store the outcome of the automatic document type detection
-- (vocab_table, full_text, generic_table) that runs after dewarp.
-- Idempotent: ADD COLUMN IF NOT EXISTS makes re-running a no-op.
ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS doc_type        VARCHAR(50),
    ADD COLUMN IF NOT EXISTS doc_type_result JSONB;

Some files were not shown because too many files have changed in this diff Show More